In [1]:
#AI based insight to predict the optimal value of bugs and usecases to be created 
#Using regression model to achieve this
#input variable - complexity(XS, S, L, M) and story points 0-20
#output to predict - use cases and bugs

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import LinearSVR
from sklearn.metrics import r2_score


#read the csv file 
dataset = pd.read_csv('ProjectABC_details.csv')

#Get the feature varaibles and dependent variables
x = dataset.iloc[:,2:4].values;
y = dataset.iloc[:,4:6].values;

#Encode the feature variable
ci = ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[0])],remainder = 'passthrough')
x = np.array(ci.fit_transform(x))

#Split the dataset into training set and test set
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size = 0.3, random_state = 0)

#Train the multi linear regression model
lr = LinearRegression()
lr.fit(xtrain, ytrain)
ypred = lr.predict(xtest)
lr_score = r2_score(ytest, ypred)
#np.set_printoptions(precision = 2)
#print(np.concatenate((ypred.reshape(len(ypred),2),ytest.reshape(len(ytest),2)),1))
#print(mean_absolute_error(ytest, ypred))

#Calculate the cross value score for linearn regression model
scrtype = 'r2'
cs = cross_val_score(lr,x,y,scoring = scrtype)
cs = -cs

#Train the decision tree regression model
dt = DecisionTreeRegressor(random_state = 0)
dt.fit(xtrain,ytrain)
y1pred = dt.predict(xtest)
dt_score = r2_score(ytest, y1pred)
#print(np.concatenate((y1pred.reshape(len(y1pred),2),ytest.reshape(len(ytest),2)),1))

#Calculate the cross value score for decision regression model.
cs = cross_val_score(dt, x, y, scoring = scrtype)
cs = -cs

#Train the random forest regression model
rf = RandomForestRegressor(n_estimators = 10, random_state = 0)
rf.fit(xtrain,ytrain)
y2pred = rf.predict(xtest)
rf_score = r2_score(ytest, y2pred)
#print(np.concatenate((y2pred.reshape(len(y2pred),2),ytest.reshape(len(ytest),2)),1))

#Calculate the cross value score for random forest model
cs = cross_val_score(rf,x,y,scoring = scrtype)
cs = -cs

#Train the linear SVR model
lrs = LinearSVR()
wrapper = MultiOutputRegressor(lrs)
wrapper.fit(xtrain,ytrain)
y3pred = rf.predict(xtest)
lrs_score = r2_score(ytest, y3pred)
#print(np.concatenate((y3pred.reshape(len(y3pred),2),ytest.reshape(len(ytest),2)),1))

#Calculate the cross value score for linearn SVR model
cs = cross_val_score(wrapper,x,y,scoring = scrtype)
cs = -cs

print('*****Cross val score*****')
print('cross value of linear model:',cs.mean())
print('cross value of decision tree regression model:',cs.mean())
print('cross value score of random forest regression model:',cs.mean())
print('cross value of SVR model:',cs.mean())

print('*****r2 score*****')
print('r2 score of linear model:',lr_score)
print('r2 score of decision tree model:',dt_score)
print('r2 score of random forest model:',rf_score)
print('r2 score of SVR model:',lrs_score)

print('*****Predicted values*****')
print('Prediction using multi linear regressor:',lr.predict(([[1,0,0,0,8]])))
print('Prediction using decision tree regressor:',dt.predict(([[1,0,0,0,8]])))
print('Prediction using random forest regressor:',rf.predict(([[1,0,0,0,8]])))
print('Prediction using SVR regressor:',wrapper.predict(([[1,0,0,0,8]])))

import pickle
import pickle
with open("lr.pkl", "wb") as f:
    pickle.dump(lr, f)


*****Cross val score*****
cross value of linear model: -0.7261181465705624
cross value of decision tree regression model: -0.7261181465705624
cross value score of random forest regression model: -0.7261181465705624
cross value of SVR model: -0.7261181465705624
*****r2 score*****
r2 score of linear model: 0.6866376249973161
r2 score of decision tree model: 0.7959778225583158
r2 score of random forest model: 0.746832416237442
r2 score of SVR model: 0.746832416237442
*****Predicted values*****
Prediction using multi linear regressor: [[10.93621134  8.03350515]]
Prediction using decision tree regressor: [[11.          8.14285714]]
Prediction using random forest regressor: [[11.21166667  8.17857143]]
Prediction using SVR regressor: [[10.0070793   7.99836499]]


