# DATA PRE-PROCESSING

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
dataset = pd.read_csv("./Datasets/LT.csv")
x = dataset.iloc[1:, 1:5].values
y = dataset.iloc[1:, -1].values

In [3]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
imputer.fit(x[:, :-1])
x[:, :-1] = imputer.transform(x[:, :-1])

In [4]:
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.2)

# LINEAR REGRESSION MULTIVARIABLE


In [5]:
from sklearn.linear_model import LinearRegression
lr_m =  LinearRegression()
lr_m.fit(xtrain,ytrain)
lr_m_pred = lr_m.predict(xtest)

In [6]:
np.set_printoptions(precision=2)
print(np.concatenate((ytest.reshape(len(ytest),1), lr_m.predict(xtest).reshape(len(lr_m.predict(xtest)),1)),1))

[[1697.9  1703.57]
 [1374.6  1376.4 ]
 [1064.95 1080.03]
 ...
 [2647.75 2609.08]
 [ 951.2   952.65]
 [1440.6  1445.12]]


# LINEAR REGRESSION SINGLE VARIABLE

In [7]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(xtrain[:,0].reshape(-1,1), ytrain)

LinearRegression()

In [8]:
lr_pred = lr.predict(xtest[:,0].reshape(-1,1))
print(np.concatenate((ytest.reshape(-1,1), lr_pred.reshape(-1,1)),-1))

[[1697.9  1720.19]
 [1374.6  1365.62]
 [1064.95 1087.96]
 ...
 [2647.75 2628.87]
 [ 951.2   946.67]
 [1440.6  1467.03]]


# POLYNOMIAL LINEAR REGRESSION

In [9]:
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree = 1)
X_poly = poly.fit_transform(x) 
lin2 = LinearRegression()

In [10]:
plrxtrain, plrxtest, plrytrain, plrytest = train_test_split(X_poly,y,test_size=0.2)

In [11]:
lin2.fit(plrxtrain, plrytrain)

LinearRegression()

In [12]:
plr_pred = lin2.predict(plrxtest)
print(np.concatenate((plrytest.reshape(-1,1), plr_pred.reshape(-1,1)),-1))

[[ 784.2   791.21]
 [ 965.65  964.74]
 [1333.95 1340.37]
 ...
 [ 875.35  872.51]
 [1783.65 1776.13]
 [1171.25 1168.74]]


# SUPPORT VECTOR REGRESSION

In [13]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(x)

StandardScaler()

In [14]:
temp = scaler.fit_transform(x)

In [15]:
svrxtrain, svrxtest, svrytrain, svrytest = train_test_split(temp,y,test_size=0.2)

In [16]:
from sklearn.svm import SVR
clf_svr= SVR(kernel="rbf", C=100, gamma="auto")
clf_svr.fit(svrxtrain,svrytrain)

SVR(C=100, gamma='auto')

In [17]:
svr_pred = clf_svr.predict(svrxtest)
print(np.concatenate((svrytest.reshape(-1,1), svr_pred.reshape(-1,1)),-1))

[[2982.65 2977.41]
 [1562.5  1555.16]
 [ 892.6   892.54]
 ...
 [1247.   1236.79]
 [1375.2  1375.24]
 [1290.3  1291.45]]


# DECISION TREE REGRESSION

In [18]:
from sklearn.tree import DecisionTreeRegressor 
  
regressor = DecisionTreeRegressor(random_state = 0) 

regressor.fit(xtrain, ytrain)

DecisionTreeRegressor(random_state=0)

In [19]:
ypred = regressor.predict(xtest)

In [20]:
from sklearn import metrics
print('r2 value:',metrics.r2_score(ytest, ypred))

r2 value: 0.9979126769063308


In [21]:
print('accuracy',100- (np.mean(np.abs((ytest - ypred) / ytest)) * 100))

accuracy 99.11815308907912


# RANDOM FOREST REGRESSION

In [22]:
from sklearn.ensemble import RandomForestRegressor
rand_reg = RandomForestRegressor()
rand_reg.fit(xtrain, ytrain)
rand_pred = rand_reg.predict(xtest)

In [23]:
print(np.concatenate((ytest.reshape(-1,1), rand_pred.reshape(-1,1)),-1))

[[1697.9  1711.53]
 [1374.6  1374.29]
 [1064.95 1083.17]
 ...
 [2647.75 2600.84]
 [ 951.2   953.22]
 [1440.6  1447.64]]


# Comparing Model R2 score

In [28]:
print('r2 value decision tree Regression:',metrics.r2_score(ytest, ypred))
print('r2 value Random Forest Regression:',metrics.r2_score(ytest, rand_pred))
print('r2 value Support Vector Regression:',metrics.r2_score(svrytest, svr_pred))
print('r2 value Polynomial Linear Regression:',metrics.r2_score(plrytest, plr_pred))
print('r2 value Linear Regression Single Variable:',metrics.r2_score(ytest, lr_pred))
print('r2 value Linear Regression Multi-Variable:',metrics.r2_score(ytest, lr_m.predict(xtest)))

r2 value decision tree Regression: 0.9979126769063308
r2 value Random Forest Regression: 0.9986835032464421
r2 value Support Vector Regression: 0.9976561045394448
r2 value Polynomial Linear Regression: 0.9990091286256881
r2 value Linear Regression Single Variable: 0.9953538212782052
r2 value Linear Regression Multi-Variable: 0.9987146689033681


In [46]:
model_comp = pd.DataFrame({'models':['Decision Tree Regression',
                                     'Random Forest Regression',
                                     'Support Vector Regression',
                                     'Polynomial Linear Regression',
                                     'Linear Regression Single Variable',
                                     'Linear Regression Multi-Variable'],
                           'accuracy': [metrics.r2_score(ytest, ypred),
                                        metrics.r2_score(ytest, rand_pred),
                                        metrics.r2_score(svrytest, svr_pred),
                                        metrics.r2_score(plrytest, plr_pred),
                                        metrics.r2_score(ytest, lr_pred),
                                        metrics.r2_score(ytest, lr_m.predict(xtest))]
})

In [47]:
model_comp.sort_values(by=['accuracy'],ascending=False)

Unnamed: 0,models,accuracy
3,Polynomial Linear Regression,0.999009
5,Linear Regression Multi-Variable,0.998715
1,Random Forest Regression,0.998684
0,Decision Tree Regression,0.997913
2,Support Vector Regression,0.997656
4,Linear Regression Single Variable,0.995354


Linear Regression Multivariable

Linear Regression Simple

Linear Regression Polynomial

Support Vector Regression

Decision Tree Regression

Random Forest Regression

# Results