# DATA PRE-PROCESSING

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
dataset = pd.read_csv("./Datasets/LT.csv")
x = dataset.iloc[1:, 1:5].values
y = dataset.iloc[1:, -1].values

In [3]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
imputer.fit(x[:, :-1])
x[:, :-1] = imputer.transform(x[:, :-1])

In [4]:
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.2)

# LINEAR REGRESSION MULTIVARIABLE


In [5]:
from sklearn.linear_model import LinearRegression
lr_m =  LinearRegression()
lr_m.fit(xtrain,ytrain)
lr_m_pred = lr_m.predict(xtest)

In [6]:
np.set_printoptions(precision=2)
print(np.concatenate((ytest.reshape(len(ytest),1), lr_m.predict(xtest).reshape(len(lr_m.predict(xtest)),1)),1))

[[1110.65 1109.9 ]
 [ 839.2   841.55]
 [1960.2  1951.71]
 ...
 [1370.9  1374.41]
 [1802.5  1806.97]
 [ 765.45  765.72]]


# LINEAR REGRESSION SINGLE VARIABLE

In [7]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(xtrain[:,0].reshape(-1,1), ytrain)

LinearRegression()

In [8]:
lr_pred = lr.predict(xtest[:,0].reshape(-1,1))
print(np.concatenate((ytest.reshape(-1,1), lr_pred.reshape(-1,1)),-1))

[[1110.65 1088.83]
 [ 839.2   848.43]
 [1960.2  1931.74]
 ...
 [1370.9  1358.47]
 [1802.5  1812.06]
 [ 765.45  779.1 ]]


# POLYNOMIAL LINEAR REGRESSION

In [20]:
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree = 1)
X_poly = poly.fit_transform(x) 
lin2 = LinearRegression()

In [21]:
plrxtrain, plrxtest, plrytrain, plrytest = train_test_split(X_poly,y,test_size=0.2)

In [22]:
lin2.fit(plrxtrain, plrytrain)

LinearRegression()

In [23]:
plr_pred = lin2.predict(plrxtest)
print(np.concatenate((plrytest.reshape(-1,1), plr_pred.reshape(-1,1)),-1))

[[1740.25 1735.04]
 [1367.3  1364.95]
 [1240.55 1237.06]
 ...
 [2448.4  2442.29]
 [1332.45 1321.1 ]
 [2297.6  2315.43]]


# SUPPORT VECTOR REGRESSION

In [9]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(x)

StandardScaler()

In [10]:
temp = scaler.fit_transform(x)

In [11]:
svrxtrain, svrxtest, svrytrain, svrytest = train_test_split(temp,y,test_size=0.2)

In [12]:
from sklearn.svm import SVR
clf_svr= SVR(kernel="rbf", C=100, gamma="auto")
clf_svr.fit(svrxtrain,svrytrain)

SVR(C=100, gamma='auto')

In [13]:
svr_pred = clf_svr.predict(svrxtest)
print(np.concatenate((svrytest.reshape(-1,1), svr_pred.reshape(-1,1)),-1))

[[1873.85 1878.46]
 [1363.6  1367.55]
 [1366.95 1370.62]
 ...
 [1437.85 1442.66]
 [1578.05 1576.37]
 [1146.9  1150.68]]


# DECISION TREE REGRESSION

In [14]:
from sklearn.tree import DecisionTreeRegressor 
  
regressor = DecisionTreeRegressor(random_state = 0) 

regressor.fit(xtrain, ytrain)

DecisionTreeRegressor(random_state=0)

In [15]:
ypred = regressor.predict(xtest)

In [16]:
from sklearn import metrics
print('r2 value:',metrics.r2_score(ytest, ypred))

r2 value: 0.9953921411007015


In [17]:
print('accuracy',100- (np.mean(np.abs((ytest - ypred) / ytest)) * 100))

accuracy 99.07086848047787


# RANDOM FOREST REGRESSION

In [18]:
from sklearn.ensemble import RandomForestRegressor
rand_reg = RandomForestRegressor()
rand_reg.fit(xtrain, ytrain)
rand_pred = rand_reg.predict(xtest)

In [19]:
print(np.concatenate((ytest.reshape(-1,1), rand_pred.reshape(-1,1)),-1))

[[1110.65 1106.6 ]
 [ 839.2   844.89]
 [1960.2  1945.46]
 ...
 [1370.9  1371.85]
 [1802.5  1805.19]
 [ 765.45  767.21]]


# Comparing Model R2 score

Linear Regression Multivariable

Linear Regression Simple

Linear Regression Polynomial

Support Vector Regression

Decision Tree Regression

Random Forest Regression

# Results