In [1]:
from sklearn.datasets import make_regression
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn import metrics

In [2]:
# Read data
df = pd.read_csv('CH22_Demand_XY_Train.csv')

In [3]:
# Data preparation
dataset = df.values
X = dataset[:,1:5]
Y = dataset[:,5]
X = np.asarray(X).astype('float32')
Y = np.asarray(Y).astype('float32')
X_train, X_val_and_test, Y_train, Y_val_and_test = train_test_split(X, Y, test_size=0.3)
X_val, X_test, Y_val, Y_test = train_test_split(X_val_and_test, Y_val_and_test, test_size=0.5)

In [4]:
# Linear Regression
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, Y_train)
y_sum = 0
for ind in range(len(Y_test)):
    y_sum += Y_test[ind]
y_mean = y_sum / len(Y_test)
ssr = 0
sst = 0
ynew = model.predict(X_test)
for i in range(len(X_test)):
    ssr += (Y_test[i] - ynew[i])**2
    sst += (Y_test[i] - y_mean)**2
r2 = 1 - (ssr/sst)
print("R^2 value:",r2)
print("r2_score:",metrics.r2_score(Y_test, ynew))

R^2 value: 0.18407092471616227
r2_score: 0.18407092476272036


In [5]:
#  Support Vector Regression (SVR)
from sklearn.svm import SVC
from sklearn import svm
model = svm.SVR()
model.fit(X_train, Y_train)
y_sum = 0
for ind in range(len(Y_test)):
    y_sum += Y_test[ind]
y_mean = y_sum / len(Y_test)
ssr = 0
sst = 0
ynew = model.predict(X_test)
for i in range(len(X_test)):
    ssr += (Y_test[i] - ynew[i])**2
    sst += (Y_test[i] - y_mean)**2
r2 = 1 - (ssr/sst)
print("R^2 value:",r2)
print("r2_score:",metrics.r2_score(Y_test, ynew))

R^2 value: 0.0041926316951083065
r2_score: 0.00419263199792741


In [6]:
# K-Nearest Neignbors (KNN)
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors=1)
Y_train = Y_train.astype('int')
model.fit(X_train, Y_train)
y_sum = 0
for ind in range(len(Y_test)):
    y_sum += Y_test[ind]
y_mean = y_sum / len(Y_test)
ssr = 0
sst = 0
ynew = model.predict(X_test)
for i in range(len(X_test)):
    ssr += (Y_test[i] - ynew[i])**2
    sst += (Y_test[i] - y_mean)**2
r2 = 1 - (ssr/sst)
print("R^2 value:",r2)
print("r2_score:",metrics.r2_score(Y_test, ynew))

R^2 value: -0.06870150376371198
r2_score: -0.06870150343876569


In [7]:
# Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier(criterion="entropy", max_depth=100000)
Y_train = Y_train.astype('int')
model.fit(X_train, Y_train)
y_sum = 0
for ind in range(len(Y_test)):
    y_sum += Y_test[ind]
y_mean = y_sum / len(Y_test)
ssr = 0
sst = 0
ynew = model.predict(X_test)
for i in range(len(X_test)):
    ssr += (Y_test[i] - ynew[i])**2
    sst += (Y_test[i] - y_mean)**2
r2 = 1 - (ssr/sst)
print("R^2 value:",r2)
print("r2_score:",metrics.r2_score(Y_test, ynew))

R^2 value: -0.12892644699850986
r2_score: -0.12892644665525088


In [8]:
# Linear Discriminant Analysis (LDA)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
model = LinearDiscriminantAnalysis()
Y_train = Y_train.astype('int')
model.fit(X_train, Y_train)
y_sum = 0
for ind in range(len(Y_test)):
    y_sum += Y_test[ind]
y_mean = y_sum / len(Y_test)
ssr = 0
sst = 0
ynew = model.predict(X_test)
for i in range(len(X_test)):
    ssr += (Y_test[i] - ynew[i])**2
    sst += (Y_test[i] - y_mean)**2
r2 = 1 - (ssr/sst)
print("R^2 value:",r2)
print("r2_score:",metrics.r2_score(Y_test, ynew))

R^2 value: -0.40418560459619024
r2_score: -0.40418560416922467


In [9]:
# Gaussian NB
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
Y_train = Y_train.astype('int')
model.fit(X_train, Y_train)
y_sum = 0
for ind in range(len(Y_test)):
    y_sum += Y_test[ind]
y_mean = y_sum / len(Y_test)
ssr = 0
sst = 0
ynew = model.predict(X_test)
for i in range(len(X_test)):
    ssr += (Y_test[i] - ynew[i])**2
    sst += (Y_test[i] - y_mean)**2
r2 = 1 - (ssr/sst)
print("R^2 value:",r2)
print("r2_score:",metrics.r2_score(Y_test, ynew))

R^2 value: -0.21883785329581862
r2_score: -0.21883785292521662
