# Libs

In [100]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics

# Data

In [79]:
data = pd.read_excel("C:/Users/Almighty/Python workspace/BOT Boursa/2021/Scrapped Data/TAIR/TAIR 2020.xlsx",sheet_name=0,header=0)
data.head()

Unnamed: 0,symbole,date,ouverture,haut,bas,cloture,volume
0,TAIR,2020-01-02,0.63,0.63,0.62,0.63,13391
1,TAIR,2020-01-03,0.63,0.63,0.62,0.62,22163
2,TAIR,2020-01-06,0.63,0.63,0.59,0.6,124299
3,TAIR,2020-01-07,0.6,0.6,0.59,0.59,69788
4,TAIR,2020-01-08,0.59,0.59,0.58,0.59,60144


# Processing

In [80]:
# convert the pandas dataframe feature data type from TimeStamp to Date
data['date'] = data['date'].apply(lambda x:x.toordinal())

In [81]:
x = data.iloc[:,1:6]  # we do not start with the firt feature, because it has the name of the stock           
y = data.cloture # we're going to predict the closing prices of the TAIR

In [82]:
x.head()

Unnamed: 0,date,ouverture,haut,bas,cloture
0,737426,0.63,0.63,0.62,0.63
1,737427,0.63,0.63,0.62,0.62
2,737430,0.63,0.63,0.59,0.6
3,737431,0.6,0.6,0.59,0.59
4,737432,0.59,0.59,0.58,0.59


In [83]:
# convert ti numpy
x = x.values
x

array([[7.37426e+05, 6.30000e-01, 6.30000e-01, 6.20000e-01, 6.30000e-01],
       [7.37427e+05, 6.30000e-01, 6.30000e-01, 6.20000e-01, 6.20000e-01],
       [7.37430e+05, 6.30000e-01, 6.30000e-01, 5.90000e-01, 6.00000e-01],
       ...,
       [7.37788e+05, 5.80000e-01, 5.80000e-01, 5.70000e-01, 5.80000e-01],
       [7.37789e+05, 5.90000e-01, 5.90000e-01, 5.80000e-01, 5.80000e-01],
       [7.37790e+05, 5.80000e-01, 5.90000e-01, 5.70000e-01, 5.90000e-01]])

In [84]:
y.head()

0    0.63
1    0.62
2    0.60
3    0.59
4    0.59
Name: cloture, dtype: float64

In [85]:
y.values

array([0.63, 0.62, 0.6 , 0.59, 0.59, 0.59, 0.57, 0.57, 0.57, 0.57, 0.57,
       0.57, 0.59, 0.61, 0.62, 0.62, 0.6 , 0.61, 0.61, 0.61, 0.6 , 0.58,
       0.6 , 0.6 , 0.63, 0.64, 0.63, 0.65, 0.68, 0.72, 0.71, 0.7 , 0.71,
       0.72, 0.72, 0.7 , 0.69, 0.7 , 0.7 , 0.71, 0.71, 0.71, 0.7 , 0.7 ,
       0.68, 0.67, 0.65, 0.63, 0.63, 0.64, 0.62, 0.61, 0.6 , 0.59, 0.59,
       0.59, 0.6 , 0.59, 0.58, 0.58, 0.58, 0.59, 0.58, 0.57, 0.56, 0.55,
       0.54, 0.55, 0.55, 0.54, 0.53, 0.52, 0.53, 0.54, 0.55, 0.55, 0.55,
       0.56, 0.55, 0.54, 0.54, 0.54, 0.54, 0.54, 0.53, 0.54, 0.54, 0.54,
       0.54, 0.54, 0.52, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.59, 0.6 ,
       0.61, 0.62, 0.61, 0.6 , 0.6 , 0.61, 0.6 , 0.6 , 0.59, 0.6 , 0.58,
       0.57, 0.58, 0.59, 0.58, 0.59, 0.57, 0.58, 0.57, 0.57, 0.58, 0.59,
       0.6 , 0.59, 0.6 , 0.62, 0.64, 0.64, 0.64, 0.64, 0.61, 0.61, 0.63,
       0.63, 0.63, 0.61, 0.61, 0.61, 0.6 , 0.62, 0.61, 0.6 , 0.6 , 0.6 ,
       0.59, 0.58, 0.59, 0.58, 0.57, 0.59, 0.58, 0.

In [86]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=73) # random_state to reproduce the same result, and test size will the 33% of the train data

X_train

In [88]:
print("X_train = ",X_train.shape)
print("y_train",y_train.shape)
print("X_test = ",X_test.shape)
print("y_test",y_test.shape)

X_train =  (164, 5)
y_train (164,)
X_test =  (81, 5)
y_test (81,)


# Linear Regression

In [102]:
lr = LinearRegression()
lr_model = lr.fit(X_train, y_train)

In [103]:
y_pred_lr = lr_model.predict(X_test) 
y_pred_lr

array([0.59, 0.71, 0.58, 0.63, 0.63, 0.59, 0.61, 0.56, 0.64, 0.6 , 0.59,
       0.6 , 0.61, 0.58, 0.62, 0.6 , 0.57, 0.62, 0.58, 0.61, 0.57, 0.58,
       0.56, 0.54, 0.55, 0.57, 0.58, 0.58, 0.55, 0.6 , 0.64, 0.59, 0.63,
       0.52, 0.63, 0.6 , 0.6 , 0.6 , 0.58, 0.62, 0.63, 0.62, 0.62, 0.57,
       0.64, 0.62, 0.6 , 0.56, 0.58, 0.72, 0.6 , 0.59, 0.64, 0.61, 0.57,
       0.62, 0.58, 0.54, 0.62, 0.64, 0.59, 0.56, 0.7 , 0.6 , 0.61, 0.7 ,
       0.58, 0.57, 0.61, 0.7 , 0.54, 0.59, 0.57, 0.6 , 0.67, 0.6 , 0.61,
       0.56, 0.63, 0.57, 0.61])

In [104]:
print('LR')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_lr))  
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_lr))  
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_lr))) 

LR
Mean Absolute Error: 1.585837085791736e-15
Mean Squared Error: 3.364680454348339e-30
Root Mean Squared Error: 1.834306532275437e-15


# DTR model

In [89]:
dtr = DecisionTreeRegressor()  
dtr_model = dtr.fit(X_train, y_train)

In [91]:
y_pred_dtr = dtr_model.predict(X_test) 
y_pred_dtr

array([0.59, 0.71, 0.58, 0.63, 0.63, 0.59, 0.61, 0.57, 0.64, 0.6 , 0.59,
       0.6 , 0.61, 0.58, 0.62, 0.6 , 0.57, 0.62, 0.58, 0.61, 0.57, 0.58,
       0.57, 0.54, 0.55, 0.57, 0.58, 0.58, 0.55, 0.6 , 0.64, 0.59, 0.63,
       0.52, 0.63, 0.6 , 0.6 , 0.6 , 0.58, 0.62, 0.63, 0.62, 0.62, 0.57,
       0.64, 0.62, 0.6 , 0.57, 0.58, 0.72, 0.6 , 0.59, 0.64, 0.61, 0.57,
       0.62, 0.58, 0.54, 0.62, 0.64, 0.59, 0.57, 0.7 , 0.6 , 0.61, 0.7 ,
       0.58, 0.57, 0.61, 0.7 , 0.54, 0.59, 0.57, 0.6 , 0.68, 0.6 , 0.61,
       0.57, 0.63, 0.57, 0.61])

In [93]:
print('DTR')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_dtr))  
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_dtr))  
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_dtr))) 

DTR
Mean Absolute Error: 0.000740740740740814
Mean Squared Error: 7.40740740740742e-06
Root Mean Squared Error: 0.002721655269759089


# RFR

In [96]:
rfr = RandomForestRegressor()  
rfr_model = rfr.fit(X_train, y_train)  



In [98]:
y_pred_rfr = dtr_model.predict(X_test) 
y_pred_rfr

array([0.59, 0.71, 0.58, 0.63, 0.63, 0.59, 0.61, 0.57, 0.64, 0.6 , 0.59,
       0.6 , 0.61, 0.58, 0.62, 0.6 , 0.57, 0.62, 0.58, 0.61, 0.57, 0.58,
       0.57, 0.54, 0.55, 0.57, 0.58, 0.58, 0.55, 0.6 , 0.64, 0.59, 0.63,
       0.52, 0.63, 0.6 , 0.6 , 0.6 , 0.58, 0.62, 0.63, 0.62, 0.62, 0.57,
       0.64, 0.62, 0.6 , 0.57, 0.58, 0.72, 0.6 , 0.59, 0.64, 0.61, 0.57,
       0.62, 0.58, 0.54, 0.62, 0.64, 0.59, 0.57, 0.7 , 0.6 , 0.61, 0.7 ,
       0.58, 0.57, 0.61, 0.7 , 0.54, 0.59, 0.57, 0.6 , 0.68, 0.6 , 0.61,
       0.57, 0.63, 0.57, 0.61])

In [99]:
print('RFR')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_rfr))  
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_rfr))  
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_rfr))) 

RFR
Mean Absolute Error: 0.000740740740740814
Mean Squared Error: 7.40740740740742e-06
Root Mean Squared Error: 0.002721655269759089
