<a href="https://colab.research.google.com/github/MWFK/Machine-Learning-From-Zero-to-Hero/blob/main/Regression_Algos_for_Stock_Prices_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# TunisAir_2020_Stock_Prices
In this notebook we'll try to predict TunisAir stock prices based on Scrapped data of 2020 from ilboursa.com

We'll use different algos, and try to compare the results.

This notebook is for academical purposes only (basic introductory level), so please do not consider it for real live trading.

# Import Libs

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
import datetime

# Import Data

In [39]:
data = pd.read_csv('https://raw.githubusercontent.com/MWFK/Machine-Learning-From-Zero-to-Hero/main/TunisAir_2020_Stock_Prices.csv', sep=";", decimal="," ) #sep = delimiter
data.head()

Unnamed: 0,symbole,date,ouverture,haut,bas,cloture,volume
0,TAIR,02/01/2020,0.63,0.63,0.62,0.63,13391
1,TAIR,03/01/2020,0.63,0.63,0.62,0.62,22163
2,TAIR,06/01/2020,0.63,0.63,0.59,0.6,124299
3,TAIR,07/01/2020,0.6,0.6,0.59,0.59,69788
4,TAIR,08/01/2020,0.59,0.59,0.58,0.59,60144


# Processing

In [46]:
# convert the pandas dataframe feature data type from TimeStamp to Date
# data['date'] = data['date'].apply(lambda x:x.toordinal())
# data['date'] = pd.to_datetime(data['date'], format='DD/MM/YY')

#data['ouverture'] = data['ouverture'].astype(float)
#data['ouverture'] = pd.to_numeric(data['ouverture'])
#data['haut'] = data['haut'].astype(float)
#data['bas'] = data['bas'].astype(float)

x = data.iloc[:,2:5]  # we do not start with the firt feature, because it has the name of the stock      
x.head()

Unnamed: 0,ouverture,haut,bas
0,0.63,0.63,0.62
1,0.63,0.63,0.62
2,0.63,0.63,0.59
3,0.6,0.6,0.59
4,0.59,0.59,0.58


In [48]:
y = data.cloture # we're going to predict the closing prices of the TAIR
y.head()

0    0.63
1    0.62
2    0.60
3    0.59
4    0.59
Name: cloture, dtype: float64

In [49]:
# Split the data 
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=73) # random_state to reproduce the same result, and test size will the 33% of the train data
print("X_train = ",X_train.shape)
print("y_train",y_train.shape)
print("X_test = ",X_test.shape)
print("y_test",y_test.shape)

X_train =  (196, 3)
y_train (196,)
X_test =  (49, 3)
y_test (49,)


# Linear Regression

In [50]:
# Create a basic linear model
lr = LinearRegression()
lr_model = lr.fit(X_train, y_train)

In [51]:
# Predict the stock prices
y_pred_lr = lr_model.predict(X_test) 
y_pred_lr

array([0.58174284, 0.69521019, 0.57200118, 0.62750515, 0.62414203,
       0.58517539, 0.60465871, 0.56232895, 0.64026221, 0.60465871,
       0.58517539, 0.59828018, 0.62070948, 0.58853852, 0.61048125,
       0.60122616, 0.57543373, 0.62750515, 0.57837971, 0.60122616,
       0.57837971, 0.58517539, 0.56863806, 0.54284563, 0.5588964 ,
       0.56569208, 0.57543373, 0.58517539, 0.54915474, 0.60500644,
       0.64993445, 0.59491705, 0.62708801, 0.52336231, 0.63045113,
       0.58756538, 0.5914845 , 0.59786303, 0.58517539, 0.62070948,
       0.64019279, 0.6177635 , 0.61096782, 0.57837971, 0.63381426,
       0.62365546, 0.59786303, 0.56232895, 0.58517539])

In [52]:
# Prediction Evaluation
print('LR')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_lr))  
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_lr))  
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_lr))) 

LR
Mean Absolute Error: 0.00551824048033516
Mean Squared Error: 4.1706624086361904e-05
Root Mean Squared Error: 0.00645806659042487


# DTR model

In [53]:
dtr = DecisionTreeRegressor()  
dtr_model = dtr.fit(X_train, y_train)

In [54]:
y_pred_dtr = dtr_model.predict(X_test) 
y_pred_dtr

array([0.586     , 0.71      , 0.574     , 0.63666667, 0.62666667,
       0.584     , 0.60583333, 0.56      , 0.65      , 0.60583333,
       0.584     , 0.59666667, 0.62      , 0.59      , 0.61      ,
       0.6       , 0.57625   , 0.63666667, 0.575     , 0.6       ,
       0.575     , 0.584     , 0.565     , 0.54      , 0.55      ,
       0.57      , 0.57625   , 0.584     , 0.545     , 0.61      ,
       0.64      , 0.59571429, 0.62666667, 0.53      , 0.63666667,
       0.586     , 0.586     , 0.59666667, 0.584     , 0.62      ,
       0.63      , 0.61      , 0.61      , 0.575     , 0.63666667,
       0.63      , 0.59666667, 0.56      , 0.584     ])

In [55]:
print('DTR')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_dtr))  
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_dtr))  
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_dtr))) 

DTR
Mean Absolute Error: 0.005443148688046647
Mean Squared Error: 4.643878129483081e-05
Root Mean Squared Error: 0.006814600596867788


# RFR model

In [56]:
rfr = RandomForestRegressor()  
rfr_model = rfr.fit(X_train, y_train)  

In [57]:
y_pred_rfr = dtr_model.predict(X_test) 
y_pred_rfr

array([0.586     , 0.71      , 0.574     , 0.63666667, 0.62666667,
       0.584     , 0.60583333, 0.56      , 0.65      , 0.60583333,
       0.584     , 0.59666667, 0.62      , 0.59      , 0.61      ,
       0.6       , 0.57625   , 0.63666667, 0.575     , 0.6       ,
       0.575     , 0.584     , 0.565     , 0.54      , 0.55      ,
       0.57      , 0.57625   , 0.584     , 0.545     , 0.61      ,
       0.64      , 0.59571429, 0.62666667, 0.53      , 0.63666667,
       0.586     , 0.586     , 0.59666667, 0.584     , 0.62      ,
       0.63      , 0.61      , 0.61      , 0.575     , 0.63666667,
       0.63      , 0.59666667, 0.56      , 0.584     ])

In [58]:
print('RFR')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_rfr))  
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_rfr))  
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_rfr))) 

RFR
Mean Absolute Error: 0.005443148688046647
Mean Squared Error: 4.643878129483081e-05
Root Mean Squared Error: 0.006814600596867788
