### Importing neccessary libraries 

In [284]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

### Uploading data

In [285]:
data = pd.read_csv(r"1729258-1613615-Stock_Price_data_set_(1).csv")

In [286]:
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2018-02-05,262.0,267.899994,250.029999,254.259995,254.259995,11896100
1,2018-02-06,247.699997,266.700012,245.0,265.720001,265.720001,12595800
2,2018-02-07,266.579987,272.450012,264.329987,264.559998,264.559998,8981500
3,2018-02-08,267.079987,267.619995,250.0,250.100006,250.100006,9306700
4,2018-02-09,253.850006,255.800003,236.110001,249.470001,249.470001,16906900


In [287]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1009 entries, 0 to 1008
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       1009 non-null   object 
 1   Open       1009 non-null   float64
 2   High       1009 non-null   float64
 3   Low        1009 non-null   float64
 4   Close      1009 non-null   float64
 5   Adj Close  1009 non-null   float64
 6   Volume     1009 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 55.3+ KB


In [288]:
data.count()

Date         1009
Open         1009
High         1009
Low          1009
Close        1009
Adj Close    1009
Volume       1009
dtype: int64

In [289]:
data.isna().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

### Converting Date from object to datetime data type and splitting Date into separate columns 

In [290]:
data['Date'] = pd.to_datetime(data['Date'])
data.dtypes

Date         datetime64[ns]
Open                float64
High                float64
Low                 float64
Close               float64
Adj Close           float64
Volume                int64
dtype: object

In [291]:
data['Date'].dt.year.unique()

array([2018, 2019, 2020, 2021, 2022], dtype=int64)

In [292]:
data['Day'] = data['Date'].dt.day
data['Month'] = data['Date'].dt.month
data['Year'] = data['Date'].dt.year

data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Day,Month,Year
0,2018-02-05,262.0,267.899994,250.029999,254.259995,254.259995,11896100,5,2,2018
1,2018-02-06,247.699997,266.700012,245.0,265.720001,265.720001,12595800,6,2,2018
2,2018-02-07,266.579987,272.450012,264.329987,264.559998,264.559998,8981500,7,2,2018
3,2018-02-08,267.079987,267.619995,250.0,250.100006,250.100006,9306700,8,2,2018
4,2018-02-09,253.850006,255.800003,236.110001,249.470001,249.470001,16906900,9,2,2018


In [293]:
data.drop(['Date'], axis = 1, inplace = True)
data.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Day,Month,Year
0,262.0,267.899994,250.029999,254.259995,254.259995,11896100,5,2,2018
1,247.699997,266.700012,245.0,265.720001,265.720001,12595800,6,2,2018
2,266.579987,272.450012,264.329987,264.559998,264.559998,8981500,7,2,2018
3,267.079987,267.619995,250.0,250.100006,250.100006,9306700,8,2,2018
4,253.850006,255.800003,236.110001,249.470001,249.470001,16906900,9,2,2018


### Building machine learning model

In [294]:
temp_col = data.columns.to_list()

new_col = temp_col[:3] + temp_col[4:]
new_col.append(temp_col[3])

data = data.reindex(columns = new_col)
data.head()

Unnamed: 0,Open,High,Low,Adj Close,Volume,Day,Month,Year,Close
0,262.0,267.899994,250.029999,254.259995,11896100,5,2,2018,254.259995
1,247.699997,266.700012,245.0,265.720001,12595800,6,2,2018,265.720001
2,266.579987,272.450012,264.329987,264.559998,8981500,7,2,2018,264.559998
3,267.079987,267.619995,250.0,250.100006,9306700,8,2,2018,250.100006
4,253.850006,255.800003,236.110001,249.470001,16906900,9,2,2018,249.470001


In [295]:
scaler = StandardScaler()
data = scaler.fit_transform(data)

data[0]

array([-1.44777164, -1.4414654 , -1.51014126, -1.52204669,  0.7917907 ,
       -1.22417332, -1.32500464, -1.38099829, -1.52204669])

In [296]:
x = data[:, :-1]
y = data[:, -1]

In [297]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1, random_state = 0)

### Implementing Linear Regression

In [298]:
model = LinearRegression()
model.fit(x_train, y_train)

In [299]:
y_pred = model.predict(x_test)

In [300]:
def metrics(y_true, y_pred):
    print(f'RMSE', mean_squared_error(y_true, y_pred) ** 0.5)
    print(f'R_square value:', r2_score(y_true, y_pred))

def accuracy(y_true, y_pred):
    errors = abs(y_true - y_pred)
    mape = 100 * np.mean(errors/y_true)
    accuracy = 100 - mape
    return accuracy

In [301]:
metrics(y_test, y_pred)

RMSE 6.288279333906807e-16
R_square value: 1.0


In [302]:
accuracy(y_test, y_pred)

99.99999999999999

### Implementing Random Forest

In [307]:
model_random_forest = RandomForestRegressor(n_estimators = 500, min_samples_split = 3)
model_random_forest.fit(x_train, y_train)

In [304]:
pred_rf = model_random_forest.predict(x_test)

In [305]:
metrics(y_test, pred_rf)

RMSE 0.008670908382360796
R_square value: 0.9999163335382636


In [306]:
accuracy(y_test, pred_rf)

99.9040085274132