## Stock Price Prediction

In [87]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,mean_squared_error, SCORERS
import warnings
warnings.filterwarnings('ignore')

In [65]:
df = pd.read_csv('reliance_data.csv')

In [66]:
df.head()

Unnamed: 0,Date,Symbol,Series,Prev Close,Open,High,Low,Last,Close,VWAP,Volume,Turnover,Trades,Deliverable Volume,%Deliverble
0,01-01-1996,RELIANCE,EQ,204.65,205.0,206.1,203.65,,205.75,205.26,3717450,76300000000000.0,,,
1,02-01-1996,RELIANCE,EQ,205.75,205.25,206.25,202.65,,204.15,204.13,6024650,123000000000000.0,,,
2,03-01-1996,RELIANCE,EQ,204.15,207.5,216.95,205.25,,205.7,207.04,7473500,155000000000000.0,,,
3,04-01-1996,RELIANCE,EQ,205.7,203.75,204.4,201.05,,203.8,202.47,7744000,157000000000000.0,,,
4,05-01-1996,RELIANCE,EQ,203.8,203.0,203.0,200.65,,202.4,202.05,5952000,120000000000000.0,,,


In [67]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6205 entries, 0 to 6204
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Date                6205 non-null   object 
 1   Symbol              6205 non-null   object 
 2   Series              6205 non-null   object 
 3   Prev Close          6205 non-null   float64
 4   Open                6205 non-null   float64
 5   High                6205 non-null   float64
 6   Low                 6205 non-null   float64
 7   Last                5657 non-null   float64
 8   Close               6205 non-null   float64
 9   VWAP                6205 non-null   float64
 10  Volume              6205 non-null   int64  
 11  Turnover            6205 non-null   float64
 12  Trades              2356 non-null   float64
 13  Deliverable Volume  4693 non-null   float64
 14  %Deliverble         4693 non-null   float64
dtypes: float64(11), int64(1), object(3)
memory usage: 727.3

In [68]:
df.isna().sum()

Date                     0
Symbol                   0
Series                   0
Prev Close               0
Open                     0
High                     0
Low                      0
Last                   548
Close                    0
VWAP                     0
Volume                   0
Turnover                 0
Trades                3849
Deliverable Volume    1512
%Deliverble           1512
dtype: int64

### Previous close, Open and Close Columns have no null values. We will proceed with these columns. Since all the columns are on same scale, we use the data as it is.

In [69]:
open_df=df.iloc[:,3:5]

In [70]:
closed_df = df.iloc[:,8:9]

In [71]:
x_train, x_test,y_train,y_test = train_test_split(open_df,closed_df,test_size=.3,random_state=1)
print("Trainig output values: \n", y_train)
print("Test output values: \n", y_test)

Trainig output values: 
         Close
1922   428.70
1298   399.40
4906   872.40
2860  1691.30
5730  1121.00
...       ...
905    174.65
5192  1051.20
3980   755.70
235    183.15
5157  1017.65

[4343 rows x 1 columns]
Test output values: 
         Close
1050   234.85
3407  1974.30
6113  2004.00
3532   978.95
3584  1016.55
...       ...
5876  1184.35
4682   945.70
4402   852.90
2361   555.20
3578  1032.80

[1862 rows x 1 columns]


In [72]:
lr=LinearRegression()

In [73]:
lr.fit(x_train,y_train)

In [74]:
y_pred = lr.predict(x_test)

In [114]:
r2_sq=lr.score(x_train,y_train)
r2_sq

0.9982855734869923

In [97]:
mae = mean_absolute_error(y_test,y_pred)
mae

12.746168066862667

In [101]:
rmse = mean_squared_error(y_test,y_pred,squared=False)
rmse

22.34640083572042

### We will try Gradient Descent to check for better convergence

In [102]:
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import GridSearchCV

In [103]:
sgd = SGDRegressor(max_iter=200)

In [139]:
penalty=['l1','l2','elasticnet']    
alpha=[0.1,0.001,0.0001]
l1_ratio=[0.15,0.01,0.001]
eta0=[0.1,0.001,0.0001]
learning_rate=['constant', 'adaptive']

In [140]:
param_dist = dict(penalty=penalty,alpha=alpha,l1_ratio=l1_ratio,eta0=eta0,learning_rate=learning_rate)

In [141]:
gridSV=GridSearchCV(estimator=sgd,param_grid=param_dist,n_jobs=-1,verbose=3)

In [142]:
gridSV.fit(x_train,y_train)

Fitting 5 folds for each of 162 candidates, totalling 810 fits


In [143]:
gridSV.best_estimator_

In [144]:
y_pred1 = gridSV.predict(x_test)

In [145]:
rmse1=mean_squared_error(y_test,y_pred1)
rmse1

7.21067267357892e+23