# Reliance Stock Prediction

## dependent variable = Close Price

### 1. Import required packages

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import plot

#for offline plotting
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True) 

### 2. Read the data from the source

In [14]:
df = pd.read_csv('reliance_stock_data_(1990-2023).csv')

In [15]:
df.head(10)

Unnamed: 0,Date,Price,Open,High,Low,Volume,Change%
0,Nov 03 2023,2319.7,2327.2,2334.95,2315.75,4.43M,-0.02%
1,Nov 02 2023,2320.2,2313.95,2324.3,2307.95,5.28M,0.99%
2,Nov 01 2023,2297.4,2289.15,2317.5,2275.2,5.15M,0.42%
3,Oct 31 2023,2287.9,2328.0,2328.0,2282.9,6.40M,-1.06%
4,Oct 30 2023,2312.5,2274.0,2325.0,2269.95,8.41M,2.06%
5,Oct 27 2023,2265.8,2240.0,2273.5,2235.95,5.89M,1.77%
6,Oct 26 2023,2226.5,2251.0,2258.0,2220.3,7.61M,-1.39%
7,Oct 25 2023,2257.95,2250.05,2281.2,2243.0,5.81M,-0.23%
8,Oct 23 2023,2263.2,2290.0,2306.25,2255.25,3.93M,-1.56%
9,Oct 20 2023,2299.1,2300.0,2314.7,2296.3,4.46M,-0.31%


In [16]:
df = df.rename(columns={' Open': 'Open', ' High': 'High', ' Low': 'Low', ' Volume':'Volume',' Change%':'Change%'})

df.isna().sum()

In [17]:
df = df.dropna()

In [18]:
df.isna().sum()

Date       0
Price      0
Open       0
High       0
Low        0
Volume     0
Change%    0
dtype: int64

In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8148 entries, 0 to 8150
Data columns (total 7 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Date     8148 non-null   object 
 1   Price    8148 non-null   float64
 2   Open     8148 non-null   float64
 3   High     8148 non-null   float64
 4   Low      8148 non-null   float64
 5   Volume   8148 non-null   object 
 6   Change%  8148 non-null   object 
dtypes: float64(4), object(3)
memory usage: 509.2+ KB


In [20]:
# Convert the 'Date' column to the desired format
df['Date'] = pd.to_datetime(df['Date'], format='%b %d %Y').dt.strftime('%Y-%m-%d')

# Print the DataFrame
print(df)

            Date    Price     Open     High      Low Volume Change%
0     2023-11-03  2319.70  2327.20  2334.95  2315.75  4.43M  -0.02%
1     2023-11-02  2320.20  2313.95  2324.30  2307.95  5.28M   0.99%
2     2023-11-01  2297.40  2289.15  2317.50  2275.20  5.15M   0.42%
3     2023-10-31  2287.90  2328.00  2328.00  2282.90  6.40M  -1.06%
4     2023-10-30  2312.50  2274.00  2325.00  2269.95  8.41M   2.06%
...          ...      ...      ...      ...      ...    ...     ...
8146  1990-01-08     6.71     7.05     7.05     6.67      0   0.00%
8147  1990-01-05     6.71     6.37     6.71     6.37      0   3.23%
8148  1990-01-04     6.50     6.24     6.58     6.24      0  -6.07%
8149  1990-01-03     6.92     7.18     7.18     6.84      0  -4.81%
8150  1990-01-02     7.27     7.31     7.44     7.14      0  -1.62%

[8148 rows x 7 columns]


In [21]:
# Convert the 'Date' column to datetime if it's not already
df['Date'] = pd.to_datetime(df['Date'])

# Calculate the total number of days
total_days = (df['Date'].max() - df['Date'].min()).days

print(f"Total number of days: {total_days} days")


Total number of days: 12358 days


In [22]:
df.columns

Index(['Date', 'Price', 'Open', 'High', 'Low', 'Volume', 'Change%'], dtype='object')

In [23]:
df= df

In [24]:
df.drop(['Volume','Change%'], axis = 1, inplace= True)

KeyError: "[' Volume', ' Change%'] not found in axis"

### 3. To do Exploratory Data Analysis

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
df.corr()

In [None]:
df.columns

In [None]:
# selected_columns = ['Date', ' Price', ' Open', ' High', ' Low']
# selected_data = df[selected_columns]

# # Create a box plot
# plt.figure(figsize=(10, 6))
# selected_data.boxplot()
# plt.title('Box Plot of Stock Data')
# plt.ylabel('Value')
# plt.xlabel('Variables')
# plt.xticks(rotation=45)
# plt.tight_layout()
# plt.show()


In [None]:
# Setting the layout for our plot
layout = go.Layout(
    title='Stock Prices of Reliance',
    xaxis=dict(
        title='Date',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    ),
    yaxis=dict(
        title='Price',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    )
)

reliance_data = [{'x':df['Date'], 'y':df['Price']}]
plot = go.Figure(data=reliance_data, layout=layout)

In [None]:
#plot(plot) #plotting offline
iplot(plot)

## 4. Data Pre- Processing

In [None]:
# Building the regression model
from sklearn.model_selection import train_test_split

#For preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

#For model evaluation
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score

### Split the data into train and test sets

In [None]:
X = np.array(df.index).reshape(-1,1)
Y = df[' Price']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=101)

### Feature scaling

In [None]:
scaler = StandardScaler().fit(X_train)

### 5. Model Building

In [None]:
from sklearn.linear_model import LinearRegression

#Creating a linear model
lm = LinearRegression()
lm.fit(X_train, Y_train)

In [None]:
#Plot actual and predicted values for train dataset
trace0 = go.Scatter(
    x = X_train.T[0],
    y = Y_train,
    mode = 'markers',
    name = 'Actual'
)
trace1 = go.Scatter(
    x = X_train.T[0],
    y = lm.predict(X_train).T,
    mode = 'lines',
    name = 'Predicted'
)
reliance_data = [trace0,trace1]
layout.xaxis.title.text = 'Day'
plot2 = go.Figure(data=reliance_data, layout=layout)

In [None]:
iplot(plot2)

## 6.Model Evaluation

In [None]:
#Calculate scores for model evaluation
scores = f'''
{'Metric'.ljust(10)}{'Train'.center(20)}{'Test'.center(20)}
{'r2_score'.ljust(10)}{r2_score(Y_train, lm.predict(X_train))}\t{r2_score(Y_test, lm.predict(X_test))}
{'MSE'.ljust(10)}{mse(Y_train, lm.predict(X_train))}\t{mse(Y_test, lm.predict(X_test))}
'''
print(scores)