In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
#%matplotlib inline enables the drawing of matplotlib figures in the IPython environment

import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import plot

# Offline plotting: bring in the plotly.offline helpers and initialise notebook mode
# so that iplot() can render figures inside the notebook.
# NOTE(review): per the plotly.offline documentation, connected=True loads plotly.js
# from the online CDN instead of embedding it in the notebook — confirm this is wanted.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True) 

In [None]:
# Read the input data set.
# The path is a raw string so the backslashes of the Windows path are taken
# literally; in a normal string they form invalid escape sequences, which
# newer Python versions warn about. The resulting path is unchanged.
dataset = pd.read_csv(r'G:\Dattathreya\PROJECT\DATA SETS\INR=X.csv')
# Show the top 5 rows of the data set as the cell output
dataset.head()

In [None]:
dataset.info()
# Prints a summary of the data set: the columns, their dtypes and the number of
# non-null entries per column (missing values show up as a lower non-null count).

In [None]:
# Convert the 'Date' column to pandas datetime values
dataset['Date'] = pd.to_datetime(dataset['Date'])

# Look up the first and last date once and reuse them in both messages
first_date = dataset['Date'].min()
last_date = dataset['Date'].max()

# Print the time period covered by the data and its length in days,
# using formatted string literals (f'...')
print(f'Dataframe contains stock prices between {first_date} and {last_date}')
print(f'Total no.of days of data collected = {(last_date - first_date).days} days')

In [None]:
# describe() gives summary statistics of the data: count, mean, standard deviation,
# minimum, quartiles and maximum
dataset.describe()

In [None]:
# Arranging the layout for our plot (graph).
# Axis titles are given as title=dict(text=..., font=...): the older `titlefont`
# shortcut is deprecated in plotly and is no longer accepted by recent releases.
axis_title_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f'
)
layout = go.Layout(
    title='Stock Prices of Data',
    xaxis=dict(title=dict(text='Date', font=axis_title_font)),
    yaxis=dict(title=dict(text='Price', font=axis_title_font))
)

# A single trace: closing price against date
dataset_data = [{'x': dataset['Date'], 'y': dataset['Close']}]

# NOTE(review): `plot` is also the name of the function imported from
# plotly.offline; this assignment rebinds it to the figure. The name is kept so
# that any existing reference to the figure keeps working.
plot = go.Figure(data=dataset_data, layout=layout)

# Render the figure inline in the notebook (offline plotting)
iplot(plot)

In [None]:
# Building the regression model
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Preprocessing utilities
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Metrics for model evaluation
from sklearn.metrics import mean_squared_error as mse, r2_score

# Feature: the row index as a single-column 2-D array; target: the closing price
X = np.array(dataset.index)[:, np.newaxis]
Y = dataset['Close']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=101, test_size=0.3)

# Create the linear model and fit it on the training part
lrm = LinearRegression()
lrm.fit(X_train, Y_train)

In [None]:
# Actual closing prices and the model's predictions over the training rows
train_days = X_train[:, 0]
train_fit = lrm.predict(X_train)

trainact = go.Scatter(x=train_days, y=Y_train, name='Actual', mode='markers')
trainpred = go.Scatter(x=train_days, y=train_fit, name='Predicted', mode='lines')
dataset_data = [trainact, trainpred]

# Re-label the x axis of the existing layout for this figure
layout.xaxis.title.text = 'Day'
plot2 = go.Figure(layout=layout, data=dataset_data)

iplot(plot2)

In [None]:
# Calculate scores for model evaluation.
# Predictions are computed once per split, and every value is centred in the
# same 20-character width as its header so the numbers line up under
# 'Train' and 'Test'. Values are printed at full precision.
train_pred = lrm.predict(X_train)
test_pred = lrm.predict(X_test)

scores = f'''
{'Metric'.ljust(10)}{'Train'.center(20)}{'Test'.center(20)}
{'r2_score'.ljust(10)}{str(r2_score(Y_train, train_pred)).center(20)}{str(r2_score(Y_test, test_pred)).center(20)}
{'MSE'.ljust(10)}{str(mse(Y_train, train_pred)).center(20)}{str(mse(Y_test, test_pred)).center(20)}
'''
print(scores)