In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import plot

# Offline (in-notebook) plotting helpers. `plot` is already imported from
# plotly.offline a few lines above, so it is imported twice in this cell.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Prepare the notebook for inline plotly output. Per the plotly.offline docs,
# connected=True loads plotly.js from an online CDN instead of embedding it.
init_notebook_mode(connected=True) 

# Building the regression model
from sklearn.model_selection import train_test_split

#For preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

#For model evaluation
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score

from sklearn.linear_model import LinearRegression

In [2]:
# Load the Tesla price history from the CSV that sits next to the notebook,
# then preview the first rows as a quick sanity check.
DATA_FILE = 'TSLA.csv'
tesla = pd.read_csv(DATA_FILE)
tesla.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2010-06-29,1.266667,1.666667,1.169333,1.592667,1.592667,281494500
1,2010-06-30,1.719333,2.028,1.553333,1.588667,1.588667,257806500
2,2010-07-01,1.666667,1.728,1.351333,1.464,1.464,123282000
3,2010-07-02,1.533333,1.54,1.247333,1.28,1.28,77097000
4,2010-07-06,1.333333,1.333333,1.055333,1.074,1.074,103003500


In [3]:
# Summarise column dtypes and non-null counts to check for missing values.
tesla.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3383 entries, 0 to 3382
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       3383 non-null   object 
 1   Open       3383 non-null   float64
 2   High       3383 non-null   float64
 3   Low        3383 non-null   float64
 4   Close      3383 non-null   float64
 5   Adj Close  3383 non-null   float64
 6   Volume     3383 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 185.1+ KB


In [4]:
# 'Date' is read from the CSV as plain strings (object dtype); convert it to
# real datetimes so min/max and date arithmetic work in the following cells.
parsed_dates = pd.to_datetime(tesla['Date'])
tesla['Date'] = parsed_dates

In [5]:
# Report the period covered by the data. The first and last dates are computed
# once and reused for both the message and the day count.
first_day, last_day = tesla['Date'].min(), tesla['Date'].max()
# The original message ran the two dates together; "and" separates them.
print(f'Dataframe contains stock prices between {first_day} and {last_day}')
print(f'Total days = {(last_day - first_day).days} days')

Dataframe contains stock prices between 2010-06-29 00:00:00 2023-12-05 00:00:00
Total days = 4907 days


In [6]:
# Summary statistics (count, mean, min, quartiles, max, std) for every column.
tesla.describe()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
count,3383,3383.0,3383.0,3383.0,3383.0,3383.0,3383.0
mean,2017-03-17 02:29:24.351167744,69.59592,71.139907,67.934383,69.578123,69.578123,96727170.0
min,2010-06-29 00:00:00,1.076,1.108667,0.998667,1.053333,1.053333,1777500.0
25%,2013-11-05 12:00:00,10.422334,10.824666,10.21,10.544,10.544,45749250.0
50%,2017-03-17 00:00:00,16.866667,17.132668,16.625999,16.879333,16.879333,80989500.0
75%,2020-07-27 12:00:00,99.818333,102.410663,97.599998,99.872333,99.872333,123417600.0
max,2023-12-05 00:00:00,411.470001,414.496674,405.666656,409.970001,409.970001,914082000.0
std,,101.016818,103.287828,98.520375,100.941653,100.941653,80229000.0


In [7]:
# Shared layout for the price charts: a figure title plus labelled axes.
# Axis-title fonts are passed as `title=dict(text=..., font=...)`. The legacy
# `titlefont` shortcut is deprecated and rejected by recent plotly releases.
axis_title_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f'
)
layout = go.Layout(
    title='Stock Prices of Tesla',
    xaxis=dict(title=dict(text='Date', font=axis_title_font)),
    yaxis=dict(title=dict(text='Price', font=axis_title_font)),
)

# Closing price over time as a single scatter trace (plotly's default trace
# type for a bare x/y mapping, now spelled out explicitly).
tesla_data = [go.Scatter(x=tesla['Date'], y=tesla['Close'])]
# NOTE: the name `plot` rebinds the `plot` function imported from
# plotly.offline. It is kept because the display cell refers to the figure
# by this name.
plot = go.Figure(data=tesla_data, layout=layout)

In [8]:
# Render the closing-price figure inline in the notebook.
# NOTE: `plot` is the go.Figure built in the previous cell, which rebinds the
# `plot` function imported from plotly.offline. Calling plot(plot) for
# standalone offline output would fail until the figure is renamed or the
# function is re-imported.
iplot(plot)

In [9]:
# Build the regression inputs: the feature is the row position (day number)
# as a single-column 2-D array, and the target is the closing price.
X = np.array(tesla.index)[:, np.newaxis]
Y = tesla['Close']

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible. train_test_split shuffles by default, so the test set is a
# random sample of days rather than the most recent ones.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=101,
)

In [10]:
# Feature scaling: fit a standard scaler on the training feature only.
# NOTE(review): no transform() call appears in the cells shown, so the model
# below is trained on the unscaled day index - confirm whether that is intended.
scaler = StandardScaler()
scaler = scaler.fit(X_train)

In [11]:
# Ordinary least-squares fit of closing price against day number,
# using the training split only.
lm = LinearRegression()
lm.fit(X=X_train, y=Y_train)

In [12]:
# Training-set chart: observed closing prices as markers, with the fitted
# regression line drawn over them.
train_days = X_train[:, 0]          # flatten the single feature column
train_fit = lm.predict(X_train)     # model output for the same days

trace0 = go.Scatter(x=train_days, y=Y_train, mode='markers', name='Actual')
trace1 = go.Scatter(x=train_days, y=train_fit, mode='lines', name='Predicted')
tesla_data = [trace0, trace1]

# The x axis now shows the day number instead of a calendar date. This edits
# the shared `layout` object, so any figure built from `layout` after this
# cell also gets the 'Day' label.
layout.xaxis.title.text = 'Day'
plot2 = go.Figure(data=tesla_data, layout=layout)

In [13]:
# Render the train-set actual-vs-predicted figure inline in the notebook.
iplot(plot2)

In [14]:
# Model evaluation: R^2 and mean squared error on both splits.
# Predictions are computed once per split and reused for both metrics.
pred_train = lm.predict(X_train)
pred_test = lm.predict(X_test)

# Header row first, then one row per metric (train value, tab, test value).
rows = ['Metric'.ljust(10) + 'Train'.center(20) + 'Test'.center(20)]
for label, metric in (('r2_score', r2_score), ('MSE', mse)):
    rows.append(
        f'{label.ljust(10)}{metric(Y_train, pred_train)}\t{metric(Y_test, pred_test)}'
    )

# A leading and trailing newline frame the report when it is printed.
scores = '\n' + '\n'.join(rows) + '\n'
print(scores)


Metric           Train                Test        
r2_score  0.5945107637044718	0.5799120344296581
MSE       4127.368305434119	4284.6718211374855

