***Color legend***

<div class="alert alert-block alert-success">
<b>Green - Libraries</b>
</div>

<div class="alert alert-block alert-info">
<b>Blue - Information</b>
</div>

<div class="alert alert-block alert-danger">
<b>Red - Problems</b>
</div>

***

<div class="alert alert-block alert-success">
<b>Green - Libraries</b>
</div>

In [24]:
import numpy as np  
import chart_studio
import pandas as pd
import statsmodels.api as sm
import plotly.graph_objs as go
import chart_studio.plotly as py

from sklearn.linear_model import LinearRegression

# Chart Studio configuration: request world-readable plots with 'public' sharing.
# NOTE(review): set_config_file presumably persists these values to the user's
# Chart Studio config file, so they would outlive this session — confirm, and
# do not upload sensitive data while this setting is active.
chart_studio.tools.set_config_file(world_readable = True, sharing = 'public')

<div class="alert alert-block alert-info">
<b>Create data frame</b>
</div>

In [25]:
# Nine paired observations; the two lists are aligned by position, so row i of
# the frame is (Weight[i], Size[i]).
data = dict(
    Weight = [0.9, 1.8, 2.4, 3.5, 3.9, 4.4, 5.1, 5.6, 6.3],
    Size = [1.4, 2.6, 1.0, 3.7, 5.5, 3.2, 3.0, 4.9, 6.3],
)

# Column order is pinned explicitly rather than left to dict ordering.
df = pd.DataFrame(data = data, columns = ['Weight', 'Size'])

# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,Weight,Size
0,0.9,1.4
1,1.8,2.6
2,2.4,1.0
3,3.5,3.7
4,3.9,5.5
5,4.4,3.2
6,5.1,3.0
7,5.6,4.9
8,6.3,6.3


<div class="alert alert-block alert-info">
<b>Linear regression</b>
</div>

In [26]:
# Response variable: the 'Size' column.
y = df['Size']

# Predictor: the 'Weight' column plus the constant column that add_constant
# contributes (it shows up as the `const` row in the fitted summary).
x = sm.add_constant(df['Weight'])

# Ordinary least squares of y on x; missing = 'drop' is forwarded unchanged.
model = sm.OLS(endog = y, exog = x, missing = 'drop')
results = model.fit()

# Bare last expression so the notebook renders the regression summary.
results.summary()


kurtosistest only valid for n>=20 ... continuing anyway, n=9



0,1,2,3
Dep. Variable:,Size,R-squared:,0.613
Model:,OLS,Adj. R-squared:,0.558
Method:,Least Squares,F-statistic:,11.1
Date:,"Tue, 05 May 2020",Prob (F-statistic):,0.0126
Time:,05:45:34,Log-Likelihood:,-13.208
No. Observations:,9,AIC:,30.42
Df Residuals:,7,BIC:,30.81
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.5813,0.965,0.603,0.566,-1.700,2.862
Weight,0.7778,0.233,3.332,0.013,0.226,1.330

0,1,2,3
Omnibus:,0.025,Durbin-Watson:,2.111
Prob(Omnibus):,0.988,Jarque-Bera (JB):,0.254
Skew:,0.027,Prob(JB):,0.881
Kurtosis:,2.178,Cond. No.,10.5


In [27]:
# Two ways to obtain the predicted values; only the statsmodels path is active.
#
# sklearn alternative (disabled). NOTE(review): enabling it would rebind `model`,
# and `x` at this point already includes the constant column added before the
# OLS fit — confirm that is what you want to pass to LinearRegression.
# model = LinearRegression().fit(x, y)
# y_pred = model.predict(x)

# statsmodels path: predict() is called with no arguments, so it presumably
# falls back to the data the model was fit on (in-sample predictions) — see the
# statsmodels docs to confirm.
y_pred = results.predict()

In [28]:
# Observed points. The colour is supplied through `line`, and a marker size is
# set as well, exactly as in the original trace definition.
actual_trace = go.Scatter(
    x = df['Weight'],
    y = df['Size'],
    name = 'Actual',
    mode = 'markers',
    marker = {'size': 8},
    hoverinfo = 'x+y+text',
    line = {'color': 'rgb(255, 255, 0)'},
)

# Predicted values drawn as a line over the same x values.
predicted_trace = go.Scatter(
    x = df['Weight'],
    y = y_pred,
    name = 'Predicted',
    mode = 'lines',
    marker = {'size': 8},
    hoverinfo = 'x+y+text',
    line = {'color': 'rgb(0, 255, 155)'},
)

# Traces are added in the same order as before: actual first, then predicted.
fig = go.Figure(data = [actual_trace, predicted_trace])

# Titles, legend position, background colours and fonts.
fig.update_layout(
    title_text = 'Linear regression',
    title_x = 0.5,
    title_y = 0.8,
    autosize = True,
    legend = {'x': 0, 'y': 1.5},
    xaxis_title = 'Mouse weight',
    yaxis_title = 'Mouse size',
    paper_bgcolor = 'rgba(1,1,1,1)',
    plot_bgcolor = 'rgba(1,1,1,1)',
    hoverlabel = {'font_size': 15, 'font_family': 'Helvetica'},
    font = {'family': 'Helvetica, Helvetica', 'size': 15, 'color': 'white'},
)

# Both axes share one tick/grid configuration, so define it once.
axis_style = {
    'ticks': 'outside',
    'tickwidth': 2,
    'tickcolor': 'black',
    'ticklen': 10,
    'showgrid': False,
    'zeroline': False,
}
fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

# Send the figure through chart_studio under the given filename and show it inline.
py.iplot(fig, filename = 'Linear regression', auto_open = False)

$$Y_i = b_0 + b_1 X_i + \epsilon_i$$ <br/>