In [80]:
from scipy.io import loadmat
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
import plotly.graph_objects as go
from plotly import express as px
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pathlib
import seaborn as sns
# Default size (inches) and resolution for any matplotlib figures in this notebook.
plt.rcParams.update({
    'figure.figsize': [8, 6],
    'figure.dpi': 100,
})

In [81]:
# Read the MATLAB file into a dict-like object keyed by variable name.
# The path is relative to the notebook's working directory.
data = loadmat(file_name='ex5/ex5data1.mat')

In [82]:
# Pull the six arrays out of the loaded .mat dictionary, one feature/target
# pair per line (presumably train / validation / test splits, judging by the
# key names -- confirm against the dataset description).
x, y = data['X'], data['y']
Xval, yval = data['Xval'], data['yval']
xtest, ytest = data['Xtest'], data['ytest']

In [83]:
# Flatten both arrays to 1-D, stack them as two labelled rows, then transpose
# so that each observation becomes a row with columns 'x' and 'y'.
train_df = pd.DataFrame([x.ravel(), y.ravel()], index=['x', 'y']).T

In [84]:
# Base scatter plot of the training points; later cells build new figures
# from `fig` and add fitted curves on top.
fig = go.Figure(
    px.scatter(data_frame=train_df, x='x', y='y')
)
fig.show()

In [85]:
# Fit an ordinary least-squares line to the training data.
lr = linear_model.LinearRegression()
lr.fit(x, y)

# Let the estimator compute its own predictions instead of re-deriving
# intercept + slope * x by hand from coef_ / intercept_.
# NOTE(review): y appears to be a 2-D column array (the code reshapes it to
# build train_df), in which case predict() returns an (n, 1) array, the same
# shape the hand-written formula produced.
y_pred = lr.predict(x)

# Flatten before storing: a DataFrame column should be 1-D, and assigning an
# (n, 1) array directly relies on pandas accepting it.
train_df['y_pred_linear'] = np.ravel(y_pred)

In [86]:
# New figure built from the base scatter plot, with the linear fit drawn on
# top as a line trace.
fig2 = go.Figure(fig)
trace2 = go.Scatter(
    mode='lines',
    name='Prediction values (linear)',
    x=train_df['x'],
    y=train_df['y_pred_linear'],
)
fig2.add_trace(trace2)

fig2.show()

Let's try out a polynomial fit

From the graph, the data seems to follow a 3rd-order polynomial function

In [87]:
# Build the polynomial feature frame: keep x and y, then add x^2 and x^3.
# .assign() returns a brand-new DataFrame, so the extra columns are never
# written into a column subset of train_df. That is the pattern that triggers
# pandas' SettingWithCopyWarning.
train_df_poly = train_df[['x', 'y']].assign(
    x2=lambda df: df['x'] ** 2,
    x3=lambda df: df['x'] ** 3,
)

In [88]:
# Fit a linear model on the polynomial features x, x^2, x^3 (a cubic in x).
pr = linear_model.LinearRegression()
features_poly = train_df_poly[['x', 'x2', 'x3']]
# y is passed as a one-column DataFrame, exactly as before, so the fitted
# attributes keep their original layout.
pr.fit(features_poly, train_df_poly[['y']])

# Use predict() instead of a hand-written coef * features + intercept sum, so
# the code no longer depends on how coef_ / intercept_ are shaped.
# The result is flattened and wrapped in a Series on the frame's index, so
# y_pred_poly stays an index-aligned Series.
y_pred_poly = pd.Series(np.ravel(pr.predict(features_poly)),
                        index=train_df_poly.index)
train_df_poly['y_pred_poly'] = y_pred_poly

# Sort by x so that a line trace through the predictions is drawn left to right.
train_df_poly = train_df_poly.sort_values('x')

In [89]:
# New figure built from the linear-fit figure, with the polynomial fit added
# as another line trace. train_df_poly was sorted by x beforehand, so the
# curve is drawn in x order.
fig3 = go.Figure(fig2)
trace3 = go.Scatter(
    mode='lines',
    name='Prediction values (poly)',
    x=train_df_poly['x'],
    y=train_df_poly['y_pred_poly'],
)
fig3.add_trace(trace3)

fig3.show()

Let's add regularization to the model

In [155]:
# Same cubic features as the unregularized fit, but with an ElasticNet
# penalty; alpha=25 is the penalty strength passed to the estimator.
# NOTE(review): x, x^2 and x^3 are penalized on their raw scales here.
# StandardScaler is imported at the top of the notebook but never applied;
# consider scaling the features before a regularized fit.
pr_r = linear_model.ElasticNet(alpha=25)
features_reg = train_df_poly[['x', 'x2', 'x3']]
pr_r.fit(features_reg, train_df_poly[['y']])

# Use predict() instead of combining coef_ and intercept_[0] by hand. The
# manual formula only worked because of how the estimator happens to shape
# those two attributes for a one-column y.
# NOTE: this deliberately reuses the name y_pred_poly, as the original cell
# did, so it overwrites the unregularized predictions held in that variable.
# The unregularized values remain available in train_df_poly['y_pred_poly'].
y_pred_poly = pd.Series(np.ravel(pr_r.predict(features_reg)),
                        index=train_df_poly.index)
train_df_poly['y_pred_poly_reg'] = y_pred_poly

# Keep the frame ordered by x so line traces are drawn left to right.
train_df_poly = train_df_poly.sort_values('x')

In [156]:
# New figure built from the polynomial-fit figure, with the regularized
# polynomial predictions added as a further line trace.
fig4 = go.Figure(fig3)
trace4 = go.Scatter(
    mode='lines',
    name='Prediction values (poly reglrzd)',
    x=train_df_poly['x'],
    y=train_df_poly['y_pred_poly_reg'],
)
fig4.add_trace(trace4)

fig4.show()