## Regression analysis Example


In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
N = 10000
x_left = np.random.uniform(0,.5, N)
x_right = np.random.uniform(.5,1, N)
x_full = np.append(x_left, x_right)


In [3]:
def true_y(x):
    if x<.5:
        y = .3*x
    else: 
        y = -.1*x
        
    return y
y_left = np.array([true_y(x1) for x1 in x_left])
y_right = np.array([true_y(x1) for x1 in x_right])

y_full = np.array([true_y(x1) for x1 in x_full])

In [4]:
df_left = pd.DataFrame({'x': x_left, 'y': y_left})
X_left = df_left['x']
X_left = X_left.values.reshape(-1,1)
y_left = df_left['y']

df_right = pd.DataFrame({'x': x_right, 'y': y_right})
X_right = df_right['x']
X_right = X_right.values.reshape(-1,1)
y_right = df_right['y']

df_full = pd.DataFrame({'x': x_full, 'y': y_full})
X_full = df_full['x']
X_full = X_full.values.reshape(-1,1)
y_full = df_full['y']

In [5]:
import plotly.io as pio
pio.renderers.default = 'iframe'
import plotly.express as px
df_full_plot = df_full.sort_values(by=['x'])
fig = px.line(df_full_plot, x='x', y = 'y')
fig.show()

#### Linear Regression vs SGDRegressor


In [6]:
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDRegressor

X_train, X_test, y_train, y_test = train_test_split(X_left, y_left, test_size=0.3, random_state=0)
model = LinearRegression()
model.fit(X_train, y_train)

model_partial = SGDRegressor(learning_rate='constant')
model_partial.partial_fit(X_train, y_train)

SGDRegressor(learning_rate='constant')

In [7]:
y_pred = model.predict(X_test)
y_pred_partial = model_partial.predict(X_test)

print('Accuracy of linear regression classifier on test set: {:.2f}'.format(model.score(X_test, y_test)))
print('Accuracy of incremental linear regression classifier on test set: {:.2f}'.format(model_partial.score(X_test, y_test)))

Accuracy of linear regression classifier on test set: 1.00
Accuracy of incremental linear regression classifier on test set: 0.94


In [8]:
x_left_test = np.random.uniform(0,0.50, N)
x_right_test = np.random.uniform(0.50,1.0, N)

y_left_test = np.array([true_y(x1) for x1 in x_left_test])
y_right_test = np.array([true_y(x1) for x1 in x_right_test])

df_left_test = pd.DataFrame({'x': x_left_test, 'y': y_left_test})
df_left_test = df_left_test.sort_values(by=['x'])
X_left_test = df_left_test['x']
X_left_test = X_left_test.values.reshape(-1,1)
y_left_test = df_left_test['y']


df_right_test = pd.DataFrame({'x': x_right_test, 'y': y_right_test})
df_right_test = df_right_test.sort_values(by=['x'])
X_right_test = df_right_test['x']
X_right_test = X_right_test.values.reshape(-1,1)
y_right_test = df_right_test['y']

# Model Monitoring

In [9]:
X_test = X_left_test
y_pred = model.predict(X_test)
y_pred_partial = model_partial.predict(X_test)
y_test = y_left_test
print('Accuracy of linear regression classifier on test set: {:.2f}'.format(model.score(X_test, y_test)))
print('Accuracy of incremental linear regression classifier on test set: {:.2f}'.format(model_partial.score(X_test, y_test)))

Accuracy of linear regression classifier on test set: 1.00
Accuracy of incremental linear regression classifier on test set: 0.94


In [10]:
import plotly.io as pio
pio.renderers.default = 'iframe'
import plotly.express as px
import plotly.graph_objects as go
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.ravel(X_test), y=y_test,
                    mode='lines',
                    name='observed'))
fig.add_trace(go.Scatter(x=np.ravel(X_test), y=y_pred,
                    mode='lines',
                    name='predicted'))
fig.add_trace(go.Scatter(x=np.ravel(X_test), y=y_pred_partial,
                    mode='lines',
                    name='predicted_partial'))
# fig = px.line(np.ravel(X_test), y_test)
fig.show()

# Model drift


In [11]:
X_test = X_right_test
y_pred = model.predict(X_test)
y_pred_partial = model_partial.predict(X_test)
y_test = y_right_test
print('Accuracy of linear regression classifier on test set: {:.2f}'.format(model.score(X_test, y_test)))
print('Accuracy of incremental linear regression classifier on test set: {:.2f}'.format(model_partial.score(X_test, y_test)))

Accuracy of linear regression classifier on test set: -448.97
Accuracy of incremental linear regression classifier on test set: -342.85


In [12]:
import plotly.io as pio
pio.renderers.default = 'iframe'
import plotly.express as px
import plotly.graph_objects as go
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.ravel(X_test), y=y_test,
                    mode='lines',
                    name='observed'))
fig.add_trace(go.Scatter(x=np.ravel(X_test), y=y_pred,
                    mode='lines',
                    name='predicted'))
fig.add_trace(go.Scatter(x=np.ravel(X_test), y=y_pred_partial,
                    mode='lines',
                    name='predicted_partial'))
# fig = px.line(np.ravel(X_test), y_test)
fig.show()

# Model  Retraining

#### What happens if N is increased?
#### What happens if it is not trained in mini batches?

In [13]:
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.model_selection import train_test_split
N1= int(N/10)
for i in range(0,N1):
    n1 = int(N*i/N1)
    n2 = int(N*(i+1)/N1)
    X_train, X_test, y_train, y_test = train_test_split(X_right[n1:n2], y_right[n1:n2], test_size=0.01, random_state=0, shuffle=False)
    model_partial.partial_fit(X_train, y_train)
    model.fit(X_train, y_train)



In [14]:
X_test = X_right_test
y_pred = model.predict(X_test)
y_pred_partial = model_partial.predict(X_test)
y_test = y_right_test
print('Accuracy of linear regression classifier on test set: {:.2f}'.format(model.score(X_test, y_test)))
print('Accuracy of incremental linear regression classifier on test set: {:.2f}'.format(model_partial.score(X_test, y_test)))

Accuracy of linear regression classifier on test set: 1.00
Accuracy of incremental linear regression classifier on test set: 0.66


In [15]:
import plotly.io as pio
pio.renderers.default = 'iframe'
import plotly.express as px
import plotly.graph_objects as go
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.ravel(X_test), y=y_test,
                    mode='lines',
                    name='observed'))
fig.add_trace(go.Scatter(x=np.ravel(X_test), y=y_pred,
                    mode='lines',
                    name='predicted'))
fig.add_trace(go.Scatter(x=np.ravel(X_test), y=y_pred_partial,
                    mode='lines',
                    name='predicted_partial'))
# fig = px.line(np.ravel(X_test), y_test)
fig.show()

In [16]:
X_test = X_left_test
y_pred = model.predict(X_test)
y_pred_partial = model_partial.predict(X_test)
y_test = y_left_test
print('Accuracy of linear regression classifier on test set: {:.2f}'.format(model.score(X_test, y_test)))
print('Accuracy of incremental linear regression classifier on test set: {:.2f}'.format(model_partial.score(X_test, y_test)))

Accuracy of linear regression classifier on test set: -6.15
Accuracy of incremental linear regression classifier on test set: -9.26


In [17]:
import plotly.io as pio
pio.renderers.default = 'iframe'
import plotly.express as px
import plotly.graph_objects as go
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.ravel(X_test), y=y_test,
                    mode='lines',
                    name='observed'))
fig.add_trace(go.Scatter(x=np.ravel(X_test), y=y_pred,
                    mode='lines',
                    name='predicted'))
fig.add_trace(go.Scatter(x=np.ravel(X_test), y=y_pred_partial,
                    mode='lines',
                    name='predicted_partial'))
fig.show()

## Holy grail: Continual learning?