# Advice for Applying Machine Learning

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import numpy as np
from utils import generate_data, LinearModel

import warnings
warnings.filterwarnings("ignore")

## Generating data

In [None]:
# Generate the samples (X, y) together with the reference curve
# (X_ideal, y_ideal) that the plots below label as "Ideal".
# NOTE(review): the meaning of the arguments (18, 3, 0.7) is defined by
# utils.generate_data, which is not visible here — confirm before changing them.
X, y, X_ideal, y_ideal = generate_data(18, 3, 0.7)

## Plotting data

In [None]:
# Scatter the generated samples and overlay the ideal curve in red.
ax = plt.gca()
ax.scatter(X, y, label="Data")
ax.plot(X_ideal, y_ideal, 'r', label="Ideal")
ax.legend()
plt.show()

## Splitting dataset

In [None]:
# Hold out 33% of the samples as the test set; the fixed random_state keeps
# the split reproducible between runs.
split = train_test_split(X, y, test_size=0.33, random_state=1)
X_train, X_test, y_train, y_test = split

# Report the shape of each partition.
shape_report = (
    ("X_train.shape", X_train, "y_train.shape", y_train),
    ("X_test.shape", X_test, "y_test.shape", y_test),
)
for x_label, x_part, y_label, y_part in shape_report:
    print(x_label, x_part.shape, y_label, y_part.shape)

In [None]:
# Show which samples ended up in the training set and which in the test set.
ax = plt.gca()
for features, targets, tag in (
    (X_train, y_train, "Training"),
    (X_test, y_test, "Test"),
):
    ax.scatter(features, targets, label=tag)
ax.legend()
plt.show()

# Compare performance on training and test data

In [None]:
# Fit a LinearModel (from utils) with degree = 10 on the training split, then
# compare its mean squared error on the training data and on the test data.
degree = 10

model = LinearModel(degree)

# Training
model.fit(X_train, y_train)

# MSE on the data the model was fitted to
y_hat = model.predict(X_train)
mse_train = mean_squared_error(y_train, y_hat)

print(f"MSE X Train: {mse_train}")

# MSE on the held-out test data
y_hat = model.predict(X_test)
mse_test = mean_squared_error(y_test, y_hat)

# This value is the error on the test split, so the label must say "Test".
print(f"MSE X Test: {mse_test}")

## Plotting

In [None]:
# Draw the fitted model's predictions over X_ideal together with both data
# splits and the ideal curve.
fitted_curve = model.predict(X_ideal)

ax = plt.gca()
ax.plot(X_ideal, fitted_curve, label="degree=10")
ax.scatter(X_test, y_test, color='r', label="Test")
ax.scatter(X_train, y_train, color='b', label="Train")
ax.plot(X_ideal, y_ideal, label="ideal")
ax.legend()
plt.show()

# How to choose the right degree?

| data             | % of total | Description |
|------------------|:----------:|:---------|
| training         | 60         | Data used to tune model parameters $w$ and $b$ in training or fitting |
| cross-validation | 20         | Data used to tune hyperparameters such as the degree of the polynomial, the regularization strength, or the architecture of a neural network. |
| test             | 20         | Data used to test the model after tuning to gauge performance on new data |

In [None]:
# Generate a fresh dataset for the train / cross-validation / test workflow.
X, y, X_ideal, y_ideal = generate_data(40, 5, 0.7)
print("X.shape", X.shape, "y.shape", y.shape)

# Two-stage split with sklearn: keep 60% for training, then halve the
# remaining 40% into cross-validation and test sets (20% each).
first_split = train_test_split(X, y, test_size=0.40, random_state=1)
X_train, X_, y_train, y_ = first_split
second_split = train_test_split(X_, y_, test_size=0.50, random_state=1)
X_cv, X_test, y_cv, y_test = second_split

# Report the shape of each partition.
shape_report = (
    ("X_train.shape", X_train, "y_train.shape", y_train),
    ("X_cv.shape", X_cv, "y_cv.shape", y_cv),
    ("X_test.shape", X_test, "y_test.shape", y_test),
)
for x_label, x_part, y_label, y_part in shape_report:
    print(x_label, x_part.shape, y_label, y_part.shape)

In [None]:
# Colour-code the three partitions: train (red), cv (blue), test (orange).
ax = plt.gca()
for features, targets, colour, tag in (
    (X_train, y_train, "red", "train"),
    (X_cv, y_cv, "blue", "cv"),
    (X_test, y_test, "orange", "test"),
):
    ax.scatter(features, targets, color=colour, label=tag)
ax.legend(loc='upper left')
plt.show()

# Finding optimal degree

In [None]:
# Exclusive upper bound of the sweep: degrees 1 .. max_degree - 1 are fitted.
max_degree = 11

# Entry (or row) `d` stores the result for degree `d`; slot 0 is never filled
# and stays at zero.
err_train = np.zeros(max_degree)
err_cv = np.zeros(max_degree)
y_pred = np.zeros((max_degree, len(X_ideal)))

for degree in range(1, max_degree):
    model = LinearModel(degree)
    model.fit(X_train, y_train)

    # Error on the data used for fitting and on the cross-validation split.
    err_train[degree] = mean_squared_error(y_train, model.predict(X_train))
    err_cv[degree] = mean_squared_error(y_cv, model.predict(X_cv))

    # Predictions over X_ideal are stored only so they can be plotted later.
    y_pred[degree] = model.predict(X_ideal)

# Search from slot 1 onwards (slot 0 is unused), then shift the position back
# by one so the result is the degree itself.
optimal_degree = err_cv[1:].argmin() + 1
print(optimal_degree)

In [None]:
# Training and cross-validation MSE per degree, each drawn as a line with
# markers; slot 0 of the error arrays is skipped because it is unused.
degrees = np.arange(1, max_degree)

ax = plt.gca()
for errors in (err_train, err_cv):
    ax.plot(degrees, errors[1:])
    ax.scatter(degrees, errors[1:])
ax.set_xlabel("Degree")
ax.set_ylabel("MSE")
plt.show()

In [None]:
# Draw the stored predictions for the degree with the lowest cross-validation
# error over the training and cross-validation points.
# (Plotting `y_pred.T` instead would draw every stored row of y_pred.)
best_curve = y_pred[optimal_degree]

ax = plt.gca()
ax.plot(X_ideal, best_curve)
ax.scatter(X_train, y_train)
ax.scatter(X_cv, y_cv)
plt.show()

# Tuning the regularization parameter lambda

$$ J(\mathbf{w},b) = 
            \frac{1}{2m}\sum_{i=0}^{m-1} ( f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2 + \frac{\lambda}{2m} \sum_{j=0}^{n-1} (\mathbf{w}_j)^2
$$


In [None]:
# Sweep the candidate lambda values with the degree held fixed, recording the
# training and cross-validation MSE obtained for each one.
lambda_range = np.array([0.0, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100])
num_steps = len(lambda_range)
degree = 10

err_train = np.zeros(num_steps)
err_cv = np.zeros(num_steps)

# One row of predictions over X_ideal per lambda value.
y_pred = np.zeros((num_steps, len(X_ideal)))

for i, lambda_ in enumerate(lambda_range):
    model = LinearModel(degree, lambda_)
    model.fit(X_train, y_train)

    # mean_squared_error expects (y_true, y_pred); keep that order so the
    # calls match the sklearn signature and the other cells in this notebook.
    y_hat = model.predict(X_train)
    err_train[i] = mean_squared_error(y_train, y_hat)

    y_hat = model.predict(X_cv)
    err_cv[i] = mean_squared_error(y_cv, y_hat)

    # Just for plotting results
    y_hat = model.predict(X_ideal)
    y_pred[i] = y_hat

# Position (within lambda_range) of the lowest cross-validation error.
optimal_reg_idx = np.argmin(err_cv)
print(lambda_range[optimal_reg_idx])

In [None]:
# Draw, in red, the stored predictions for the lambda with the lowest
# cross-validation error over the training and cross-validation points.
# (Plotting `y_pred.T` instead would draw every stored row of y_pred.)
best_curve = y_pred[optimal_reg_idx]

ax = plt.gca()
ax.plot(X_ideal, best_curve, color='r')
ax.scatter(X_train, y_train)
ax.scatter(X_cv, y_cv)
plt.show()

In [None]:
# Plot the errors against evenly spaced positions 0 .. num_steps - 1 and label
# each position with its lambda value.
x = np.arange(num_steps)
curves = ((err_cv, "cv"), (err_train, "train"))

ax = plt.gca()

# Labelled lines first ...
for errors, tag in curves:
    ax.plot(x, errors, label=tag)
plt.xticks(x, lambda_range)

# ... then unlabelled markers on top of each line.
for errors, _ in curves:
    ax.scatter(x, errors)
ax.legend()

ax.set_xlabel("Lambda")
ax.set_ylabel("MSE")

plt.show()