## Scikit-learn

In [10]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

df = pd.read_csv("../Advertising.csv", index_col=0)
print(f"Number of features {df.shape[1]-1}")
print(f"Number of features {df.shape[0]}")

df.head()

Number of features 3
Number of features 200


Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [13]:
X,y = df.drop("sales", axis=1), df["sales"]
X.shape, y.shape

((200, 3), (200,))

## Scikit-learn steps

** Steps **
1. train/test split or traing / validation / test split
2. scale dataset
- many algorithms require scaling, some don't
- which type of scaling method to use?
- scale training data using training data, scale test data using training data
3. Fit algoritm to training data
4. Predict on test data
5. Evaluation metrics on test data


### Train/Test Split

In [20]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((140, 3), (60, 3), (140,), (60,))

### Feature scaling

Normalization (MinMaxScaling)

Compute min and max from training data and use on training and test data

Feature standardization (standard score scaling)

In [32]:
from sklearn.preprocessing import MinMaxScaler

#instantiate an object from the class MinMaxScaler()
scaler = MinMaxScaler()
print(type(scaler))

scaler.fit(X_train)
scaler.data_max_

#Data scaled to 0 to 1
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)
print(f"Min value in X_train: {scaled_X_train.min()}")
print(f"Max value in X_train: {scaled_X_train.min()}")

print(f"Min value in X_test: {scaled_X_test.min()}")
print(f"Max value in X_test: {scaled_X_test.max()}")


<class 'sklearn.preprocessing._data.MinMaxScaler'>
Min value in X_train: 0.0
Max value in X_train: 0.0
Min value in X_test: 0.005964214711729622
Max value in X_test: 1.1302186878727631


### Algorithm - linear regression

In [37]:
from sklearn.linear_model import LinearRegression

#SVD - Singular Value Decomposition that is used for calculation pseudoinverse in OLS normal equation

model_SVD = LinearRegression()
model_SVD.fit(scaled_X_train, y_train)

#Weights
print(f"Weights (Beta_hats) {model_SVD.coef_}")
print(f"Intercept {model_SVD.intercept_}")


Weights (Beta_hats) [13.02832938  9.88465985  0.69237469]
Intercept 2.7418553248528124


### Stochastic gradient descent

In [67]:
from sklearn.linear_model import SGDRegressor

model_SGD = SGDRegressor(loss = "squared_error", learning_rate="invscaling", max_iter=100000)
model_SGD.fit(scaled_X_train, y_train)

print(f"Weights (Beta_hats) {model_SGD.coef_}")
print(f"Intercept {model_SGD.intercept_}")

Weights (Beta_hats) [11.97727943  9.01254139  1.35110619]
Intercept [3.55933714]


### Manual test

In [78]:
# Sanity check
test_sample_features = scaled_X_test[0].reshape(1,-1)
test_sample_target = y_test.values[0]

#uses the weights and intercept from the fitting
model_SGD.predict(test_sample_features)[0], model_SVD.predict(test_sample_features)[0], test_sample_target

(16.593727399810554, 16.56539629743484, 16.9)

### Evaluation

In [83]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

y_pred_SVD = model_SVD.predict(scaled_X_test)
y_pred_SGD = model_SGD.predict(scaled_X_test)

mae_SVD = mean_absolute_error(y_test, y_pred_SVD)
mae_SGD = mean_absolute_error(y_test, y_pred_SGD)

mse_SVD = mean_squared_error(y_test, y_pred_SVD)
mse_SGD = mean_squared_error(y_test, y_pred_SGD)

rmse_SVD = np.sqrt(mse_SVD)
rmse_SGD = np.sqrt(mse_SGD)

print(f"SVD: MAE {mae_SVD:.2f}, MSE {mse_SVD:.2f}, RMSE {rmse_SVD:.2f}")
print(f"SGD: MAE {mae_SGD:.2f}, MSE {mse_SGD:.2f}, RMSE {rmse_SGD:.2f}")

SVD: MAE 1.51, MSE 3.80, RMSE 1.95
SGD: MAE 1.52, MSE 4.09, RMSE 2.02
