<a href="https://colab.research.google.com/github/ImTeddyGraham/Data-ML-Projects/blob/main/Linear_Regression_And_OLS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
# HELPER FUNCTIONS 
# Mean Error
def Mean_Error(y_true, y_pred, k=2):
  return (1/y_true.shape[0])*sum(np.power(np.abs(y_true-y_pred), k))

# R squared coefficient
def R2_score(y_true, y_pred):
  y_mean = np.full(y_true.shape, y_true.mean())
  model_MSE = Mean_Error(y_true, y_pred)
  baseline_MSE = Mean_Error(y_true, y_mean)
  return 1 - (model_MSE/baseline_MSE)

In [3]:
# DATA PREPROCESSING 

In [4]:
# Load Dataset 
X = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data')

In [5]:
# Add missing feature names
X.columns = ['Class', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash',
                   'Magnesium', 'Total phenols', 'Flavanoids',
                   'Nonflavanoid phenols', 'Proanthocyanins', 'Color intensity',
                   'Hue', 'OD280/OD315 of diluted wines', 'Proline'] 

In [6]:
# Create target vector
y = X.pop('Class')

In [7]:
# Splits data into test (20%) and training (80%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=69)

In [8]:
# MODEL CREATION AND TRAINING

# ORDINARY LEAST SQUARES 

In [9]:
# Matrix Method For Least Square Approximate Solution
# remember we must add a column of 1's to our data matrix so we can have a constant term (intercept/bias)

# w_hat = (X.T @ X)^(-1) @ X.T @ y
def matrix_OLS(A, y):
  new_A = np.hstack((np.ones((A.shape[0],1)), A))
  return np.linalg.inv(new_A.T @ new_A) @ new_A.T @ y

In [10]:
# Stores weights for least squares approx soliution
OLS_weights = matrix_OLS(X_train, y_train)

In [11]:
# Obtains predictions for training and test data y_hat = X @ w_hat
OLS_test_pred = np.hstack((np.ones((X_test.shape[0], 1)), X_test)) @ OLS_weights
OLS_train_pred = np.hstack((np.ones((X_train.shape[0], 1)), X_train)) @ OLS_weights

In [12]:
# Training data metrics 
OLS_training_MAE = Mean_Error(y_train, OLS_train_pred, k=1)
OLS_training_MSE = Mean_Error(y_train, OLS_train_pred)
OLS_training_R2 = R2_score(y_train, OLS_train_pred)

# Test data metrics
OLS_test_MAE = Mean_Error(y_test, OLS_test_pred, k=1)
OLS_test_MSE = Mean_Error(y_test, OLS_test_pred)
OLS_test_R2 = R2_score(y_test, OLS_test_pred)

# SGD LINEAR REGRESSION

In [13]:
# Creates a pipeline with linear regression model and scales inputs 
pipe = Pipeline([('scaler', StandardScaler()), ('clf', SGDRegressor())])

In [14]:
# Fits model to test data
pipe.fit(X_train, y_train)

Pipeline(steps=[('scaler', StandardScaler()), ('clf', SGDRegressor())])

In [15]:
# Creates another linear regression model. This model takes in unscaled features
reg = SGDRegressor()

In [16]:
# Fits model to the same training set
reg.fit(X_train, y_train)

SGDRegressor()

In [17]:
# MODEL USING STANDARD SCALER 

scaled_weights = np.concatenate([pipe['clf'].intercept_, pipe['clf'].coef_])

# training data metrics 
test_MAE_scaled = Mean_Error(y_test, pipe.predict(X_test), k=1)
test_MSE_scaled = Mean_Error(y_test, pipe.predict(X_test))
test_R2_scaled = R2_score(y_test, pipe.predict(X_test))

# test data metrics
train_MAE_scaled = Mean_Error(y_train, pipe.predict(X_train), k=1)
train_MSE_scaled = Mean_Error(y_train, pipe.predict(X_train))
train_R2_scaled = R2_score(y_train, pipe.predict(X_train))

In [18]:
# MODEL USING UNSCALED INPUTS

unscaled_weights = np.concatenate([reg.intercept_, reg.coef_])

# training data metrics 
test_MAE_unscaled = Mean_Error(y_test, reg.predict(X_test), k=1)
test_MSE_unscaled = Mean_Error(y_test, reg.predict(X_test))
test_R2_unscaled = R2_score(y_test, reg.predict(X_test))

# test data metrics
train_MAE_unscaled = Mean_Error(y_train, reg.predict(X_train), k=1)
train_MSE_unscaled = Mean_Error(y_train, reg.predict(X_train))
train_R2_unscaled = R2_score(y_train, reg.predict(X_train))

In [19]:
# Model summaries

print(f"OLS MODEL TRAINING METRICS:\nMAE = {OLS_training_MAE:.4f}\nMSE = {OLS_training_MSE:.4f}\nR^2 = {OLS_training_R2:.4f}"\
      f"\n\nOLS MODEL TEST METRICS:\nMAE = {OLS_test_MAE:.4f}\nMSE = {OLS_test_MSE:.4f}\nR^2 = {OLS_test_R2:.4f}\n")
print(f"OLS WEIGHTS:\n{OLS_weights}\n\n\n")

print(f"SCALED MODEL TRAINING METRICS:\nMAE = {train_MAE_scaled:.4f}\nMSE = {train_MSE_scaled:.4f}\nR^2 = {train_R2_scaled:.4f}"\
      f"\n\nSCALED MODEL TEST METRICS:\nMAE = {test_MAE_scaled:.4f}\nMSE = {test_MSE_scaled:.4f}\nR^2 = {test_R2_scaled:.4f}\n")
print(f"MODEL WEIGHTS:\n{scaled_weights}\n\n\n")

print(f"UNSCALED MODEL TRAINING METRICS:\nMAE = {train_MAE_unscaled}\nMSE = {train_MSE_unscaled}\nR^2 = {train_R2_unscaled}"\
      f"\n\nUNSCALED MODEL TEST METRICS:\nMAE = {test_MAE_unscaled}\nMSE = {test_MSE_unscaled}\nR^2 = {test_R2_unscaled}\n")
print(f"MODEL WEIGHTS:\n{unscaled_weights}")

OLS MODEL TRAINING METRICS:
MAE = 0.1933
MSE = 0.0606
R^2 = 0.8977

OLS MODEL TEST METRICS:
MAE = 0.2034
MSE = 0.0614
R^2 = 0.8991

OLS WEIGHTS:
[ 4.59635051e+00 -1.33215978e-01  2.62970629e-02 -6.57439762e-02
  3.89027154e-02 -6.51032673e-04  1.64831734e-01 -3.93600926e-01
 -4.56859176e-01  4.34239551e-02  7.27289200e-02 -8.33029804e-02
 -2.83815825e-01 -7.28661195e-04]



SCALED MODEL TRAINING METRICS:
MAE = 0.2050
MSE = 0.0663
R^2 = 0.8881

SCALED MODEL TEST METRICS:
MAE = 0.1924
MSE = 0.0568
R^2 = 0.9066

MODEL WEIGHTS:
[ 1.92429883e+00 -1.09065327e-01  3.65737361e-02 -4.78187505e-02
  1.48722403e-01  5.60334030e-03 -8.37373884e-03 -2.28685027e-01
 -2.88633838e-02  4.71843189e-04  1.48880952e-01 -5.63074654e-02
 -2.06125998e-01 -1.98180721e-01]



UNSCALED MODEL TRAINING METRICS:
MAE = 170293835911665.25
MSE = 3.362996898876712e+28
R^2 = -5.675699604971798e+28

UNSCALED MODEL TEST METRICS:
MAE = 178693445270631.4
MSE = 3.6653409331279964e+28
R^2 = -6.028276458545539e+28

MODEL WEIG