## 5.1

In [9]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge as SklearnRidge
from sklearn.metrics import mean_absolute_percentage_error


In [10]:
data = pd.read_csv('../ToyotaCorolla.csv')

df = data.copy()



In [11]:
X = df[['KM', 'Weight', 'Age_08_04']]
y = df['Price']

X_train, X_test, y_train, y_test = train_test_split(X , y, shuffle= True, random_state=42, test_size=0.2)

In [12]:

class LassoCoordinateDescent:
    def __init__(self, lambda_=0.1, max_iter=1000, tol=1e-4):
        self.lambda_ = lambda_
        self.max_iter = max_iter
        self.tol = tol
        self.coef_ = None
        self.scaler_ = None
    
    def soft_thresholding(self, rho, lambda_):
        if rho < -lambda_:
            return rho + lambda_
        elif rho > lambda_:
            return rho - lambda_
        else:
            return 0
    
    def fit(self, X, y):
        # Standardize the features
        self.scaler_ = StandardScaler()
        X_scaled = self.scaler_.fit_transform(X)
        
        # Initialize coefficients
        n, d = X_scaled.shape
        self.coef_ = np.zeros(d)
        
        for iteration in range(self.max_iter):
            coef_old = self.coef_.copy()
            
            for j in range(d):
                X_j = X_scaled[:, j]
                residual = y - X_scaled @ self.coef_ + self.coef_[j] * X_j
                rho = X_j.T @ residual
                
                # Update coefficient for feature j using soft-thresholding
                self.coef_[j] = self.soft_thresholding(rho / (X_j.T @ X_j), self.lambda_)
            
            # Check for convergence
            if np.max(np.abs(self.coef_ - coef_old)) < self.tol:
                print(f'Converged after {iteration} iterations.')
                break
    
    def predict(self, X):
        # Standardize the test data using the scaler fitted on training data
        X_scaled = self.scaler_.transform(X)
        
        # Compute predictions
        return X_scaled @ self.coef_

    def mean_absolute_percentage_error(self, y_true, y_pred):
        """
        Compute the Mean Absolute Percentage Error (MAPE)
        """
        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

    def score(self, X_test, y_test):
        """
        Compute the MAPE on the test set
        """
        y_pred = self.predict(X_test)
        return self.mean_absolute_percentage_error(y_test, y_pred)


# Example usage:

# Assume you've split your data into training and testing sets
# X_train, X_test, y_train, y_test = ...

# Create a Lasso model
lambda_ = 0.1  # Regularization strength
lasso = LassoCoordinateDescent(lambda_=lambda_)

# Fit the model to the training data
lasso.fit(X_train, y_train)

# Predict on the test data
y_pred = lasso.predict(X_test)
print('Predicted prices:', y_pred)

# Evaluate the model using MAPE
mape = lasso.score(X_test, y_test)
print('Mean Absolute Percentage Error on test set:', mape)


Converged after 22 iterations.
Predicted prices: [ 8.37938566e+02 -2.09182131e+03 -5.98711838e+02 -2.02708390e+03
 -7.97637371e+02 -3.22406476e+03 -2.26956277e+03 -2.56862115e+03
  3.15083889e+03  2.31475333e+03 -1.68600436e+03 -1.56099518e+03
  2.06830864e+03  2.57019566e+03 -1.30765158e+03 -2.06099337e+03
  1.71650520e+03  6.97568004e+03 -3.95600156e+03 -1.73171557e+03
  1.84970812e+03  7.02686782e+03 -3.06405610e+03 -4.00459346e+01
 -3.08908516e+03  5.17143392e+03 -8.04840632e+02 -3.97676402e+03
  5.02295306e+03  3.72262277e+03 -2.90237347e+03 -1.03161893e+03
 -2.43641731e+03 -8.82120472e+02 -6.59314289e+02 -3.00184255e+03
 -2.15116550e+03 -1.01819966e+03  5.70134346e+03 -1.68791287e+03
 -1.02862529e+03 -1.39316010e+03 -3.11817878e+03 -5.36228134e+03
 -3.58522236e+03  6.13567272e+03 -1.55272001e+03 -3.14718740e+02
 -1.52014827e+03  2.74866226e+03  2.22527147e+03 -3.98226159e+03
  4.10078744e+03  5.43939127e+03 -4.67524176e+03 -3.34286740e+03
 -5.42288873e+02 -2.04871126e+02  5.17895

In [13]:
class RidgeRegression:
    def __init__(self, lambda_=1.0):
        self.lambda_ = lambda_  # Regularization strength
        self.coef_ = None
        self.scaler_ = None
    
    def fit(self, X, y):
        # Standardize the features
        self.scaler_ = StandardScaler()
        X_scaled = self.scaler_.fit_transform(X)
        
        # Solve the normal equation (X'X + lambda*I) * beta = X'y
        n, d = X_scaled.shape
        I = np.eye(d)  # Identity matrix of size d
        XTX = X_scaled.T @ X_scaled
        XTy = X_scaled.T @ y
        
        # Solve for beta (ridge coefficients)
        self.coef_ = np.linalg.solve(XTX + self.lambda_ * I, XTy)
    
    def predict(self, X):
        # Standardize the test data using the same scaler as training data
        X_scaled = self.scaler_.transform(X)
        
        # Compute predictions
        return X_scaled @ self.coef_

    def score(self, X_test, y_test):
        """
        Compute the Mean Squared Error on the test set
        """
        y_pred = self.predict(X_test)
        return np.mean((y_test - y_pred) ** 2)

    def mean_absolute_percentage_error(self, y_true, y_pred):
        """
        Compute the Mean Absolute Percentage Error (MAPE)
        """
        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Create a Ridge regression model
ridge = RidgeRegression(lambda_=1.0)

# Fit the model
ridge.fit(X_train, y_train)

# Predict on the test set
y_pred = ridge.predict(X_test)
print('Predicted prices:', y_pred)

# Evaluate the model
mse = ridge.score(X_test, y_test)
print('Mean Squared Error on test set:', mse)

# Evaluate using MAPE
mape = ridge.mean_absolute_percentage_error(y_test, y_pred)
print('Mean Absolute Percentage Error on test set:', mape)

Predicted prices: [ 8.37559741e+02 -2.09063528e+03 -5.98709753e+02 -2.02548063e+03
 -7.96529378e+02 -3.22158228e+03 -2.26852891e+03 -2.56636204e+03
  3.14847864e+03  2.31375190e+03 -1.68482343e+03 -1.55961740e+03
  2.06708689e+03  2.56814825e+03 -1.30725298e+03 -2.06008173e+03
  1.71476123e+03  6.97223887e+03 -3.95470251e+03 -1.73000559e+03
  1.84794725e+03  7.02289323e+03 -3.06169172e+03 -4.01301002e+01
 -3.08801976e+03  5.16901841e+03 -8.04609644e+02 -3.97531951e+03
  5.02035263e+03  3.72046906e+03 -2.90069380e+03 -1.03103394e+03
 -2.43435008e+03 -8.81852393e+02 -6.58965723e+02 -2.99991457e+03
 -2.15037311e+03 -1.01782778e+03  5.69807036e+03 -1.68758536e+03
 -1.02779830e+03 -1.39215889e+03 -3.11574600e+03 -5.36103271e+03
 -3.58333177e+03  6.13216392e+03 -1.55192940e+03 -3.14506992e+02
 -1.51934717e+03  2.74766461e+03  2.22404014e+03 -3.97974964e+03
  4.09815979e+03  5.43720385e+03 -4.67389789e+03 -3.34066875e+03
 -5.42444853e+02 -2.05779618e+02  5.17678466e+03  5.54103433e+03
  1.511

In [15]:

# Custom Ridge Regression implementation
ridge_custom = RidgeRegression(lambda_=1.0)
ridge_custom.fit(X_train, y_train)
beta_custom = ridge_custom.coef_

# Sklearn Ridge Regression
ridge_sklearn = SklearnRidge(alpha=1.0, fit_intercept=False)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
ridge_sklearn.fit(X_train_scaled, y_train)
beta_sklearn = ridge_sklearn.coef_

# Compare coefficients
print('Custom Ridge Coefficients:\n', beta_custom)
print('Sklearn Ridge Coefficients:\n', beta_sklearn)

# Predictions and comparison
y_pred_custom = ridge_custom.predict(X_test)
y_pred_sklearn = ridge_sklearn.predict(scaler.transform(X_test))

# Compare predictions
print('Custom Ridge Predictions:\n', y_pred_custom)
print('Sklearn Ridge Predictions:\n', y_pred_sklearn)

# Evaluate both models (MSE)
mape_custom = mean_absolute_percentage_error(y_test, y_pred_custom)
mape_sklearn = mean_absolute_percentage_error(y_test, y_pred_sklearn)

print('Custom Ridge Mape:', mape_custom)
print('Sklearn Ridge Mape:', mape_sklearn)

Custom Ridge Coefficients:
 [ -927.87484004  1050.44594184 -2211.42339228]
Sklearn Ridge Coefficients:
 [ -927.87484004  1050.44594184 -2211.42339228]
Custom Ridge Predictions:
 [ 8.37559741e+02 -2.09063528e+03 -5.98709753e+02 -2.02548063e+03
 -7.96529378e+02 -3.22158228e+03 -2.26852891e+03 -2.56636204e+03
  3.14847864e+03  2.31375190e+03 -1.68482343e+03 -1.55961740e+03
  2.06708689e+03  2.56814825e+03 -1.30725298e+03 -2.06008173e+03
  1.71476123e+03  6.97223887e+03 -3.95470251e+03 -1.73000559e+03
  1.84794725e+03  7.02289323e+03 -3.06169172e+03 -4.01301002e+01
 -3.08801976e+03  5.16901841e+03 -8.04609644e+02 -3.97531951e+03
  5.02035263e+03  3.72046906e+03 -2.90069380e+03 -1.03103394e+03
 -2.43435008e+03 -8.81852393e+02 -6.58965723e+02 -2.99991457e+03
 -2.15037311e+03 -1.01782778e+03  5.69807036e+03 -1.68758536e+03
 -1.02779830e+03 -1.39215889e+03 -3.11574600e+03 -5.36103271e+03
 -3.58333177e+03  6.13216392e+03 -1.55192940e+03 -3.14506992e+02
 -1.51934717e+03  2.74766461e+03  2.224040

In [18]:
class RidgeGradientDescent:
    def __init__(self, lambda_=1.0, alpha=0.01, max_iter=1000, tol=1e-6):
        self.lambda_ = lambda_  # Regularization strength
        self.alpha = alpha  # Learning rate
        self.max_iter = max_iter  # Maximum number of iterations
        self.tol = tol  # Tolerance for stopping criterion
        self.coef_ = None  # Coefficients (beta)
        self.scaler_ = None  # For feature scaling
    
    def fit(self, X, y):
        # Standardize the features
        self.scaler_ = StandardScaler()
        X_scaled = self.scaler_.fit_transform(X)
        
        n, d = X_scaled.shape
        self.coef_ = np.zeros(d)  # Initialize coefficients
        
        for iteration in range(self.max_iter):
            # Compute predictions
            y_pred = X_scaled @ self.coef_
            
            # Compute the gradient
            gradient = -(X_scaled.T @ (y - y_pred)) / n + (self.lambda_ / n) * self.coef_
            
            # Update coefficients (gradient descent step)
            self.coef_ -= self.alpha * gradient
            
            # Check convergence (if the norm of the gradient is small enough)
            if np.linalg.norm(gradient, ord=2) < self.tol:
                print(f'Converged after {iteration} iterations.')
                break
    
    def predict(self, X):
        # Standardize the test data using the same scaler as training data
        X_scaled = self.scaler_.transform(X)
        return X_scaled @ self.coef_

    def mean_absolute_percentage_error(self, y_true, y_pred):
        """
        Compute the Mean Absolute Percentage Error (MAPE)
        """
        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

    def score(self, X_test, y_test):
        # Compute predictions and MAPE
        y_pred = self.predict(X_test)
        return self.mean_absolute_percentage_error(y_test, y_pred)

# Create the Ridge Regression model using Gradient Descent
ridge_gd = RidgeGradientDescent(lambda_=1.0, alpha=0.01, max_iter=1000)

# Fit the model to the training data
ridge_gd.fit(X_train, y_train)

# Predict on the test data
y_pred_gd = ridge_gd.predict(X_test)

mape_gd = ridge_gd.score(X_test, y_test)
print(f'MAPE ridge gd: {mape_gd}')
print(f'Coefficients: {ridge_gd.coef_}')

MAPE ridge gd: 108.69660491352622
Coefficients: [ -940.75439835  1062.45985747 -2193.66964844]


In [19]:
from sklearn.neural_network import MLPRegressor

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the MLPRegressor model
mlp = MLPRegressor(hidden_layer_sizes=(100, 50),  # Two hidden layers with 100 and 50 neurons
                   activation='relu',  # Activation function
                   solver='adam',  # Optimization method
                   max_iter=500,  # Maximum number of iterations
                   random_state=42)

# Fit the model to the training data
mlp.fit(X_train_scaled, y_train)

# Predict on the train set
y_train_pred = mlp.predict(X_train_scaled)

# Predict on the test set
y_test_pred = mlp.predict(X_test_scaled)

# Evaluate the model using MAPE on both train and test sets
mape_train = mean_absolute_percentage_error(y_train, y_train_pred) * 100
mape_test = mean_absolute_percentage_error(y_test, y_test_pred) * 100

print(f'Train set MAPE: {mape_train:.2f}%')
print(f'Test set MAPE: {mape_test:.2f}%')

Train set MAPE: 9.61%
Test set MAPE: 9.52%




##### mape of 8% for random forest vs 9.5% for mlp