# Ridge Regression

### Libraries Imports 

In [20]:
import numpy as np 
from Preprocess import load_and_preprocess_ozone
from sklearn.metrics import mean_squared_error, r2_score

### Ridge Regression Class

In [21]:
class RidgeRegression: 
    def __init__(self, penality): 
        self.penality = penality 

    def fit(self, X, y):
        samples_nb = X.shape[0]
        X_with_intercept = np.c_[np.ones((samples_nb, 1)), X]

        identity_matrix = np.identity(X_with_intercept.shape[1]) 
        identity_matrix[0, 0] = 0  # Do not regularise the intercept

        # Calculation of the matrices A and B for Ridge's solution
        A = X_with_intercept.T.dot(X_with_intercept) + self.penality * identity_matrix
        B = X_with_intercept.T.dot(y)
        
        # Display for debugging
        print('A matrix:\n', A)
        print('B vector:\n', B)

        self.thetas = np.linalg.solve(A, B)  # Solve for the parameters
        return self

    def predict(self, X): 
        samples_nb = X.shape[0]
        X_with_intercept = np.c_[np.ones((samples_nb, 1)), X]
        predictions = X_with_intercept.dot(self.thetas)  # Calculate predictions
        return predictions  


### Run and Test the Model

In [22]:
def runAndTestRidgeRegression(): 

    # Load the pre-processed data 
    ozone_data = load_and_preprocess_ozone()

    # Split the dataset into training and testing set (80% | 20%)
    n = int(0.8 * ozone_data.shape[0])  
    train = ozone_data.iloc[:n]
    test = ozone_data.iloc[n:]

    # Separation of features and target
    X_train = train.drop('maxO3', axis=1).copy()
    y_train = train['maxO3']
    X_test = test.drop('maxO3', axis=1).copy()
    y_test = test['maxO3']

    # Creation and fitting of the Ridge regression model
    model = RidgeRegression(penality=1.0)
    model.fit(X_train.values, y_train.values)

    # Predictions on test data
    predictions = model.predict(X_test.values)
    print('Predictions:', predictions)
    print('True values:', y_test.values) 


In [23]:
# Execution of the function to test the model
runAndTestRidgeRegression()

A matrix:
 [[1108.          -33.42619372  -29.86808102  -33.03345812  -35.10865473
   -25.71411859  -20.32004406    7.62977659   12.45823083    4.95345032
     6.91500477  -15.62813991   -8.25006219  -21.16908129    3.09051798
   -38.45683042    4.17098178  -12.47998843   -3.09714594  -46.32157692
    -5.87459005   13.91871694]
 [ -33.42619372 1148.41281335  957.64865717  827.16052627  783.01481782
   796.87912047  169.92063888   56.39156758  -46.8656516   -81.85736034
   -38.65650715   56.10324075   96.76682744  102.67354304  -51.30156829
    92.13826248  -83.66640991   83.96585761  -35.84941509   49.52776089
    -3.62804977 -111.73863596]
 [ -29.86808102  957.64865717 1168.79226213 1093.64359452 1037.7530118
  1027.87276653 -293.90508182 -412.96730991 -409.90272615 -360.85840966
  -265.87325556   -1.73123829 -241.13346036  -52.90731663 -213.4997299
   -77.32053605 -178.12934221  -59.80437065 -126.88006948 -100.66508852
   -31.81108638  135.373742  ]
 [ -33.03345812  827.16052627 1093

### Ridge Regression Performance Evaluation

In [24]:
# Metrics calculation
mse = mean_squared_error(y_test.values, predictions)
rmse = np.sqrt(mse)
r2 = r2_score(y_test.values, predictions)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'R2 Score (R²): {r2}')

NameError: name 'y_test' is not defined