# Ridge Regression with L2 Regularization
This notebook demonstrates how to use Ridge Regression with L2 regularization on a dataset.

In [10]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge


In [3]:
# Read dataset
file_path = 'https://drive.google.com/uc?id=1TL0eEUAK_ufGVksQwMtJe2z19r69BQnV'
data = pd.read_csv(file_path)
data.head()

Unnamed: 0,player,forehand_winner_per,backhand_winner_per,forehand_unforced_error_per,backhand_unforced_error_per,net_per,net_point_direct_win_per,net_point_winning_per,net_point_error_per,passing_per,...,pts_won_Ite_3_shots_per,shots_in_pts_won_per,shots_in_pts_lost_per,shots_in_won_vs_lost_ratio,inside_in_per,inside_out_per,Counter Puncher,Attacking Baseliner,All-Court Player,Solid Baseliner
0,Aaron Krickstein,4.99,3.69,7.72,8.9,9.03,31.96,5.21,42.65,12.25,...,25.73,43.51,56.49,0.78,2.1,8.72,0.0003703907,99.995334,0.004289121,6e-06
1,Adam Pavlasek,10.28,3.74,7.48,6.54,29.91,33.33,19.63,50.0,25.0,...,30.82,60.0,40.0,1.5,2.15,12.37,3.403358e-07,8.876978,3.998631e-06,91.123018
2,Adrian Mannarino,4.95,3.48,9.87,7.11,11.77,46.86,7.72,53.49,11.81,...,34.81,52.36,47.64,1.16,0.48,5.45,9.043304e-05,99.905192,0.09470405,1.3e-05
3,Adriano Panatta,8.19,5.17,9.05,7.76,46.98,34.0,21.55,50.46,6.0,...,24.89,40.0,60.0,0.67,0.0,6.03,1.001937e-09,0.00012,8.597095e-11,99.99988
4,Agustin Calleri,10.8,6.82,11.36,15.91,27.84,60.71,15.91,53.06,0.0,...,21.24,38.64,61.36,0.63,1.23,11.55,0.0001299238,99.569118,0.3369169,0.093835


In [6]:
# Define features (X) and target variables (y)
X = data.drop(columns=['player', 'Counter Puncher', 'Attacking Baseliner', 'All-Court Player', 'Solid Baseliner'])
y = data[['Counter Puncher', 'Attacking Baseliner', 'All-Court Player', 'Solid Baseliner']]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [11]:
# Define parameter grid for Ridge
param_grid = {'alpha': [0.1, 1.0, 10.0, 100.0]}

# Set up Ridge with GridSearchCV
ridge_cv = GridSearchCV(Ridge(), param_grid, cv=5, scoring='neg_mean_squared_error')
ridge_cv.fit(X, y)

# Best alpha
print(f"Best alpha: {ridge_cv.best_params_['alpha']}")

# Best model evaluation
best_ridge = ridge_cv.best_estimator_
y_pred = best_ridge.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Root Mean Squared Error (with CV): {rmse}")

Best alpha: 0.1
Root Mean Squared Error (with CV): 16.73261325476393


In [12]:

# Ridge Regression with L2 regularization
ridge = Ridge(alpha=ridge_cv.best_params_['alpha'])
ridge.fit(X_train, y_train)

# Predictions
y_pred = ridge.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)

print(f"Root Mean Squared Error: {rmse}")
print(f"R-squared: {r2}")


Root Mean Squared Error: 18.165373132761378
R-squared: 0.4319786501195456
