In [338]:
import numpy as np
import pandas as pd


In [341]:
data = pd.read_csv("EPL_Soccer_MLR_LR.csv")
data.head()

Unnamed: 0,PlayerName,Club,DistanceCovered(InKms),Goals,MinutestoGoalRatio,ShotsPerGame,AgentCharges,BMI,Cost,PreviousClubCost,Height,Weight,Score
0,"Braund, Mr. Owen Harris",MUN,3.96,7.5,37.5,12.3,60.0,20.56,109.1,63.32,195.9,78.9,19.75
1,"Allen, Mr. William Henry",MUN,4.41,8.3,38.2,12.7,68.0,20.67,102.8,58.55,189.7,74.4,21.3
2,"Moran, Mr. James",MUN,4.14,5.0,36.4,11.6,21.0,21.86,104.6,55.36,177.8,69.1,19.88
3,"McCarthy, Mr. Timothy J",MUN,4.11,5.3,37.3,12.6,69.0,21.88,126.4,57.18,185.0,74.9,23.66
4,"Palsson, Master. Gosta Leonard",MUN,4.45,6.8,41.5,14.0,29.0,18.96,80.3,53.2,184.6,64.6,17.64


In [343]:
data.shape

(202, 13)

In [340]:
data.isnull().sum()

PlayerName                15
Club                      15
DistanceCovered(InKms)    15
Goals                     15
MinutestoGoalRatio        15
ShotsPerGame              15
AgentCharges              15
BMI                       15
Cost                      15
PreviousClubCost          15
Height                    15
Weight                    15
Score                     15
dtype: int64

In [325]:
# Data pre-processing

enc = OrdinalEncoder()
enc_data = pd.DataFrame(enc.fit_transform(data[['PlayerName','Club']]))
# enc_data

data["PlayerName"]=enc_data[0]
data["Club"]=enc_data[1]

In [326]:
data.tail(15)

Unnamed: 0,PlayerName,Club,DistanceCovered(InKms),Goals,MinutestoGoalRatio,ShotsPerGame,AgentCharges,BMI,Cost,PreviousClubCost,Height,Weight,Score
202,,,,,,,,,,,,,
203,,,,,,,,,,,,,
204,,,,,,,,,,,,,
205,,,,,,,,,,,,,
206,,,,,,,,,,,,,
207,,,,,,,,,,,,,
208,,,,,,,,,,,,,
209,,,,,,,,,,,,,
210,,,,,,,,,,,,,
211,,,,,,,,,,,,,


In [342]:
data=data.drop(index=[202,203,204,205,206,207,208,209,210,211,212,213,214,215,216],axis=0)
data.isnull().sum()

PlayerName                0
Club                      0
DistanceCovered(InKms)    0
Goals                     0
MinutestoGoalRatio        0
ShotsPerGame              0
AgentCharges              0
BMI                       0
Cost                      0
PreviousClubCost          0
Height                    0
Weight                    0
Score                     0
dtype: int64

In [328]:
# Shuffle the data
data = data.sample(frac=1, random_state=42)

In [329]:
# Split the data into train and test sets
train_size = int(0.8 * len(data))
train_data = data[:train_size]
test_data = data[train_size:]

In [330]:
# Linear Regression Model
def linear_regression(X, y):
    # Add a column of ones to X for the bias term
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Calculate the optimal weights using the formula: w = (X^T * X)^(-1) * X^T * y
    weights = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)

    return weights

In [331]:
# Ridge Regression
def ridge_regression(X, y, alpha):
    # Add a column of ones to X for the bias term
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Calculate the optimal weights using the formula: w = (X^T * X + alpha * I)^(-1) * X^T * y
    weights = np.linalg.inv(X.T.dot(X) + alpha * np.eye(X.shape[1])).dot(X.T).dot(y)

    return weights

In [332]:
# Lasso Regression
def lasso_regression(X, y, alpha, max_iterations=1000, tol=1e-4):
    # Add a column of ones to X for the bias term
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    # Initialize weights
    weights = np.zeros(X.shape[1])

    # Perform coordinate descent
    for _ in range(max_iterations):
        old_weights = weights.copy()
        for j in range(X.shape[1]):
            X_j = X[:, j]
            X_j_squared_sum = np.sum(X_j ** 2)
            X_j_dot_residual = X_j.dot(y - X.dot(weights) + weights[j] * X_j)
            if j == 0:
                weights[j] = X_j_dot_residual / X_j_squared_sum
            else:
                weights[j] = np.sign(X_j_dot_residual) / X_j_squared_sum * max(abs(X_j_dot_residual) - alpha / 2, 0)
        if np.linalg.norm(weights - old_weights) < tol:
            break

    return weights

In [333]:
X_train = train_data.iloc[:, 2:-1].values
y_train = train_data.iloc[:, -1].values
X_test = test_data.iloc[:, 2:-1].values
y_test = test_data.iloc[:, -1].values

In [334]:
# Linear Regression
weights_lr = linear_regression(X_train, y_train)

# Ridge Regression
alpha_ridge = 0.01  # Regularization parameter
weights_ridge = ridge_regression(X_train, y_train, alpha_ridge)

# Lasso Regression
alpha_lasso = 0.01  # Regularization parameter
weights_lasso = lasso_regression(X_train, y_train, alpha_lasso)

In [335]:

# Model Validation

# Mean Absolute Error
def mean_absolute_error(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

# R2 squared
def r2_squared(y_true, y_pred):
    ssr = np.sum((y_pred - y_true) ** 2)
    sst = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1 - (ssr / sst)

# Predict using the models
y_pred_lr = np.concatenate((np.ones((X_test.shape[0], 1)), X_test), axis=1).dot(weights_lr)
y_pred_ridge = np.concatenate((np.ones((X_test.shape[0], 1)), X_test), axis=1).dot(weights_ridge)
y_pred_lasso = np.concatenate((np.ones((X_test.shape[0], 1)), X_test), axis=1).dot(weights_lasso)

mae_lr = mean_absolute_error(y_test, y_pred_lr)
mae_ridge = mean_absolute_error(y_test, y_pred_ridge)
mae_lasso = mean_absolute_error(y_test, y_pred_lasso)

print("Mean Absolute Error (Linear Regression):", mae_lr)
print("Mean Absolute Error (Ridge Regression):", mae_ridge)
print("Mean Absolute Error (Lasso Regression):", mae_lasso)

# R2 squared score for each model
r2_lr = r2_squared(y_test, y_pred_lr)
r2_ridge = r2_squared(y_test, y_pred_ridge)
r2_lasso = r2_squared(y_test, y_pred_lasso)

print("R2 squared (Linear Regression):", r2_lr)
print("R2 squared (Ridge Regression):", r2_ridge)
print("R2 squared (Lasso Regression):", r2_lasso)


Mean Absolute Error (Linear Regression): 0.6418064238025551
Mean Absolute Error (Ridge Regression): 0.6315946943996982
Mean Absolute Error (Lasso Regression): 0.9860213782288391
R2 squared (Linear Regression): 0.9835104473351716
R2 squared (Ridge Regression): 0.9840765149441071
R2 squared (Lasso Regression): 0.9614371018986154
