In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score, KFold
from sklearn.linear_model import BayesianRidge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectKBest, f_regression

file_path = "MinMaxData1.xlsx"
df = pd.read_excel(file_path, engine="openpyxl")

Q1 = df["fee_cleaned"].quantile(0.25)
Q3 = df["fee_cleaned"].quantile(0.75)
IQR = Q3 - Q1
df = df[~((df["fee_cleaned"] < (Q1 - 1.5 * IQR)) | (df["fee_cleaned"] > (Q3 + 1.5 * IQR)))]

df.dropna(subset=["fee_cleaned"], inplace=True)

features = df[
    [
        "matches_played", "goals", "assists", "yellow_card", "90s_played",
        "minutes", "xGoals", "xAssists", "passes", "pass_complete%",
        "tackle", "block", "touches", "carries", "goals_against", "goals_against90",
        "Saves", "Saves%", "PSxG", "age"
    ]
]

target = df["fee_cleaned"]

imputer = SimpleImputer(strategy="mean")
features_imputed = imputer.fit_transform(features)

scaler = StandardScaler()
features_scaled = scaler.fit_transform(features_imputed)

selector = SelectKBest(score_func=f_regression, k=10)
features_selected = selector.fit_transform(features_scaled, target)

kfold = KFold(n_splits=5, shuffle=True, random_state=42)

bayesian_ridge_model = BayesianRidge()
mse_scores_bayesian_ridge = -cross_val_score(bayesian_ridge_model, features_selected, target, cv=kfold, scoring='neg_mean_squared_error')
r_squared_scores_bayesian_ridge = cross_val_score(bayesian_ridge_model, features_selected, target, cv=kfold, scoring='r2')

mean_mse_bayesian_ridge, std_mse_bayesian_ridge = np.mean(mse_scores_bayesian_ridge), np.std(mse_scores_bayesian_ridge)
mean_r_squared_bayesian_ridge, std_r_squared_bayesian_ridge = np.mean(r_squared_scores_bayesian_ridge), np.std(r_squared_scores_bayesian_ridge)

print("Bayesian Ridge Regression:")
print("Mean Squared Error (Cross-Validated):", mean_mse_bayesian_ridge)
print("MSE Standard Deviation:", std_mse_bayesian_ridge)
print("R-squared (Cross-Validated):", mean_r_squared_bayesian_ridge)
print("R-squared Standard Deviation:", std_r_squared_bayesian_ridge)


Bayesian Ridge Regression:
Mean Squared Error (Cross-Validated): 93.80694562001456
MSE Standard Deviation: 11.5117052314532
R-squared (Cross-Validated): 0.10480778155801806
R-squared Standard Deviation: 0.07146809667867343
