In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error


# Read the player table from the Excel workbook (relative path, resolved
# against the current working directory).
# NOTE(review): later cells in this notebook read "project1_aiml.xlsx" instead
# of "project1_aiml (1).xlsx" - confirm both names point at the same data.
df = pd.read_excel("project1_aiml (1).xlsx")


# Multipliers applied to a player's raw goal count, keyed by primary position
# code, when the "Adjusted Goals" feature is built. The weight grows from
# forwards (1.0) to goalkeepers (2.0).
goal_weights = dict(FW=1.0, MF=1.2, DF=1.5, GK=2.0)

def map_position(pos):
    """Reduce a raw position label to a single primary position code.

    The label is searched for the substrings "FW", "MF", "DF" and "GK", in
    that priority order, and the first one found is returned. A label such
    as "MF,FW" therefore maps to "FW".

    Args:
        pos: Position label as a string (callers in this notebook convert
            the column with ``astype(str)`` first).

    Returns:
        One of "FW", "MF", "DF", "GK", or "Unknown" when none of the codes
        occurs in ``pos``.
    """
    for code in ("FW", "MF", "DF", "GK"):
        if code in pos:
            return code
    return "Unknown"

# ---- Cleaning ---------------------------------------------------------------
# Rows without a "Rating" cannot serve as supervised examples, so drop them,
# then normalise the columns used below. The cleaned frame is built in one
# non-mutating chain instead of inplace edits.
df = df.dropna(subset=["Rating"]).assign(
    # Collapse raw labels to FW / MF / DF / GK / "Unknown". astype(str) turns
    # missing values into the string "nan", which map_position maps to "Unknown".
    Pos=lambda frame: frame["Pos"].astype(str).apply(map_position),
    # Unparseable or missing goal / assist counts are treated as 0.
    Gls=lambda frame: pd.to_numeric(frame["Gls"], errors="coerce").fillna(0),
    Ast=lambda frame: pd.to_numeric(frame["Ast"], errors="coerce").fillna(0),
)

# Position-weighted goals, computed column-wise (same formula as the row-wise
# version it replaces, and the same expression used for new players later in
# the notebook). Positions without an entry in goal_weights keep a weight of 1.0.
df["Adjusted Goals"] = df["Gls"] * df["Pos"].map(goal_weights).fillna(1.0)

# ---- Feature matrix and target ----------------------------------------------
target = "Rating"
categorical_features = ["Pos", "Team", "Nation"]
# NOTE(review): only "Gls" and "Ast" are coerced to numeric above; "Min", "xG",
# "xAG" and "PrgP" are used as loaded - confirm they are numeric with no gaps.
numerical_features = ["Adjusted Goals", "Ast", "Min", "xG", "xAG", "PrgP"]
X = df[categorical_features + numerical_features]
y = df[target]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One-hot encode the categorical columns and standardise the numeric ones.
# With handle_unknown='ignore', categories not seen during fit are encoded as
# all zeros instead of raising.
preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ("num", StandardScaler(), numerical_features)
])

# Fit the preprocessing on the training split only, then reuse it on the test split.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# ---- Model ------------------------------------------------------------------
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

y_pred = rf_model.predict(X_test)

# ---- Hold-out metrics -------------------------------------------------------
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"R-squared Score: {r2}")







Mean Absolute Error: 0.10853288590604042
Mean Squared Error: 0.021048235167785274
Root Mean Squared Error: 0.14508009914452524
R-squared Score: 0.658695250266993


In [None]:
# Hand-written example players to score with the fitted random-forest model.
new_players = pd.DataFrame({
    "Pos": ["FW", "MF", "DF"],
    "Team": ["Team A", "Team B", "Team C"],
    "Nation": ["Country X", "Country Y", "Country Z"],
    "Gls": [5, 2, 1],
    "Ast": [3, 6, 2],
    "Min": [1200, 1800, 2100],
    "xG": [3.5, 2.2, 1.1],
    "xAG": [2.0, 3.8, 1.5],
    "PrgP": [45, 38, 25]
})

# Normalise positions exactly as the training data was normalised, so compound
# or unexpected labels are reduced to FW / MF / DF / GK / "Unknown" before they
# reach the weight lookup and the encoder. For the labels above this is a no-op.
new_players["Pos"] = new_players["Pos"].astype(str).apply(map_position)

# Same position-weighted goal feature as used for training; positions without
# an entry in goal_weights fall back to a weight of 1.0.
new_players["Adjusted Goals"] = new_players["Gls"] * new_players["Pos"].map(goal_weights).fillna(1.0)

# Select the model inputs from the same feature lists used to build the
# training matrix, so training and inference columns cannot drift apart.
new_X = new_players[categorical_features + numerical_features]

# Reuse the already-fitted preprocessor (transform only, no refit).
# NOTE(review): `preprocessor` is re-created by later cells; this cell assumes
# it is still the one fitted together with `rf_model`. The preprocessor uses
# handle_unknown='ignore', so any Team / Nation value not seen during fitting
# is encoded as all zeros.
new_X_transformed = preprocessor.transform(new_X)

new_ratings = rf_model.predict(new_X_transformed)
new_players["Predicted Rating"] = new_ratings

print(new_players[["Pos", "Team", "Nation", "Predicted Rating"]])


  Pos    Team     Nation  Predicted Rating
0  FW  Team A  Country X            6.8609
1  MF  Team B  Country Y            6.9258
2  DF  Team C  Country Z            6.8044


In [None]:
# Put the held-out targets next to the predictions currently stored in
# `y_pred`, aligned by position (the original row index is discarded).
actual_vs_predicted = {
    "Actual Rating": y_test.values,
    "Predicted Rating": y_pred,
}
comparison_df = pd.DataFrame(actual_vs_predicted)

# Preview only the first ten pairs.
print(comparison_df.head(10))


   Actual Rating  Predicted Rating
0           6.94            6.8314
1           7.07            7.1154
2           6.78            6.7843
3           6.84            6.9485
4           6.77            6.8799
5           6.95            6.7996
6           7.00            6.8192
7           6.89            6.8956
8           7.24            7.0659
9           6.71            6.8275


In [7]:
# Support Vector Regression (SVR)

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

# Load the player table again from the Excel workbook (relative path).
df = pd.read_excel("project1_aiml.xlsx")

# Goal multipliers per primary position code; used below to derive the
# "Adjusted Goals" feature from the raw goal count.
goal_weights = {"FW": 1.0, "MF": 1.2, "DF": 1.5, "GK": 2.0}

def map_position(pos):
    """Return the primary position code contained in a position label.

    Codes are tested by substring match in the fixed order FW, MF, DF, GK;
    the first code present wins. If no code is present the result is
    "Unknown".

    Args:
        pos: Position label as a string.

    Returns:
        "FW", "MF", "DF", "GK" or "Unknown".
    """
    priority = ("FW", "MF", "DF", "GK")
    return next((code for code in priority if code in pos), "Unknown")

# Keep only rows that have a target value. The explicit copy gives an
# independent frame for the column assignments below (no inplace mutation of
# the frame that was just loaded).
df = df.dropna(subset=["Rating"]).copy()

# Collapse raw position labels to FW / MF / DF / GK / "Unknown"; astype(str)
# turns missing values into "nan", which map_position maps to "Unknown".
df["Pos"] = df["Pos"].astype(str).apply(map_position)

# Goal and assist counts: anything that cannot be parsed as a number becomes 0.
for count_col in ("Gls", "Ast"):
    df[count_col] = pd.to_numeric(df[count_col], errors="coerce").fillna(0)

# Position-weighted goals as a single vectorised expression (replaces the
# row-by-row apply; same values). Positions missing from goal_weights get 1.0.
position_weight = df["Pos"].map(goal_weights).fillna(1.0)
df["Adjusted Goals"] = df["Gls"] * position_weight

target = "Rating"
categorical_features = ["Pos", "Team", "Nation"]
# NOTE(review): "Min", "xG", "xAG" and "PrgP" are not coerced to numeric here;
# SVR does not accept missing values - confirm these columns are clean.
numerical_features = ["Adjusted Goals", "Ast", "Min", "xG", "xAG", "PrgP"]
X = df[categorical_features + numerical_features]
y = df[target]

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One-hot encode categoricals (unseen categories -> all zeros) and standardise
# the numeric columns.
preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ("num", StandardScaler(), numerical_features)
])

# Preprocessing statistics come from the training split only.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# RBF-kernel support vector regressor with library-default hyperparameters.
svr_model = SVR(kernel="rbf")
svr_model.fit(X_train, y_train)

# NOTE: this rebinds y_test / y_pred, which earlier cells also define.
y_pred = svr_model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"R-squared Score: {r2}")


Mean Absolute Error: 0.10356163284552483
Mean Squared Error: 0.018657868831104294
Root Mean Squared Error: 0.13659380963683637
R-squared Score: 0.6951698357900291


In [8]:
# XGBoost regressor (XGBRegressor)

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

# Read the player table from the Excel workbook (relative path).
df = pd.read_excel("project1_aiml.xlsx")

# Position code -> multiplier for the raw goal count; consumed when the
# "Adjusted Goals" column is derived further down in this cell.
goal_weights = dict([
    ("FW", 1.0),
    ("MF", 1.2),
    ("DF", 1.5),
    ("GK", 2.0),
])

def map_position(pos):
    """Map a position label to one primary position code.

    Substring checks run in the order FW, MF, DF, GK, so when a label
    contains several codes the earliest in that order is chosen.

    Args:
        pos: Position label as a string.

    Returns:
        The matched code ("FW", "MF", "DF" or "GK"), or "Unknown" if the
        label contains none of them.
    """
    if "FW" in pos:
        primary = "FW"
    elif "MF" in pos:
        primary = "MF"
    elif "DF" in pos:
        primary = "DF"
    elif "GK" in pos:
        primary = "GK"
    else:
        primary = "Unknown"
    return primary

# Clean the table in one non-mutating chain:
#   * drop rows with no "Rating" (the regression target),
#   * collapse position labels to FW / MF / DF / GK / "Unknown",
#   * coerce goal / assist counts to numbers, treating bad or missing values as 0,
#   * derive the position-weighted goal feature column-wise (replaces the
#     row-by-row apply; positions absent from goal_weights use a weight of 1.0).
# assign evaluates its keyword arguments in order, so "Adjusted Goals" sees the
# already-cleaned "Gls" and "Pos" columns.
df = df.dropna(subset=["Rating"]).assign(
    Pos=lambda table: table["Pos"].astype(str).apply(map_position),
    Gls=lambda table: pd.to_numeric(table["Gls"], errors="coerce").fillna(0),
    Ast=lambda table: pd.to_numeric(table["Ast"], errors="coerce").fillna(0),
    **{
        "Adjusted Goals": lambda table: (
            table["Gls"] * table["Pos"].map(goal_weights).fillna(1.0)
        )
    },
)

target = "Rating"
categorical_features = ["Pos", "Team", "Nation"]
# NOTE(review): "Min", "xG", "xAG" and "PrgP" are used as loaded, without
# numeric coercion - confirm they are already numeric.
numerical_features = ["Adjusted Goals", "Ast", "Min", "xG", "xAG", "PrgP"]
X = df[categorical_features + numerical_features]
y = df[target]

# Reserve 20% of the rows for evaluation (seeded for repeatability).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Categorical columns are one-hot encoded (categories unseen at fit time become
# all-zero rows); numeric columns are standardised.
preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ("num", StandardScaler(), numerical_features)
])

# Fit on the training split, then apply the fitted transform to the test split.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

xgb_model = XGBRegressor(n_estimators=100, random_state=42)
xgb_model.fit(X_train, y_train)

# NOTE: this rebinds y_test / y_pred, which earlier cells also define.
y_pred = xgb_model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"R-squared Score: {r2}")


Mean Absolute Error: 0.10773341351707513
Mean Squared Error: 0.01952195829137909
Root Mean Squared Error: 0.13972100161170864
R-squared Score: 0.6810524392935675
