In [None]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the team feature table from the CSV file and preview its first rows.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# saved index from an earlier to_csv call — confirm before relying on it.
data_path = '/content/team_features.csv'
df = pd.read_csv(data_path)
df.head()

Unnamed: 0,date,team,opponent,team_score,opponent_score,win,neutral,avg_goals_last_5,win_rate_last_5,avg_goals_conceded_last_5,goal_diff,avg_goal_diff_last_5,days_since_last_match
0,2010-01-02,Iran,North Korea,1.0,0.0,1,True,,,,1.0,,
1,2010-01-02,Mali,Qatar,0.0,0.0,0,False,,,,0.0,,
2,2010-01-02,North Korea,Iran,0.0,1.0,0,True,,,,-1.0,,
3,2010-01-02,Qatar,Mali,0.0,0.0,0,False,,,,0.0,,
4,2010-01-02,Syria,Zimbabwe,6.0,0.0,1,True,,,,6.0,,


In [None]:
# Count the missing values in each column of the raw table.
missing_per_column = df.isna().sum()
missing_per_column

Unnamed: 0,0
date,0
team,0
opponent,0
team_score,0
opponent_score,0
win,0
neutral,0
avg_goals_last_5,309
win_rate_last_5,309
avg_goals_conceded_last_5,309


In [None]:
# Columns passed to the model as predictors.
FEATURES = [
    "avg_goals_last_5",
    "win_rate_last_5",
    "neutral",
    "avg_goals_conceded_last_5",
    "avg_goal_diff_last_5",
]

# Column holding the label the model is trained to predict.
TARGET = "win"

# Keep only the rows in which every predictor and the label are present
# (some rows have NaN in the *_last_5 columns), and take an independent copy
# so later column assignments do not touch `df`.
required_columns = FEATURES + [TARGET]
df_model = df.dropna(subset=required_columns).copy()

In [None]:
# Standardise the feature columns of df_model.
#
# The scaler's mean/variance are estimated from the training period only
# (rows dated before the cutoff used by the chronological train/test split).
# Fitting on every row would let the held-out test rows influence the
# preprocessing, which leaks test information into both the evaluation and
# the scaler that is later saved to disk.
# Keep this cutoff in sync with the date used by the train/test split.
SCALER_FIT_CUTOFF = "2019-01-01"

# Always transform the unscaled values taken from `df` (df_model is a row
# subset of df and keeps its index labels), so re-running this cell does not
# standardise already-standardised data.
# NOTE(review): assumes df has a unique index (the default from read_csv).
raw_features = df.loc[df_model.index, FEATURES]
fit_rows = df_model["date"] < SCALER_FIT_CUTOFF

scaler = StandardScaler()
scaler.fit(raw_features[fit_rows])
df_model[FEATURES] = scaler.transform(raw_features)

In [None]:
# Chronological hold-out: rows dated before the split date are used for
# training, rows on or after it for testing. The comparison is done on the
# `date` column against an ISO-formatted date string.
split_date = "2019-01-01"
is_before_split = df_model["date"] < split_date
is_on_or_after_split = df_model["date"] >= split_date

train_df = df_model.loc[is_before_split]
test_df = df_model.loc[is_on_or_after_split]

# Separate predictors (X) from the label (y) for each partition.
X_train, y_train = train_df[FEATURES], train_df[TARGET]
X_test, y_test = test_df[FEATURES], test_df[TARGET]

In [None]:
# Fit a logistic regression on the training partition; fit() returns the
# estimator itself, so construction and training can be chained.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Hard class predictions and the score taken from the second column of
# predict_proba for the held-out rows.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# Evaluate on the test partition: accuracy uses the hard predictions,
# ROC AUC uses the probability scores.
test_accuracy = accuracy_score(y_test, y_pred)
test_roc_auc = roc_auc_score(y_test, y_proba)

print("Accuracy:", test_accuracy)
print("ROC AUC:", test_roc_auc)

Accuracy: 0.6233005333133028
ROC AUC: 0.6213775419608948


In [None]:
# Pair each feature name with its fitted coefficient (first row of
# model.coef_) and list them from the most positive to the most negative.
feature_weights = model.coef_[0]
coef_df = (
    pd.DataFrame({"feature": FEATURES, "coefficient": feature_weights})
    .sort_values("coefficient", ascending=False)
)

coef_df

Unnamed: 0,feature,coefficient
4,avg_goal_diff_last_5,0.14374
0,avg_goals_last_5,0.093878
1,win_rate_last_5,0.028106
2,neutral,-0.020492
3,avg_goals_conceded_last_5,-0.136348


In [None]:
# Persist the fitted model and the fitted scaler side by side in the current
# working directory, so both can be reloaded together for later predictions.
artifacts = (
    (model, 'soccer_model.pkl'),
    (scaler, 'soccer_scaler.pkl'),
)
for artifact, filename in artifacts:
    joblib.dump(artifact, filename)

print("Model and Scaler saved as .pkl files")

Model and Scaler saved as .pkl files
