In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

df = pd.read_csv(r'C:\Krishna\project ML\data\Data.csv')

def match_result(row):
    if row['Home Team Goals'] > row['Away Team Goals']:
        return 'Home Win'
    elif row['Home Team Goals'] < row['Away Team Goals']:
        return 'Away Win'
    else:
        return 'Draw'

df['Result'] = df.apply(match_result, axis=1)

drop_cols = ['Datetime', 'Win conditions', 'Referee', 'Assistant 1', 'Assistant 2']
df = df.drop(columns=drop_cols)

df = df.dropna(subset=['Home Team Goals', 'Away Team Goals'])

X = df.drop(columns=['Result'])
y = df['Result']

num_cols = X.select_dtypes(include=['int64','float64']).columns
cat_cols = X.select_dtypes(include=['object']).columns

num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('ohe', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer([
    ('num', num_pipeline, num_cols),
    ('cat', cat_pipeline, cat_cols)
])

model = Pipeline([
    ('pre', preprocessor),
    ('clf', RandomForestClassifier(n_estimators=200, random_state=42))
])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

joblib.dump(model, 'model.pkl')
print("Model saved as model.pkl")


Accuracy: 0.8596491228070176
              precision    recall  f1-score   support

    Away Win       0.94      0.89      0.91        35
        Draw       0.83      0.50      0.62        38
    Home Win       0.84      0.99      0.91        98

    accuracy                           0.86       171
   macro avg       0.87      0.79      0.82       171
weighted avg       0.86      0.86      0.85       171

Model saved as model.pkl
