In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import joblib
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import warnings
file_path = "Football_data_Set.xlsx"
excel_data = pd.ExcelFile(file_path)
matches_df = excel_data.parse('Matches')
matches_df.columns = matches_df.columns.str.strip()

label_encoders = {}
for col in ['Team_1', 'Team_2', 'Place']:
    le = LabelEncoder()
    matches_df[col] = le.fit_transform(matches_df[col])
    label_encoders[col] = le
    

In [2]:
X = matches_df[['Team_1', 'Team_2', 'Season', 'Place']]
y = matches_df['Result']

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)


In [5]:
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

Confusion Matrix:
[[6 1]
 [1 4]]


In [6]:
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

joblib.dump(model, 'football_match_winner_model.pkl')
joblib.dump(label_encoders, 'label_encoders.pkl')


Classification Report:
              precision    recall  f1-score   support

           1       0.86      0.86      0.86         7
           2       0.80      0.80      0.80         5

    accuracy                           0.83        12
   macro avg       0.83      0.83      0.83        12
weighted avg       0.83      0.83      0.83        12



['label_encoders.pkl']

In [7]:
print("Target variable distribution:")
print(y.value_counts())

Target variable distribution:
2    27
1    27
0     4
Name: Result, dtype: int64


In [8]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Random Forest Model Accuracy: {accuracy:.2f}")
train_acc = model.score(X_train, y_train)
test_acc = model.score(X_test, y_test)
print(f"Training Accuracy: {train_acc:.2f}")
print(f"Test Accuracy: {test_acc:.2f}")

Random Forest Model Accuracy: 0.83
Training Accuracy: 0.87
Test Accuracy: 0.83


In [15]:
model = joblib.load("football_match_winner_model.pkl")
label_encoders = joblib.load("label_encoders.pkl")

In [26]:
input_data = {
    'Team_1': ['Arsenal'],
    'Team_2': ['Mancity'],
    'Season': [2021],
    'Place': ['Away']
}

In [27]:
df_input = pd.DataFrame(input_data)

In [28]:
for col in ['Team_1', 'Team_2', 'Place']:
    df_input[col] = label_encoders[col].transform(df_input[col])

In [29]:
predicted_class = model.predict(df_input)[0]
predicted_proba = model.predict_proba(df_input)[0]

In [30]:
if predicted_class == 0:
    result = "Draw"
elif predicted_class == 1:
    result = "Team 1 Wins"
else:
    result = "Team 2 Wins"

print(f"Prediction: {result}")
print(f"Probability (Draw): {predicted_proba[0]*100:.2f}%")
print(f"Probability (Team 1 Wins): {predicted_proba[1]*100:.2f}%")
print(f"Probability (Team 2 Wins): {predicted_proba[2]*100:.2f}%")

Prediction: Team 2 Wins
Probability (Draw): 0.00%
Probability (Team 1 Wins): 1.00%
Probability (Team 2 Wins): 99.00%
