In [43]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

import joblib
import warnings
warnings.filterwarnings("ignore")


In [44]:
matches = pd.read_csv("/content/datasets/matches_1930_2022.csv")
ranking = pd.read_csv("/content/datasets/fifa_ranking_2022-10-06.csv")


In [45]:
matches = matches[[
    "home_team",
    "away_team",
    "home_score",
    "away_score"
]]

matches.dropna(inplace=True)


In [46]:
def get_result(row):
    if row["home_score"] > row["away_score"]:
        return "HomeWin"
    elif row["home_score"] < row["away_score"]:
        return "AwayWin"
    else:
        return "Draw"

matches["result"] = matches.apply(get_result, axis=1)


In [47]:
ranking = ranking[["team", "rank", "points"]]


In [48]:
matches = matches.merge(
    ranking,
    how="left",
    left_on="home_team",
    right_on="team"
)

matches.rename(columns={
    "rank": "home_rank",
    "points": "home_points"
}, inplace=True)

matches.drop("team", axis=1, inplace=True)




In [49]:
matches = matches.merge(
    ranking,
    how="left",
    left_on="away_team",
    right_on="team"
)

matches.rename(columns={
    "rank": "away_rank",
    "points": "away_points"
}, inplace=True)

matches.drop("team", axis=1, inplace=True)


In [50]:
matches.dropna(inplace=True)


In [51]:
matches["rank_diff"] = matches["home_rank"] - matches["away_rank"]
matches["points_diff"] = matches["home_points"] - matches["away_points"]



In [52]:
le_team = LabelEncoder()

all_teams = pd.concat([matches["home_team"], matches["away_team"]])
le_team.fit(all_teams)

matches["home_encoded"] = le_team.transform(matches["home_team"])
matches["away_encoded"] = le_team.transform(matches["away_team"])



In [53]:
le_result = LabelEncoder()
matches["result_encoded"] = le_result.fit_transform(matches["result"])


In [54]:
X = matches[[
    "home_encoded",
    "away_encoded",
    "rank_diff",
    "points_diff"
]]

y = matches["result_encoded"]



In [55]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42
)



In [56]:
model = RandomForestClassifier(
    n_estimators=300,
    max_depth=12,
    random_state=42
)

model.fit(X_train, y_train)


In [57]:
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.5096774193548387
              precision    recall  f1-score   support

           0       0.45      0.23      0.31        39
           1       0.21      0.08      0.12        38
           2       0.55      0.86      0.67        78

    accuracy                           0.51       155
   macro avg       0.41      0.39      0.36       155
weighted avg       0.44      0.51      0.44       155



In [58]:
joblib.dump(model, "fifa_match_model.pkl")
joblib.dump(le_team, "team_encoder.pkl")
joblib.dump(le_result, "result_encoder.pkl")


['result_encoder.pkl']

In [59]:
def predict_match(home_team, away_team):

    if home_team not in ranking["team"].values or \
       away_team not in ranking["team"].values:
        return "One or both teams not found in ranking dataset"

    home_rank = ranking.loc[ranking["team"] == home_team, "rank"].values[0]
    away_rank = ranking.loc[ranking["team"] == away_team, "rank"].values[0]

    home_points = ranking.loc[ranking["team"] == home_team, "points"].values[0]
    away_points = ranking.loc[ranking["team"] == away_team, "points"].values[0]

    rank_diff = home_rank - away_rank
    points_diff = home_points - away_points

    home_enc = le_team.transform([home_team])[0]
    away_enc = le_team.transform([away_team])[0]

    features = [[home_enc, away_enc, rank_diff, points_diff]]

    prediction = model.predict(features)
    probabilities = model.predict_proba(features)[0]

    result = le_result.inverse_transform(prediction)[0]

    return {
        "Predicted Result": result,
        "Probabilities": probabilities
    }
