In [None]:
!pip install -q catboost unidecode

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 MB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/235.5 kB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os

import joblib, unidecode
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier

In [None]:
# Read the raw match dataset (CSV expected in the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='football_matches_dataset_v2.csv')

In [None]:
# Display the column labels of the loaded frame as a quick schema check.
df.columns

Unnamed: 0,home_team,away_team,home_team_score,away_team_score,match_type,home_team_p1,home_team_p2,home_team_p3,home_team_p4,home_team_p5,...,home_team_p8_rating,away_team_p8_rating,home_team_p9_rating,away_team_p9_rating,home_team_p10_rating,away_team_p10_rating,home_team_p11_rating,away_team_p11_rating,home_team_avg_rating,away_team_avg_rating
0,atletico de madrid,inter milan,5.0,5.0,club,nahuel molina,jan oblak,rodrigo de paul,marcos llorente,arthur vermeeren,...,77.0,66.0,70.0,76.0,62.0,66.0,77.0,,76.272727,73.2
1,borussia dortmund,psv eindhoven,2.0,2.0,club,julian brandt,gregor kobel,donyell malen,nico schlotterbeck,ian maatsen,...,79.0,54.0,62.0,,,63.0,84.0,,74.5,68.0
2,fc barcelona,ssc napoli,3.0,3.0,club,gavi,pedri,lamine yamal,frenkie de jong,ronald araujo,...,80.0,69.0,61.0,73.0,,65.0,73.0,70.0,74.0,70.0
3,arsenal fc,fc porto,4.0,4.0,club,bukayo saka,declan rice,martin ødegaard,william saliba,gabriel martinelli,...,81.0,73.0,61.0,,74.0,79.0,,61.0,73.2,75.777778
4,real madrid,rb leipzig,1.0,1.0,club,jude bellingham,vinicius junior,federico valverde,rodrygo,aurelien tchouameni,...,69.0,70.0,89.0,76.0,63.0,60.0,82.0,66.0,75.555556,68.5


In [None]:
# Print the per-column non-null counts and dtypes; useful for spotting
# columns with missing values before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1178 entries, 0 to 1177
Data columns (total 51 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   home_team             1177 non-null   object 
 1   away_team             1177 non-null   object 
 2   home_team_score       1177 non-null   float64
 3   away_team_score       1177 non-null   float64
 4   match_type            1177 non-null   object 
 5   home_team_p1          1141 non-null   object 
 6   home_team_p2          1141 non-null   object 
 7   home_team_p3          1141 non-null   object 
 8   home_team_p4          1141 non-null   object 
 9   home_team_p5          1141 non-null   object 
 10  home_team_p6          1141 non-null   object 
 11  home_team_p7          1141 non-null   object 
 12  home_team_p8          1141 non-null   object 
 13  home_team_p9          1141 non-null   object 
 14  home_team_p10         1141 non-null   object 
 15  home_team_p11        

In [None]:
# Model inputs: the average player rating of each side.
features = ['home_team_avg_rating', 'away_team_avg_rating']
# Model outputs: goals scored by each side, treated as class labels.
targets = ['home_team_score', 'away_team_score']

# Some rows have no recorded score, and NaN cannot be cast to int (a plain
# `astype(int)` raises IntCastingNaNError). A row without a label cannot be used
# for supervised training, so drop those rows first, then cast the scores.
# Rebinding `df` this way is idempotent: re-running the cell gives the same frame.
# NOTE(review): missing values in the feature columns are left untouched here —
# confirm the downstream estimator is expected to handle NaN inputs.
df = df.dropna(subset=targets).astype({target: int for target in targets})

X = df[features]
y = df[targets]
# => maximum number of goals in the dataset
max_goals = max(df['home_team_score'].max(), df['away_team_score'].max())

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer

## **1.4 Model Training Stage**

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# A single CatBoost configuration, wrapped so it can be fitted against both
# target columns (home and away score) at once.
base_model = CatBoostClassifier(
    iterations=1000,
    learning_rate=0.1,
    depth=6,
    verbose=False,
)
model = MultiOutputClassifier(base_model, n_jobs=-1)
model.fit(X_train, y_train)

In [None]:
# Persist the fitted multi-output model so the prediction stage can reload it.
model_path = 'model/multi_target_goals_model.pkl'
# joblib.dump does not create missing parent directories; make sure 'model/'
# exists so this cell also works on a fresh checkout or runtime.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path)

## **1.5 Model Predictions**

In [None]:
# Reload the saved artifact from disk so predictions come from the persisted
# model rather than the in-memory `model` object.
# NOTE: joblib files are pickle-based — only load files from a trusted source.
pred_model = joblib.load(filename='model/multi_target_goals_model.pkl')

In [None]:
# Predict both targets for the held-out rows.
# NOTE(review): for multiclass targets CatBoost appears to return labels as an
# (n, 1) column, which MultiOutputClassifier stacks into a 3-D array instead of
# the (n_samples, n_targets) layout the evaluation cell indexes with
# `y_pred[:, 0]` / `y_pred[:, 1]`. Reshaping normalises the layout and is a
# no-op when the prediction is already 2-D — confirm against the installed versions.
y_pred = pred_model.predict(X_test).reshape(len(X_test), -1)

## **1.6 Evaluation**

In [None]:
# Column 0 of the targets/predictions is the home score, column 1 the away
# score (same order as the `targets` list used to build `y`).
home_true, home_pred = y_test.iloc[:, 0], y_pred[:, 0]
away_true, away_pred = y_test.iloc[:, 1], y_pred[:, 1]

# Home side: exact-match accuracy followed by the per-class report.
home_accuracy = accuracy_score(home_true, home_pred)
print(f'Home Team Goals Accuracy: {home_accuracy:.2f}')
print('Home Team Goals Classification Report:')
print(classification_report(home_true, home_pred))

# Away side: same metrics on the second target column.
away_accuracy = accuracy_score(away_true, away_pred)
print(f'Away Team Goals Accuracy: {away_accuracy:.2f}')
print('Away Team Goals Classification Report:')
print(classification_report(away_true, away_pred))

In [None]:
# Unweighted mean of the home and away accuracies computed in the evaluation cell.
accuracy_total = home_accuracy + away_accuracy
combined_accuracy = accuracy_total / 2
print(f'Combined Accuracy: {combined_accuracy:.2f}')