In [1]:
import os
import sys
import warnings
import pandas as pd
from pathlib import Path

In [11]:
from split_start import *
from split_block import *
from prepare_dataset import *
from sklearn.tree import DecisionTreeClassifier
from multi_imbalance.ensemble.mrbbagging import MRBBagging
from sklearn import metrics

In [3]:
# Make the sibling "Preprocessing" directory (next to this notebook's folder)
# importable, so the project-local imports that follow can be resolved.
# The path is built with pathlib so the separator is correct on every OS;
# a hard-coded backslash only works on Windows.
working_dir = Path(os.getcwd()).parent
preprocessing_dir = str(working_dir / 'Preprocessing')
# Guard against piling up duplicate entries when the cell is re-run.
if preprocessing_dir not in sys.path:
    sys.path.append(preprocessing_dir)

from data_aggregator import *
from season import *
from parameters import *
from team import *

## Przygotowanie danych - pobranie przetworzonych danych do pamięci podręcznej

In [4]:
# Value forwarded to Parameters(no_last_matches=...) below.
no_last_matches = 3

data_aggregator = DataAggregator()

# Seasons requested from the aggregator (2010 through 2016).
seasons_to_load = [
    Season.y2010, Season.y2011, Season.y2012, Season.y2013,
    Season.y2014, Season.y2015, Season.y2016,
]
aggregation_parameters = Parameters(no_last_matches=no_last_matches)

# The aggregator returns a pair: the per-season data and the "past" data
# that prepare_dataset() consumes later in the notebook.
all_seasons, all_data_past = data_aggregator.get_data_for_seasons(
    seasons_to_load,
    aggregation_parameters,
)

## Przygotowanie podstawowych wartości

In [5]:
# Parse the 'match_date' column with pandas and store the result back
# under the same column name.
parsed_match_dates = pd.to_datetime(all_seasons['match_date'])
all_seasons['match_date'] = parsed_match_dates

In [6]:
# Column names handed to prepare_dataset() as the parameter list.
# Laid out as home/away pairs; the odds columns form a home/draw/away triple.
# NOTE(review): the list contains the score columns themselves
# ('home_team_score', 'away_team_score') - confirm prepare_dataset() only uses
# them through past-match aggregates, otherwise they would leak the outcome.
list_of_parameters = [
    'home_team_score', 'away_team_score',
    'home_team_seasons_played', 'away_team_seasons_played',
    'home_team_last_season_points', 'away_team_last_season_points',
    'home_players_avg_age', 'away_players_avg_age',
    'home_players_avg_rating', 'away_players_avg_rating',
    'home_elo_rating', 'away_elo_rating',
    'avg_home_win_odds', 'avg_draw_odds', 'avg_away_win_odds',
    'home_avg_corners', 'away_avg_corners',
    'home_avg_shots', 'away_avg_shots',
    'home_won_games', 'away_won_games',
    'home_tied_games', 'away_tied_games',
    'home_lost_games', 'away_lost_games',
    'home_scored_goals', 'away_scored_goals',
]

In [7]:
# Build the modelling dataset with a 90% / 10% train/test split.
# The result is indexed by 'X_train', 'y_train', 'X_test' and 'y_test'
# further down in the notebook.
dataset = prepare_dataset(
    all_seasons,
    list_of_parameters,
    all_data_past,
    add_direct=True,
    avg=3,
    train_size=0.9,
    test_size=0.1,
    undersample=False,
    globalCS=False,
)

In [8]:
# Import Counter explicitly: no cell imports it by name, so without this line
# it is only available if one of the wildcard imports happens to re-export it.
from collections import Counter

# Class distribution of the training labels (shown as the cell output).
Counter(dataset['y_train'])

Counter({1: 923, 0: 522, 2: 597})

## Algorytm Multi-class Roughly Balanced Bagging

In [9]:
# Short aliases for the train and test parts of the prepared dataset.
Xtrain, Ytrain = dataset['X_train'], dataset['y_train']
Xtest, Ytest = dataset['X_test'], dataset['y_test']

In [49]:
# Base learner for the ensemble: an entropy-criterion decision tree capped at depth 20.
base_tree = DecisionTreeClassifier(criterion='entropy', max_depth=20)
# Multi-class Roughly Balanced Bagging ensemble with k=70 around that base learner.
mrbag = MRBBagging(k=70, learning_algorithm=base_tree)

In [50]:
# The ensemble is fed plain NumPy arrays rather than pandas objects.
train_features = Xtrain.to_numpy()
train_labels = Ytrain.to_numpy()
# Left as the last expression so the cell displays the value returned by fit().
mrbag.fit(train_features, train_labels)

MRBBagging(k=70,
           learning_algorithm=DecisionTreeClassifier(criterion='entropy',
                                                     max_depth=20))

In [51]:
# Predict labels for the test part of the split (converted to NumPy first).
test_features = Xtest.to_numpy()
Ypred = mrbag.predict(test_features)

In [52]:
# Accuracy of the predictions against the test labels.
print('\n##################################\nAccuracy: ')
test_accuracy = metrics.accuracy_score(Ytest, Ypred)
print(test_accuracy)

# Macro-averaged precision, recall and F-score; the returned tuple has a
# fourth element (support), which is None in the averaged output shown below.
print('\n##################################\nPrecision, Recall and fscore:: ')
macro_scores = metrics.precision_recall_fscore_support(Ytest, Ypred, average='macro')
print(macro_scores)


##################################
Accuracy: 
0.4933920704845815

##################################
Precision, Recall and fscore:: 
(0.4784071729957806, 0.4758846807953951, 0.473063973063973, None)


In [53]:
print("Poprawnie sklasyfikowane przykłady:")
# Count the test examples whose prediction matches the true label, grouped by
# true class, and show them as (class, count) pairs ordered by class.
is_correct = Ypred == Ytest
correct_by_class = Counter(Ytest[is_correct])
sorted(correct_by_class.items(), key = lambda class_count : class_count[0])

Poprawnie sklasyfikowane przykłady:


[(0, 21), (1, 58), (2, 33)]

In [54]:
print("Błędnie sklasyfikowane przykłady:")
# Count the test examples whose prediction differs from the true label, grouped
# by true class, and show them as (class, count) pairs ordered by class.
is_wrong = Ypred != Ytest
wrong_by_class = Counter(Ytest[is_wrong])
sorted(wrong_by_class.items(), key = lambda class_count : class_count[0])

Błędnie sklasyfikowane przykłady:


[(0, 43), (1, 40), (2, 32)]

In [55]:
# Confusion matrix of true test labels against predicted labels.
confusion = metrics.confusion_matrix(y_true=Ytest, y_pred=Ypred)
print("Macierz pomyłek:")
print(confusion)

Macierz pomyłek:
[[21 20 23]
 [17 58 23]
 [10 22 33]]


## Nauczenie modelu na całym dostępnym zbiorze danych i jego zapis w celu wykorzystania w interaktywnym notebooku

In [58]:
import pickle

from sklearn.base import clone

# Train the model that is shipped to the interactive notebook on ALL available
# data (train_size = 1.0), using the same hyper-parameters as the evaluated model.
#
# A fresh, unfitted clone is fitted instead of aliasing `mrbag`: fitting through
# an alias would overwrite the evaluated model in place, so re-running the
# evaluation cells afterwards would score a model that has already seen the
# test rows.
finalModel = clone(mrbag)
dataset_final = prepare_dataset(all_seasons, list_of_parameters, all_data_past, add_direct = True, avg = 3, train_size = 1.0,
                          test_size = 0.0, undersample = False, globalCS = False)

finalModel.fit(dataset_final['X_train'].to_numpy(), dataset_final['y_train'].to_numpy())

# Persist the fitted model; the context manager guarantees the file handle is
# flushed and closed even if pickling fails.
# NOTE: pickle files must only ever be loaded from trusted sources.
filename = 'mrbbag.pkl'
with open(filename, "wb") as model_file:
    pickle.dump(finalModel, model_file)