# Initialization

In [None]:
%load_ext autoreload
%autoreload 2
import sys, warnings, time, numpy, yaml, pandas, logging, os, random
sys.path.append("../src/") # go to parent dir
from data_access import get_X, get_y, get_train_test
from models.factory import ModelFactory
warnings.filterwarnings('ignore')
# `import logging` alone does not load the `logging.config` submodule, so it is
# imported explicitly here; otherwise `logging.config.dictConfig` only works
# when some other library happens to have imported it first.
import logging.config

# Configure logging from the project's YAML logging configuration.
with open('../confs/logs.yaml', 'rt') as f:
    config = yaml.safe_load(f)
logging.config.dictConfig(config)

In [None]:
# Dataset flavour selector. The three assignments act as a manual toggle: only
# the last one ('mix') is in effect when the cell runs.
# NOTE(review): this name shadows the builtin `type`; it is also read as a
# global by later cells to build the '../data/output/{type}/...' paths, so it
# cannot be renamed here in isolation.
type='player'
type='team'
type='mix'
# Load the train/test split, the training scores and the submission features
# for the selected flavour.
# NOTE(review): presumably train_size=0.8 is an 80/20 split seeded by
# random_state -- confirm against data_access.get_train_test.
X_train, y_train, X_test, y_test, target = get_train_test(train_size=0.8, random_state=42, type=type)
train_scores = get_y('train', type)
X_submission = get_X('test', type)
# Feature subset selector. Same toggle pattern as above: the last assignment
# wins, so the 20-column list below is the one actually used. Leaving
# `features` as None keeps every column.
features=None
features=['TEAM_GAME_WON_season_average', 'TEAM_PENALTIES_5_last_match_sum', 'PLAYER_ASSISTS_season_average', 'PLAYER_SHOTS_OFF_TARGET_season_average', 'TEAM_GAME_DRAW_5_last_match_sum', 'PLAYER_PENALTIES_COMMITTED_season_std', 'PLAYER_GOALS_CONCEDED_season_average', 'PLAYER_PUNCHES_5_last_match_std', 'PLAYER_CLEARANCES_5_last_match_std', 'TEAM_GOALS_season_sum', 'TEAM_GAME_DRAW_5_last_match_average', 'PLAYER_YELLOWCARDS_season_average', 'TEAM_SHOTS_OFF_TARGET_5_last_match_average', 'TEAM_SHOTS_OUTSIDEBOX_5_last_match_std', 'PLAYER_DUELS_WON_season_average']
features=['TEAM_GAME_WON_season_average', 'TEAM_GAME_WON_season_sum', 'TEAM_SHOTS_ON_TARGET_season_sum', 'PLAYER_GOALS_CONCEDED_season_average', 'TEAM_GAME_LOST_season_average', 'TEAM_GAME_LOST_season_sum', 'PLAYER_KEY_PASSES_season_average', 'PLAYER_GOALS_season_average', 'TEAM_PASSES_season_average', 'TEAM_SHOTS_INSIDEBOX_5_last_match_sum', 'PLAYER_ACCURATE_PASSES_season_average', 'PLAYER_SHOTS_ON_TARGET_5_last_match_sum', 'TEAM_BALL_POSSESSION_season_average', 'TEAM_SHOTS_ON_TARGET_season_average', 'PLAYER_ASSISTS_season_sum', 'PLAYER_GOALS_season_sum', 'PLAYER_PASSES_season_sum', 'PLAYER_SHOTS_TOTAL_season_average', 'TEAM_SUCCESSFUL_PASSES_season_average', 'PLAYER_ASSISTS_season_average']
# Restrict every split to the selected columns; `features = None` keeps all
# columns untouched.
if features is not None:
    X_train = X_train[features]
    X_test = X_test[features]
    X_submission = X_submission[features]

# Methods

In [None]:
def eval_model(model, x_train, x_test, x_submission, save_model=False, save_proba=False):
    """Fit ``model``, score it on ``x_test`` and optionally persist its outputs.

    Output paths are built from the notebook-level global ``type``.

    Args:
        model: Object exposing ``fit()``, ``evaluate(x)``, ``predict(x)``,
            ``format_y(y)``, ``save(x, root_path=...)`` and a ``name`` attribute.
        x_train: Training features; only used when ``save_proba`` is true.
        x_test: Features passed to ``model.evaluate`` (and dumped when
            ``save_proba`` is true).
        x_submission: Submission features, used for both kinds of export.
        save_model: When true, call ``model.save`` on ``x_submission``.
        save_proba: When true, write the model's predictions for the train,
            test and submission sets to CSV files.

    Returns:
        dict with keys ``'name'``, ``'score'`` and ``'time'`` (elapsed seconds,
        rounded to 2 decimals; includes the prediction dumps but not
        ``model.save``).
    """
    # The helper deliberately does NOT reuse the name `save_model`: doing so
    # rebinds the boolean parameter to a (truthy) function, which made
    # `model.save` run whenever `save_proba` was set, regardless of the flag.
    def _dump_predictions(x, root_path):
        y = model.predict(x)
        y.columns = ['HOME_WINS', 'DRAW', 'AWAY_WINS']
        y = model.format_y(y)
        y.to_csv(f'{root_path}{model.name}.csv', index=False)

    start = time.time()
    model.fit()
    score = model.evaluate(x_test)
    if save_proba:
        for split, x in (('train', x_train), ('test', x_test), ('submission', x_submission)):
            _dump_predictions(x, f'../data/output/{type}/{split}/')
    elapsed = numpy.round(time.time() - start, 2)
    logging.info(f'{model.name}={score} in {elapsed}s')
    if save_model:
        model.save(x_submission, root_path=f'../data/output/{type}/predictions/')
    return {'name': model.name, 'score': score, 'time': elapsed}

In [None]:
%load_ext autoreload
%autoreload 2
def eval_model_for_name(name, x_train, x_test, x_submission, save_proba=False, save_model=False):
    """Build the model registered under ``name`` and run ``eval_model`` on it.

    The model configurations are read from '../confs/models.yaml'. The factory
    is given ``x_train`` together with the notebook-level ``y_train`` and
    ``train_scores``. Returns whatever ``eval_model`` returns.
    """
    with open('../confs/models.yaml', 'r') as stream:
        model_confs = yaml.safe_load(stream)
    named_model = ModelFactory(model_confs, x_train, y_train, train_scores).get_model(name)
    return eval_model(
        named_model,
        x_train=x_train,
        x_test=x_test,
        x_submission=x_submission,
        save_proba=save_proba,
        save_model=save_model,
    )

In [None]:
def get_features(name = 'random_forest'):
    """Fit the model called ``name`` and return its feature names.

    The model is built from '../confs/models.yaml' using the notebook-level
    ``X_train``, ``y_train`` and ``train_scores``, then fitted and scored via
    ``eval_model``.

    Returns:
        list: values of the ``'feature'`` column of
        ``model.get_feature_importances()``, in the order that call returns
        them (presumably ranked by importance -- confirm against the model
        implementation).
    """
    with open('../confs/models.yaml', 'r') as file:
        configurations = yaml.safe_load(file)
    factory = ModelFactory(configurations, X_train, y_train, train_scores)
    model = factory.get_model(name)
    # eval_model requires the three feature sets; calling it with the model
    # alone raised TypeError. Nothing is persisted (both save flags default
    # to False).
    eval_model(model, x_train=X_train, x_test=X_test, x_submission=X_submission)
    df_importances = model.get_feature_importances()
    return list(df_importances['feature'])

# Evaluation

In [None]:
%load_ext autoreload
%autoreload 2
# Evaluate every configured model and write the leaderboard to disk.
save_model = True
with open('../confs/models.yaml', 'r') as file:
    configurations = yaml.safe_load(file)
factory = ModelFactory(configurations, X_train, y_train, train_scores)
lines = []
for model in factory.get_models():
    # eval_model fits and scores the model itself; fitting/evaluating here as
    # well trained every model twice and discarded the first score.
    lines.append(eval_model(model, x_train=X_train, x_test=X_test, x_submission=X_submission, save_model=save_model))
# One row per model (name, score, time), best score first.
df = pandas.DataFrame(lines)
df = df.sort_values(by=['score'], ascending=False)
df.to_csv(f'../data/output/{type}/result.csv')

In [None]:
# Display the results table ordered by descending score, then write it to the
# results CSV for the current dataset flavour.
print(df.sort_values(ascending=False, by=['score']))
df.to_csv(path_or_buf=f'../data/output/{type}/result.csv')