# 03.2.1 Goal Expectancy (Goals)

## Note: this code has now been moved to the `src` directory!

In [1]:
% matplotlib inline

import os
import sys

import pandas as pd
import xarray as xr

# Locations of the test data files read by this notebook.
test_dir = '../data/test/'
datacube_path = os.path.join(test_dir, 'XArrayDataSet_1.nc')
team_list_path = os.path.join(test_dir, 'team_list.pickle')

# Printed frames: 100 characters wide, floats shown with thousands
# separators and two decimal places.
pd.set_option("display.width", 100)
pd.set_option("display.float_format", '{:,.2f}'.format)

# Make the sibling 'src' directory importable so project modules resolve.
src_dir = os.path.join(os.getcwd(), os.pardir, 'src')
sys.path.append(src_dir)

from utilities.utilities import get_fixture_list, get_team_list

## Set Up

In [2]:
# Fixture list: preview the first rows, then report its (rows, columns) shape.
fixtures = get_fixture_list(datacube_path)
print(fixtures.head(5))
print(fixtures.shape, '\n')

# Team names: show the full list and how many teams it holds.
team_list = get_team_list(team_list_path)
print(team_list)
print(len(team_list), '\n')

# Open the data cube that the extractor below reads its history from.
ds = xr.open_dataset(datacube_path)

# Peek at the fixtures in positions 50-55.
print(fixtures.iloc[50:56])

                     h_team             a_team
0                   Arsenal     Leicester City
1  Brighton and Hove Albion    Manchester City
2                   Chelsea            Burnley
3            Crystal Palace  Huddersfield Town
4                   Everton         Stoke City
(170, 2) 

['Arsenal', 'Bournemouth', 'Brighton and Hove Albion', 'Burnley', 'Chelsea', 'Crystal Palace', 'Everton', 'Huddersfield Town', 'Leicester City', 'Liverpool', 'Manchester City', 'Manchester United', 'Newcastle United', 'Southampton', 'Stoke City', 'Swansea City', 'Tottenham Hotspur', 'Watford', 'West Bromwich Albion', 'West Ham United']
20 

             h_team             a_team
50          Burnley  Huddersfield Town
51          Everton        Bournemouth
52   Leicester City          Liverpool
53  Manchester City     Crystal Palace
54      Southampton  Manchester United
55       Stoke City            Chelsea


## Goal Expectancy Extractor

In [3]:
from sklearn.base import BaseEstimator, TransformerMixin
from ibex import FrameMixin

class GoalExpectancyExtractor(BaseEstimator, TransformerMixin, FrameMixin):
    """
    Accepts a Fixture list, a team list, and a DataSet
    Calculates Goal Expectancy for each team in the fixture list

    Only games whose 'Idx' is below the smallest index label of the fixture
    batch are used as history. From that history the extractor derives
    league-wide goal averages and a per-team attack/defence table, then
    multiplies them into a home ('h_GE') and away ('a_GE') goal expectancy
    for every fixture in the batch.

    Parameters
    ----------
    fixture_batch : pandas.DataFrame
        Fixtures to score. Must provide 'h_team' and 'a_team' columns; its
        index labels are compared against the dataset's 'Idx' values.
    dataset : xarray.Dataset
        Data cube with an 'Idx' variable which, once converted to a
        DataFrame, provides 'h_team', 'a_team', 'h_goals' and 'a_goals'.
    team_list : list
        Team names; used as the index of the per-team reference table.
    """

    def __init__(self, fixture_batch, dataset, team_list):
        self.fixture_batch = fixture_batch
        self.dataset = dataset
        self.team_list = team_list

    def get_league_history(self, fixture_batch, ds):
        """
        Return every game in `ds` whose 'Idx' precedes the fixture batch.

        The result is a DataFrame sorted by, and indexed on, 'Idx'. The
        original dataset dimensions are kept as ordinary columns.
        """
        lowest_index = min(fixture_batch.index)
        league_history_ds = ds.where(ds['Idx'] < lowest_index, drop=True)
        league_history_df = (
            league_history_ds.to_dataframe()
            .dropna(subset=['Idx'])
            .sort_values('Idx')
            .reset_index()
            .set_index('Idx')
        )
        return league_history_df

    def get_prev_games_subset_df(self, prev_games_df):
        """
        Keep only the goal-related columns plus the two team columns.

        A column is goal-related when its name contains any of the
        substrings in `some_strs`. Each matching column is selected once,
        even if it matches more than one substring.
        """
        some_strs = ['goal']
        relevant_cols = [
            col for col in prev_games_df.columns
            if any(some_str in col for some_str in some_strs)
        ]
        relevant_cols = relevant_cols + ['h_team', 'a_team']
        return prev_games_df[relevant_cols]

    def calc_summary_stats(self, prev_games_subset_df):
        """
        Store league-wide goal averages from the history on the instance.

        Sets `h_GF_mean_per_game`, `a_GF_mean_per_game`, `G_mean_per_game`
        and the four `*_mean_per_team_to_date` attributes used later as
        normalisers in `get_ref_df` and `get_raw_features`.
        """
        h_goals = prev_games_subset_df['h_goals']
        a_goals = prev_games_subset_df['a_goals']

        # Every game has exactly one home side and one away side, so both
        # sides have played the same number of games.
        games_played = len(prev_games_subset_df.index)
        h_tot_GF = h_goals.sum()
        a_tot_GF = a_goals.sum()

        self.h_GF_mean_per_game = h_tot_GF / games_played
        self.a_GF_mean_per_game = a_tot_GF / games_played

        # Total goals divided by the number of team appearances (2 per game).
        self.G_mean_per_game = (h_tot_GF + a_tot_GF) / (2 * games_played)

        # Goals conceded by home sides are the goals scored by away sides,
        # and vice versa, hence the mirrored columns.
        self.h_GF_mean_per_team_to_date = h_goals.mean()
        self.h_GA_mean_per_team_to_date = a_goals.mean()
        self.a_GF_mean_per_team_to_date = a_goals.mean()
        self.a_GA_mean_per_team_to_date = h_goals.mean()

    def get_ref_df(self, pdf, team_list):
        """
        Build the per-team reference table indexed by `team_list`.

        Columns: 'h_GF', 'h_GA', 'a_GF', 'a_GA' (mean goals for/against at
        home and away), then 'Attack_Strength', 'Defence_Weakness' and
        'Supremacy'. A team with no home (or away) games in `pdf` gets NaN
        in the corresponding columns. Requires `calc_summary_stats` to have
        been called first.
        """
        df_ref = pd.DataFrame(index=team_list)

        # One grouped pass per venue instead of re-filtering the whole
        # history for every team; reindex() lines the means up with
        # team_list and leaves NaN for teams that have not played.
        home = pdf.groupby('h_team')
        away = pdf.groupby('a_team')
        per_team_means = (
            ('h_GF', home['h_goals']),
            ('h_GA', home['a_goals']),
            ('a_GF', away['a_goals']),
            ('a_GA', away['h_goals']),
        )
        for column, grouped in per_team_means:
            df_ref[column] = grouped.mean().reindex(df_ref.index).values

        # Ratio of the team's scoring rate to the league rate, averaged
        # over home and away.
        df_ref['Attack_Strength'] = (
            df_ref['h_GF'] / self.h_GF_mean_per_team_to_date
            + df_ref['a_GF'] / self.a_GF_mean_per_team_to_date
        ) / 2

        # Same ratio for goals conceded.
        df_ref['Defence_Weakness'] = (
            df_ref['h_GA'] / self.h_GA_mean_per_team_to_date
            + df_ref['a_GA'] / self.a_GA_mean_per_team_to_date
        ) / 2

        df_ref['Supremacy'] = (
            df_ref['Attack_Strength'] - df_ref['Defence_Weakness']
        ) * self.G_mean_per_game
        return df_ref

    def get_raw_features(self, fb):
        """
        Attach each side's strength ratings and the league means to `fb`.

        Returns a new DataFrame; `fb` itself is not modified. Raises
        KeyError if a fixture names a team missing from `self.df_ref`.
        """
        ref = self.df_ref
        # .values drops the team-name index so the looked-up ratings are
        # assigned positionally, row for row, against the fixture index.
        fb = fb.assign(h_attack_strength=ref.loc[fb['h_team'], 'Attack_Strength'].values)
        fb = fb.assign(h_defence_weakness=ref.loc[fb['h_team'], 'Defence_Weakness'].values)
        fb = fb.assign(h_GF_mean_per_game=self.h_GF_mean_per_game)
        fb = fb.assign(a_attack_strength=ref.loc[fb['a_team'], 'Attack_Strength'].values)
        fb = fb.assign(a_defence_weakness=ref.loc[fb['a_team'], 'Defence_Weakness'].values)
        fb = fb.assign(a_GF_mean_per_game=self.a_GF_mean_per_game)
        fb['dependancy'] = 0.0
        return fb

    def get_goal_expectancies(self, rfdf):
        """
        Turn the raw features into home and away goal expectancies.

        Each expectancy is the side's attack strength times the opponent's
        defence weakness times the league mean goals for that venue.
        """
        feat_df = pd.DataFrame()
        feat_df['h_team'] = rfdf['h_team']
        feat_df['a_team'] = rfdf['a_team']
        feat_df['h_GE'] = (
            rfdf['h_attack_strength']
            * rfdf['a_defence_weakness']
            * rfdf['h_GF_mean_per_game']
        )
        feat_df['a_GE'] = (
            rfdf['a_attack_strength']
            * rfdf['h_defence_weakness']
            * rfdf['a_GF_mean_per_game']
        )
        return feat_df

    def fit(self, X, y=None):
        """No-op: nothing is learned ahead of `transform`. Returns self."""
        return self

    def transform(self, X, y=None):
        """
        Compute goal expectancies for the fixture batch given at construction.

        `X` and `y` are accepted for pipeline compatibility but are not
        read; the fixtures, dataset and team list stored by `__init__` are
        used instead. Returns a DataFrame with columns 'h_team', 'a_team',
        'h_GE' and 'a_GE', indexed like the fixture batch.
        """
        league_history_df = self.get_league_history(self.fixture_batch, self.dataset)
        prev_games_subset_df = self.get_prev_games_subset_df(league_history_df)
        self.calc_summary_stats(prev_games_subset_df)
        self.df_ref = self.get_ref_df(prev_games_subset_df, self.team_list)
        raw_features = self.get_raw_features(self.fixture_batch)
        transformed_X = self.get_goal_expectancies(raw_features)
        return transformed_X


# Score the ten fixtures in positions 50-59 using the games played before them.
fixture_batch = fixtures.iloc[50:60]
gee = GoalExpectancyExtractor(
    fixture_batch=fixture_batch,
    dataset=ds,
    team_list=team_list,
)
# fit() returns the extractor itself, so the two calls can be chained.
feat = gee.fit(fixture_batch).transform(fixture_batch)
print()
print(feat.head(20))


                      h_team                a_team  h_GE  a_GE
50                   Burnley     Huddersfield Town  0.65  0.75
51                   Everton           Bournemouth  0.67  0.66
52            Leicester City             Liverpool  2.56  2.13
53           Manchester City        Crystal Palace  4.41  0.00
54               Southampton     Manchester United  0.30  1.73
55                Stoke City               Chelsea  0.94  1.49
56              Swansea City               Watford  0.53  1.59
57           West Ham United     Tottenham Hotspur  0.58  1.45
58  Brighton and Hove Albion      Newcastle United  0.60  1.12
59                   Arsenal  West Bromwich Albion  1.25  1.02
