In [32]:
import pandas as pd
from IPython.core.display_functions import display
from pandas import DataFrame, json_normalize
from utils import num_of_round, is_allowed_round, response_from_json

# Path of the JSON dump to analyse; the name suggests league id 39, season 2020 (TODO confirm).
FILE_NAME = 'data/league39_season2020.json'
# Payload returned by the project helper; it is flattened with json_normalize in the next cell.
response = response_from_json(file_name=FILE_NAME)

In [33]:
# Flatten the nested payload into a table with dotted column names, then store
# the goal counts as nullable integers. errors='ignore' keeps the frame
# unchanged instead of raising if the cast is not possible.
all_response_data = (
    json_normalize(data=response)
    .astype({'goals.away': 'Int64', 'goals.home': 'Int64'}, errors='ignore')
)
all_response_data

Unnamed: 0,fixture.id,fixture.referee,fixture.timezone,fixture.date,fixture.timestamp,fixture.periods.first,fixture.periods.second,fixture.venue.id,fixture.venue.name,fixture.venue.city,...,goals.home,goals.away,score.halftime.home,score.halftime.away,score.fulltime.home,score.fulltime.away,score.extratime.home,score.extratime.away,score.penalty.home,score.penalty.away
0,592141,K. Friend,UTC,2021-01-12T20:15:00+00:00,1610482500,1610482500,1610486100,512,Turf Moor,Burnley,...,0,1,0,0,0,1,,,,
1,592142,J. Moss,UTC,2020-09-12T14:00:00+00:00,1599919200,1599919200,1599922800,525,Selhurst Park,London,...,1,0,1,0,1,0,,,,
2,592143,C. Kavanagh,UTC,2020-09-12T11:30:00+00:00,1599910200,1599910200,1599913800,535,Craven Cottage,London,...,0,3,0,1,0,3,,,,
3,592144,M. Oliver,UTC,2020-09-12T16:30:00+00:00,1599928200,1599928200,1599931800,550,Anfield,Liverpool,...,4,3,3,2,4,3,,,,
4,592145,J. Moss,UTC,2021-01-20T18:00:00+00:00,1611165600,1611165600,1611169200,555,Etihad Stadium,Manchester,...,2,0,0,0,2,0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,592871,"Andre Marriner, England",UTC,2021-05-23T15:00:00+00:00,1621782000,1621782000,1621785600,550,Anfield,Liverpool,...,2,0,1,0,2,0,,,,
376,592872,"Michael Oliver, England",UTC,2021-05-23T15:00:00+00:00,1621782000,1621782000,1621785600,555,Etihad Stadium,Manchester,...,5,0,2,0,5,0,,,,
377,592873,"Kevin Friend, England",UTC,2021-05-23T15:00:00+00:00,1621782000,1621782000,1621785600,581,Bramall Lane,Sheffield,...,1,0,1,0,1,0,,,,
378,592874,"Martin Atkinson, England",UTC,2021-05-23T15:00:00+00:00,1621782000,1621782000,1621785600,598,London Stadium,London,...,3,0,2,0,3,0,,,,


In [34]:
# Flattened source column -> short name used throughout the rest of the notebook.
COLUMN_NAMES_DICT = {
    'teams.home.name': 'host',
    'teams.away.name': 'guest',
    'goals.home': 'goals_host',
    'goals.away': 'goals_guest',
    'league.round': 'round',
}

# Keep only the mapped columns, give them their short names, and convert the
# round value with the project helper `num_of_round`.
all_games = (
    all_response_data
    .loc[:, list(COLUMN_NAMES_DICT)]
    .rename(columns=COLUMN_NAMES_DICT)
    .assign(round=lambda games: games['round'].apply(num_of_round))
)
all_games

Unnamed: 0,host,guest,goals_host,goals_guest,round
0,Burnley,Manchester United,0,1,1
1,Crystal Palace,Southampton,1,0,1
2,Fulham,Arsenal,0,3,1
3,Liverpool,Leeds,4,3,1
4,Manchester City,Aston Villa,2,0,1
...,...,...,...,...,...
375,Liverpool,Crystal Palace,2,0,38
376,Manchester City,Everton,5,0,38
377,Sheffield Utd,Burnley,1,0,38
378,West Ham,Southampton,3,0,38


In [35]:
# Keep only the fixtures whose round passes the project's `is_allowed_round` check.
games_allowed = all_games.loc[
    lambda games: games['round'].apply(is_allowed_round)
]
games_allowed

Unnamed: 0,host,guest,goals_host,goals_guest,round
60,Aston Villa,Southampton,3,4,7
61,Burnley,Chelsea,0,3,7
62,Fulham,West Brom,2,0,7
63,Leeds,Leicester,1,4,7
64,Liverpool,West Ham,2,1,7
...,...,...,...,...,...
315,Leicester,West Brom,3,0,32
316,Manchester United,Burnley,3,1,32
317,Newcastle,West Ham,3,2,32
318,Southampton,Crystal Palace,3,1,32


In [36]:
# Spot-check a single round (8) of the allowed fixtures.
games_allowed.loc[games_allowed['round'].eq(8)]

Unnamed: 0,host,guest,goals_host,goals_guest,round
70,Arsenal,Aston Villa,0,3,8
71,Brighton,Burnley,0,0,8
72,Chelsea,Sheffield Utd,4,1,8
73,Crystal Palace,Leeds,4,1,8
74,Everton,Manchester United,1,3,8
75,Leicester,Wolves,1,0,8
76,Manchester City,Liverpool,1,1,8
77,Southampton,Newcastle,2,0,8
78,West Brom,Tottenham,0,1,8
79,West Ham,Fulham,1,0,8


In [37]:
def single_team_db(predicted_round: int, games: 'DataFrame | None' = None) -> DataFrame:
    """Sum goals scored (GF) and conceded (GA) per team over earlier rounds.

    Only fixtures with ``round < predicted_round`` are counted, so the totals
    never include the round that is about to be predicted.

    Args:
        predicted_round: Round number used as the exclusive upper bound.
        games: Frame with columns ``host``, ``guest``, ``goals_host``,
            ``goals_guest`` and ``round``. Defaults to the notebook-level
            ``games_allowed`` frame when omitted.

    Returns:
        One row per team (sorted by team name, fresh 0..n-1 index) with the
        columns ``team``, ``GF_as_any``, ``GA_as_any``, ``GF_as_host``,
        ``GA_as_host``, ``GF_as_guest``, ``GA_as_guest``. A team that has not
        yet played at home (or away) gets zeros for that side.
    """
    source = games_allowed if games is None else games
    played = source[source['round'] < predicted_round]

    # Aggregate the goal columns only; summing team names or round numbers is meaningless.
    goal_columns = ['goals_host', 'goals_guest']
    as_host = played.groupby('host')[goal_columns].sum()
    as_guest = played.groupby('guest')[goal_columns].sum()

    # Align both sides on the team name rather than on row position, so a team
    # missing from one side cannot shift every later team's numbers.
    teams = as_host.index.union(as_guest.index).sort_values()
    as_host = as_host.reindex(teams, fill_value=0)
    as_guest = as_guest.reindex(teams, fill_value=0)

    totals = DataFrame({
        'GF_as_any': as_host['goals_host'] + as_guest['goals_guest'],
        'GA_as_any': as_host['goals_guest'] + as_guest['goals_host'],
        'GF_as_host': as_host['goals_host'],
        'GA_as_host': as_host['goals_guest'],
        'GF_as_guest': as_guest['goals_guest'],
        'GA_as_guest': as_guest['goals_host'],
    })
    # Move the team name out of the index into a leading 'team' column.
    return totals.rename_axis('team').reset_index()

# Passing one past the highest allowed round makes the "< predicted_round"
# filter inside single_team_db include every allowed round.
final_single_team_db = single_team_db(
    predicted_round=games_allowed['round'].max() + 1,
)
final_single_team_db

Unnamed: 0,team,GF_as_any,GA_as_any,GF_as_host,GA_as_host,GF_as_guest,GA_as_guest
0,Arsenal,36,29,15,16,21,13
1,Aston Villa,32,28,16,16,16,12
2,Brighton,23,26,12,12,11,14
3,Burnley,23,35,13,15,10,20
4,Chelsea,37,22,20,11,17,11
5,Crystal Palace,26,46,13,22,13,24
6,Everton,29,31,11,19,18,12
7,Fulham,20,28,8,15,12,13
8,Leeds,38,41,17,14,21,27
9,Leicester,45,29,24,15,21,14


In [38]:
# Renames applied to the per-team totals of the home side ("team 1").
# Columns not listed here (GF_as_guest, GA_as_guest) keep their original names.
HOST_COLUMNS_NAMES = dict(
    team='team1',
    GF_as_any='GF1_as_any',
    GA_as_any='GA1_as_any',
    GF_as_host='GF1_as_host',
    GA_as_host='GA1_as_host',
)

# Renames applied to the per-team totals of the away side ("team 2").
# Columns not listed here (GF_as_host, GA_as_host) keep their original names.
GUEST_COLUMNS_NAMES = dict(
    team='team2',
    GF_as_any='GF2_as_any',
    GA_as_any='GA2_as_any',
    GF_as_guest='GF2_as_guest',
    GA_as_guest='GA2_as_guest',
)

In [39]:
def matches_to_predict(predicted_round: int, games: 'DataFrame | None' = None) -> DataFrame:
    """Return the fixtures that belong to exactly one round.

    Args:
        predicted_round: Round number to select.
        games: Frame with a ``round`` column. Defaults to the notebook-level
            ``games_allowed`` frame when omitted.

    Returns:
        The rows whose ``round`` equals ``predicted_round``, with their
        original index labels; empty when the round has no fixtures.
    """
    source = games_allowed if games is None else games
    return source[source['round'] == predicted_round]

In [69]:
max_round = games_allowed['round'].max()
min_round = 7

predicted_round = 31
matches = matches_to_predict(predicted_round=predicted_round)
prediction = DataFrame()

# Per-team totals built from rounds strictly before the predicted one.
single_team_sums = single_team_db(predicted_round=predicted_round)

# Look up each side's totals with an ordered left merge instead of growing a
# frame row by row. A team without any earlier fixture gets missing stats
# instead of aborting the cell, and an empty round yields an empty table.
host_stats = (
    matches['host'].rename('team1').to_frame()
    .merge(single_team_sums.rename(columns=HOST_COLUMNS_NAMES),
           how='left', on='team1', validate='many_to_one')
)
guest_stats = (
    matches['guest'].rename('team2').to_frame()
    .merge(single_team_sums.rename(columns=GUEST_COLUMNS_NAMES),
           how='left', on='team2', validate='many_to_one')
)

# Both halves carry a fresh 0..n-1 index in fixture order, so a column-wise
# concat pairs each host with its guest.
final_df = pd.concat([host_stats, guest_stats], axis=1)

# Written with the clean 0..n-1 index (the export previously carried an all-zero index).
final_df.to_excel('output.xlsx')
final_df




Unnamed: 0,team1,GF1_as_any,GA1_as_any,GF1_as_host,GA1_as_host,GF_as_guest,GA_as_guest,team2,GF2_as_any,GA2_as_any,GF_as_host,GA_as_host,GF2_as_guest,GA2_as_guest
0,Brighton,23,26,12,12,11,14,Everton,27,29,9,17,18,12
1,Burnley,21,30,12,13,9,17,Newcastle,22,40,15,17,7,23
2,Crystal Palace,24,39,12,18,12,21,Chelsea,33,21,20,11,13,10
3,Fulham,19,26,8,14,11,12,Wolves,24,33,14,14,10,19
4,Liverpool,36,22,13,13,23,9,Aston Villa,30,24,15,14,15,10
5,Manchester City,51,11,26,6,25,5,Leeds,35,39,16,13,19,26
6,Sheffield Utd,14,43,9,18,5,25,Arsenal,32,28,14,15,18,13
7,Tottenham,36,25,23,10,13,15,Manchester United,48,21,29,11,19,10
8,West Brom,19,45,6,27,13,18,Southampton,28,41,15,15,13,26
9,West Ham,36,29,21,15,15,14,Leicester,40,26,21,15,19,11
