In [37]:
import os
import json
import pandas as pd
import numpy as np

## Set Variables

In [2]:
#kpi = 'stength_attack'
#kpi = 'stength_overall'
#kpi = 'difficulty'
kpi = 'custom_kpi'
skip_multi_gameweeks = False
skip_blank_gameweeks = False
start_gameweek = 20
end_gameweek = 35
exclude_gameweeks = []

In [3]:
# Exmaple custom KPI, goals scored last seasion (new teams given relgated teams values)
custom_kpi = {
    "ARS" : 56,
    "AVL" : 41,
    "BHA" : 39,
    "BUR" : 43,
    "CHE" : 69,
    "CRY" : 31,
    "EVE" : 44,
    "FUL" : 26,
    "LEE" : 40,
    "LEI" : 67,
    "LIV" : 85,
    "MCI" : 102,
    "MUN" : 66,
    "NEW" : 38,
    "SHU" : 39,
    "SOU" : 51,
    "TOT" : 61,
    "WBA" : 36,
    "WHU" : 49,
    "WOL" : 51
}

## Import Data

In [4]:
# Change directory to level up
current = os.getcwd()
os.chdir(current.rsplit('/', 1)[0])

### Bootstrap Static Data

In [5]:
# List files in data direcory
file_list = os.listdir("data/bootstrap_static")

# Sort and select first (latest) data set
file_list.sort()
file_list = file_list[0]

In [6]:
# Import data
path = f'data/bootstrap_static/{file_list}'
with open(path) as f:
    data = json.load(f)

### Fixtures

In [7]:
# List files in data direcory
file_list = os.listdir("data/fixtures")

# Sort and select first (latest) data set
file_list.sort()
file_list = file_list[0]

In [8]:
# Import data
path = f'data/fixtures/{file_list}'
with open(path) as f:
    fixtures = json.load(f)

In [9]:
os.chdir(current)

## Create look up for team IDs

In [10]:
name_lkup = {}

In [11]:
# Create look up dict
def create_lkup(kpi):
    for i in data['teams']:
        id = i['id']
        kpi_value = i[kpi]
        if id not in name_lkup.keys():
            name_lkup[id] = {}
        name_lkup[id][kpi] = kpi_value

In [12]:
kpis_to_add = ['short_name',
               'strength_overall_home',
               'strength_overall_away',
               'strength_attack_home',
               'strength_attack_away',
               'strength_defence_home',
               'strength_defence_away']

for i in kpis_to_add:
    create_lkup(i)

In [13]:
# View look up
name_lkup

{1: {'short_name': 'ARS',
  'strength_overall_home': 1190,
  'strength_overall_away': 1210,
  'strength_attack_home': 1170,
  'strength_attack_away': 1210,
  'strength_defence_home': 1190,
  'strength_defence_away': 1200},
 2: {'short_name': 'AVL',
  'strength_overall_home': 1150,
  'strength_overall_away': 1160,
  'strength_attack_home': 1150,
  'strength_attack_away': 1150,
  'strength_defence_home': 1180,
  'strength_defence_away': 1210},
 3: {'short_name': 'BHA',
  'strength_overall_home': 1080,
  'strength_overall_away': 1100,
  'strength_attack_home': 1150,
  'strength_attack_away': 1180,
  'strength_defence_home': 1090,
  'strength_defence_away': 1100},
 4: {'short_name': 'BUR',
  'strength_overall_home': 1050,
  'strength_overall_away': 1080,
  'strength_attack_home': 1120,
  'strength_attack_away': 1190,
  'strength_defence_home': 1010,
  'strength_defence_away': 1030},
 5: {'short_name': 'CHE',
  'strength_overall_home': 1260,
  'strength_overall_away': 1280,
  'strength_atta

## Add additional information to fixtures

In [14]:
for i in fixtures:
    
    # Away team
    # Select away team ID
    team_a_id = i['team_a']  
    
    # Add team short name
    kpi_key = 'short_name'
    kpi_name = 'team_a_short_name' 
    kpi_value = name_lkup[team_a_id][kpi_key]
    i[kpi_name] = kpi_value
    
    # Add custom KPI
    kpi_name = 'team_h_custom_kpi' # Note we are defining "h" here as this this will be the stats for the opponant
    i[kpi_name] = custom_kpi[kpi_value]
    
    # Add strength attack
    kpi_key = 'strength_attack_away'
    kpi_name = 'team_h_stength_attack' # Note we are defining "h" here as this this will be the stats for the opponant
    kpi_value = name_lkup[team_a_id][kpi_key]
    i[kpi_name] = kpi_value
    
    # Add strength overall
    kpi_key = 'strength_overall_away'
    kpi_name = 'team_h_stength_overall' # Note we are defining "h" here as this this will be the stats for the opponant
    kpi_value = name_lkup[team_a_id][kpi_key]
    i[kpi_name] = kpi_value
   
    
    #Home team
    team_h_id = i['team_h']  
    
    # Add team short name
    kpi_key = 'short_name'
    kpi_name = 'team_h_short_name' 
    kpi_value = name_lkup[team_h_id][kpi_key]
    i[kpi_name] = kpi_value
    
    # Add custom KPI
    kpi_name = 'team_a_custom_kpi' # Note we are defining "h" here as this this will be the stats for the opponant
    i[kpi_name] = custom_kpi[kpi_value]
    
    # Add strength attack
    kpi_key = 'strength_attack_home'
    kpi_name = 'team_a_stength_attack' # Note we are defining "h" here as this this will be the stats for the opponant
    kpi_value = name_lkup[team_h_id][kpi_key]
    i[kpi_name] = kpi_value
    
    # Add strength overall
    kpi_key = 'strength_overall_home'
    kpi_name = 'team_a_stength_overall' # Note we are defining "h" here as this this will be the stats for the opponant
    kpi_value = name_lkup[team_h_id][kpi_key]
    i[kpi_name] = kpi_value

## Reshape fixtures 

In [15]:
# Double game week check function
# check if double gameweek
def multi_gw_check(team,gameweek):
    if gameweek in fix[team]:
        
        # Get list of gameweeks
        key_list = list(fix[team].keys())
        
        # get last gameweek key
        key = [k for k in key_list if gameweek == k or k.startswith(f'{gameweek}_')][-1]
        
        # Redfine new gameweek by appending an '_' and increment integer
        if '_' in key:    
            new_key = int(key.rsplit('_', 1)[-1]) + 1    
            gameweek_new = f'{gameweek}_{new_key}'

        else:
            gameweek_new = f'{gameweek}_2' 
        
        return gameweek_new
    
    else:
        return gameweek

In [16]:
fix = {}
for i in fixtures:
    gameweek = str(i['event'])
    
    # Skip if match not scheduled
    if gameweek == 'None':
        continue
    
    away_team = str(i['team_a_short_name'])
    
    if away_team not in fix:
        fix[away_team] = {}
    
    # check if double gameweek
    gameweek = multi_gw_check(team = away_team,gameweek = gameweek)
                    
    fix[away_team][gameweek] = i[f'team_a_{kpi}']
    
    home_team = str(i['team_h_short_name'])
    
    if home_team not in fix:
        fix[home_team] = {}
    
    fix[home_team][gameweek] = i[f'team_h_{kpi}']

In [17]:
# Identify max value for kpi
max_val = 0
for i in fix:
    for j in fix[i]:
        if fix[i][j] > max_val:
            max_val = fix[i][j]

## Identify and fill missing game weeks

In [18]:
bgw = []
for i in fix:
    for j in range(1,39):
        if str(j) not in fix[i]:
            fix[i][str(j)] = max_val    
            bgw.append(str(j))
bgw = list(set(bgw))
bgw.sort()

In [19]:
bgw

['1', '11', '16', '17', '18', '19', '33']

## Identify multi gameweeks

In [20]:
mgw = []
for i in fix:
    for j in fix[i]:
        if '_' in j:
            gw = j.rsplit('_', 1)[0]
            mgw.append(gw)

mgw = list(set(mgw))
mgw.sort()

In [21]:
mgw

['19']

In [22]:
# Merge multigame weeks into one value
for i in fix:
    key_list = list(fix[i].keys())
    for j in range(1,39):        
        keys = [k for k in key_list if str(j) == k or k.startswith(f'{str(j)}_')]
        if len(keys) <= 1:
            continue
        gameweek = keys[0]
        
                
        # Identify total kpi for the multi game week
        val = 0
        for k in keys:
            val += fix[i][k]
            
        # Average opponent 
        val = val/len(keys)
        
        # Gameweek weighting
        val = val/len(keys)
        
        # Remove keys from dictionary
        for l in keys:
            del fix[i][l]
        
        fix[i][gameweek] = int(val)

## Reshape data to identify complimenting fixtures

In [23]:
df = pd.DataFrame(fix)
df = df.reindex(sorted(df.columns), axis=1)
df.index = pd.to_numeric(df.index, errors='coerce')

In [24]:
# Remove required weeks
# Create list of all gameweeks within range
gameweeks = list(range(start_gameweek, end_gameweek + 1))
gameweeks = [str(i) for i in gameweeks]

In [25]:
if skip_multi_gameweeks:
    gameweeks = list(set(gameweeks) - set(mgw))
    gameweeks.sort()    

In [26]:
if skip_blank_gameweeks:
    gameweeks = list(set(gameweeks) - set(exclude_gameweeks))
    gameweeks.sort()    

In [27]:
# Remove specified gameweeks
gameweeks = list(set(gameweeks) - set(exclude_gameweeks))
gameweeks.sort()

In [28]:
# View gameweeks
gameweeks = [int(i) for i in gameweeks]
gameweeks.sort()
gameweeks

[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]

In [29]:
# Filter to only select required gameweeks
df = df.loc[gameweeks].sort_index()

In [30]:
df

Unnamed: 0,ARS,AVL,BHA,BUR,CHE,CRY,EVE,FUL,LEE,LEI,LIV,MCI,MUN,NEW,SHU,SOU,TOT,WBA,WHU,WOL
20,51,43,26,41,51,49,67,39,38,44,61,36,39,40,66,56,85,102,31,69
21,66,51,61,69,43,51,38,36,67,40,49,39,56,44,102,41,39,26,85,31
22,51,49,85,102,61,38,40,67,44,26,39,43,51,31,36,66,69,39,41,56
23,41,56,43,39,39,40,66,49,31,51,102,85,44,51,69,38,36,61,26,67
24,40,39,41,31,38,43,26,44,56,85,67,61,36,69,49,51,102,66,39,51
25,102,67,31,36,51,39,85,39,51,41,44,56,38,66,26,69,49,43,61,40
26,67,40,36,61,66,26,51,31,41,56,39,49,69,51,85,44,43,39,102,38
27,43,51,67,56,44,61,69,85,49,39,26,66,102,36,51,39,31,38,40,41
28,61,38,51,44,40,36,43,102,69,39,51,26,49,41,67,39,56,31,66,85
29,49,39,38,67,85,66,36,40,26,43,69,51,31,39,41,61,51,44,56,102


## Calculate differences between teams

In [31]:
fixture_pair = pd.DataFrame(columns = ['TEAM_1','TEAM_2','VALUE'])

In [32]:
for i in df.columns[1:]:
    for j in df.columns[1:]:
        value = df[[i,j]].min(axis = 1).sum()
        fixture_pair.loc[len(fixture_pair)] = [i,j,value]

In [33]:
# Remove cases comparing each team to itself
fixture_pair = fixture_pair[fixture_pair['TEAM_1'] != fixture_pair['TEAM_2']]
fixture_pair = fixture_pair.reset_index()

In [34]:
# Remove Duplication
fixture_pair[['TEAM_1','TEAM_2']] = pd.DataFrame(np.sort(fixture_pair[['TEAM_1','TEAM_2']].values))
del fixture_pair['index']
fixture_pair = fixture_pair.drop_duplicates()
fixture_pair = fixture_pair.sort_values('VALUE')

In [38]:
fixture_pair.head(20)
#fixture_pair[(fixture_pair['TEAM_1'] == 'SOU')|(fixture_pair['TEAM_2'] == 'SOU')]

Unnamed: 0,TEAM_1,TEAM_2,VALUE
61,CHE,LEI,603
97,EVE,LEI,607
133,LEE,LEI,609
25,BHA,LEI,614
17,AVL,WOL,616
215,MUN,WOL,617
69,CHE,WBA,617
161,LEI,WOL,618
20,BHA,CHE,618
7,AVL,LEI,620
