In [1]:
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

import algorithms

In [2]:
# Load every shots file that is not from La Liga and stack them into one frame.
SHOTS_DIR = 'dataset/intersection/statsbomb_matches_shots/'

# os.listdir returns names in arbitrary order; sorting keeps the row order of
# the combined frame (and everything derived from it) stable across runs.
shot_files = sorted(
    filename
    for filename in os.listdir(SHOTS_DIR)
    if 'shots' in filename and 'La Liga' not in filename
)
if not shot_files:
    # Fail with an explicit message rather than pd.concat's generic
    # "No objects to concatenate".
    raise FileNotFoundError('No matching shots files found in ' + SHOTS_DIR)

# ignore_index=True gives the combined frame a unique 0..n-1 index instead of
# repeating each file's own row labels.
matches = pd.concat(
    [pd.read_csv(os.path.join(SHOTS_DIR, filename)) for filename in shot_files],
    ignore_index=True,
)
matches.head()

Unnamed: 0,id,index,period,timestamp,minute,second,type,possession,possession_team,play_pattern,...,shot_aerial_won,shot_deflected,shot_one_on_one,shot_open_goal,out,shot_saved_off_target,shot_redirect,shot_follows_dribble,shot_saved_to_post,off_camera
0,bd2f3cbe-d7d8-4a7e-a01a-44d41e8ef888,272,1,00:05:28.851,5,28,Shot,13,Lorient,Regular Play,...,,,,,,,,,,
1,2a229476-cb32-47ad-8f70-28a252fe5fd8,392,1,00:07:31.125,7,31,Shot,18,Lorient,From Throw In,...,,,,,,,,,,
2,ade34ebd-5819-4e42-a97d-2dcc75600e1f,663,1,00:13:03.329,13,3,Shot,33,Lorient,From Throw In,...,True,,,,,,,,,
3,51abe4d1-9329-41f9-bf34-e897afbb8c8e,943,1,00:20:28.418,20,28,Shot,46,Marseille,From Corner,...,,,,,,,,,,
4,b2f38f8b-1710-4cb2-8200-a359a2226a3f,989,1,00:21:42.359,21,42,Shot,52,Marseille,From Throw In,...,,,,,,,,,,


In [3]:
# Show the full column list of the combined shots frame; the head() preview
# above elides the middle columns.
matches.columns

Index(['id', 'index', 'period', 'timestamp', 'minute', 'second', 'type',
       'possession', 'possession_team', 'play_pattern', 'team', 'player',
       'position', 'location', 'duration', 'under_pressure', 'related_events',
       'match_id', 'shot_statsbomb_xg', 'shot_end_location',
       'shot_key_pass_id', 'shot_first_time', 'shot_technique',
       'shot_body_part', 'shot_type', 'shot_outcome', 'shot_freeze_frame',
       'possession_team_id', 'team_id', 'player_id', 'shot_aerial_won',
       'shot_deflected', 'shot_one_on_one', 'shot_open_goal', 'out',
       'shot_saved_off_target', 'shot_redirect', 'shot_follows_dribble',
       'shot_saved_to_post', 'off_camera'],
      dtype='object')

In [4]:
# Benchmark the four Poisson-binomial implementations on xG values drawn from
# real matches, for shot counts cycling from 1 up to the largest match size.
SEED = 42  # fixes which xG values are drawn so the benchmark inputs are reproducible

# Annotate every shot with the number of shots in its match and order the
# frame by that count (stable sort, so ties keep their loaded order).
matches = matches.assign(
    amount_of_shots=matches.groupby('match_id')['match_id'].transform('size')
).sort_values(by='amount_of_shots', kind='stable')
# Get the maximum amount of shots
max_amount_of_shots = int(matches['amount_of_shots'].max())

# Iterate from 1 to max_amount_of_shots and get the execution times
execution_times_dp = []
execution_times_fft = []
execution_times_simulation = []
execution_times_optimized_dp = []
all_shotcounts = []

# A single RandomState is threaded through every draw so the whole run is
# determined by SEED.
rng = np.random.RandomState(SEED)
# sort=False keeps the groups in order of first appearance, i.e. matches with
# the fewest shots first.
xgs_by_match = matches.groupby('match_id', sort=False)['shot_statsbomb_xg']
for position, (match_id, match_xgs) in enumerate(tqdm(xgs_by_match, total=xgs_by_match.ngroups)):
    # Cycle 1, 2, ..., max, 1, 2, ... so no benchmark runs on an empty shot list.
    shotcount = position % max_amount_of_shots + 1
    # Sampling with replacement decouples the benchmarked size from the number
    # of shots the match really had.
    xgs = match_xgs.sample(n=shotcount, replace=True, random_state=rng)
    # Get the execution times; each method returns a pair whose second element
    # is recorded as its execution time.
    poibin = algorithms.PoiBinCalculator(list(xgs))
    _, execution_time_dp = poibin.dp_poibin_pdf()
    _, execution_time_fft = poibin.fft_poibin_pdf()
    _, execution_time_simulation = poibin.sim_poibin_pdf()
    _, execution_time_optimized_dp = poibin.optimized_dp_poibin_pdf()
    all_shotcounts.append(shotcount)
    execution_times_dp.append(execution_time_dp)
    execution_times_fft.append(execution_time_fft)
    execution_times_simulation.append(execution_time_simulation)
    execution_times_optimized_dp.append(execution_time_optimized_dp)

100%|██████████| 1443/1443 [02:47<00:00,  8.61it/s]


In [8]:
def _save_execution_time_scatter(shotcounts, execution_times, title, path):
    """Scatter execution times against shot counts and save the figure.

    Args:
        shotcounts: Number of shots used for each benchmark run (x axis).
        execution_times: Measured execution time of each run (y axis).
        title: Figure title.
        path: File the figure is written to.
    """
    fig, ax = plt.subplots()
    ax.scatter(shotcounts, execution_times)
    ax.set_title(title)
    ax.set_xlabel('Amount of shots')
    # NOTE(review): the "ms" unit assumes the algorithms module reports
    # milliseconds -- confirm against its timing code.
    ax.set_ylabel('Process time (ms)')
    fig.savefig(path)
    # Close the figure so no empty canvas is left behind as cell output.
    plt.close(fig)


# savefig does not create missing directories.
os.makedirs('graphs', exist_ok=True)

# One saved scatter plot per algorithm: (timings, title, output file).
for execution_times, title, path in [
    (execution_times_dp,
     'Execution times for the DP algorithm',
     'graphs/2_execution_times_dp.png'),
    (execution_times_fft,
     'Execution times for the FFT algorithm',
     'graphs/2_execution_times_fft.png'),
    (execution_times_simulation,
     'Execution times for the simulation algorithm\n10000 samples',
     'graphs/2_execution_times_simulation.png'),
    (execution_times_optimized_dp,
     'Execution times for the optimized DP algorithm',
     'graphs/2_execution_times_optimized_dp.png'),
]:
    _save_execution_time_scatter(all_shotcounts, execution_times, title, path)

<Figure size 640x480 with 0 Axes>

In [6]:
# Histogram of how many shots each match contains.
# size() counts rows per match directly, without counting every column first.
shots_per_match = matches.groupby('match_id').size()
# Square-root rule: number of bins is the rounded-up root of the match count.
bin_count = math.ceil(math.sqrt(len(shots_per_match)))

# savefig does not create missing directories.
os.makedirs('graphs', exist_ok=True)
fig, ax = plt.subplots()
ax.hist(shots_per_match, bins=bin_count)
ax.set_title('Distribution of shots per match')
ax.set_xlabel('Amount of shots')
ax.set_ylabel('Amount of matches')
fig.savefig('graphs/shots_per_match.png')
# Close the figure so no empty canvas is left behind as cell output.
plt.close(fig)

<Figure size 640x480 with 0 Axes>

In [9]:
# Report the mean measured execution time of each algorithm, one line each.
for algorithm_label, timings in (
    ('DP', execution_times_dp),
    ('FFT', execution_times_fft),
    ('simulation', execution_times_simulation),
    ('optimized DP', execution_times_optimized_dp),
):
    print('Average execution time ' + algorithm_label + ': ' + str(np.mean(timings)))

Average execution time DP: 0.08881276784436189
Average execution time FFT: 0.10073569300031981
Average execution time simulation: 117.76417196188483
Average execution time optimized DP: 0.26764913998598694
