In [15]:
from statsbombpy import sb
import pandas as pd
import numpy as np
import networkx as nx
from collections import Counter

In [16]:
events = sb.competition_events(
    country="Germany",
    division= "1. Bundesliga",
    season="2023/2024",
    gender="male"
)



In [17]:
#make a list to hold a df of the events of a match
df_dict = {}

for id in events.match_id.unique():
    #find one match for one team
    match_subset = events.loc[events['match_id'] == id]

    #we identify the starting 11
    starting_11 = match_subset.loc[match_subset['type'] == 'Starting XI'].loc[match_subset['team'] == 'Bayer Leverkusen', 'tactics'].to_list()[0]
    position_dict = {}
    
    #we make a dictionary for positions of players
    for member in starting_11['lineup']:
        player_id = int(member['player']['id'])
        position_name = member['position']['name']
        position_dict[player_id] = position_name


    #include only passes, shots, substitution or tactical shifts
    match_subset = match_subset.loc[(match_subset['type'].isin(['Shot','Pass','Substitution', 'Tactical Shift']))]

    #We include rows with Leverkusen as possession team or with type substitution or tactical shift
    match_subset = match_subset.loc[(match_subset['possession_team'] == 'Bayer Leverkusen') | (match_subset['type'].isin(['Substitution', 'Tactical Shift']))]

    #sort the values like when we did the passing sequences
    match_subset = match_subset.sort_values(['period','timestamp'], ascending=[True, True])
        
    match_subset['pass_recipient_position'] = np.nan

    for index, row in match_subset.iterrows():
        #If substitution, we update the dictionary to include player
        if row['type'] == 'Substitution' and row['team'] == 'Bayer Leverkusen':
            position_dict[row['substitution_replacement_id']] = row['position']

        #In case of a tactical shift, create a new position_dict
        if row['type'] == 'Tactical Shift' and row['team'] == 'Bayer Leverkusen':
            lineup = row['tactics']
            position_dict = {}
            for member in lineup['lineup']:
                player_id = int(member['player']['id'])
                position_name = member['position']['name']
                position_dict[player_id] = position_name

        elif row['pass_recipient_id'] in list(position_dict.keys()):
            #if the player is in our dictionary, we assign his position - else he may be a player of the opponent, and we ignore him
            match_subset.at[index, 'pass_recipient_position'] = position_dict[int(row['pass_recipient_id'])]
            
    #den næste linje kan eventuelt kommenteres ud, og så kan vi bare fjerne na i stedet, hvis vi ikke vil have de ekstra nodes, jeg foreslår :D
    #add shot as a pass recipient as well
    #match_subset.loc[match_subset['type'] =='Shot', 'pass_recipient_position'] = match_subset.apply(lambda x: np.where(pd.isna(x['pass_recipient_position']), x['shot_outcome'], x['pass_recipient_position']), axis=1)

    #if a does not have a pass_recipient_position yet, we drop it
    match_subset.dropna(subset=['pass_recipient_position'], inplace = True)
    df_dict[id] = match_subset 


  match_subset['pass_recipient_position'] = np.nan
  match_subset.at[index, 'pass_recipient_position'] = position_dict[int(row['pass_recipient_id'])]
  match_subset['pass_recipient_position'] = np.nan
  match_subset.at[index, 'pass_recipient_position'] = position_dict[int(row['pass_recipient_id'])]
  match_subset['pass_recipient_position'] = np.nan
  match_subset.at[index, 'pass_recipient_position'] = position_dict[int(row['pass_recipient_id'])]
  match_subset['pass_recipient_position'] = np.nan
  match_subset.at[index, 'pass_recipient_position'] = position_dict[int(row['pass_recipient_id'])]
  match_subset['pass_recipient_position'] = np.nan
  match_subset.at[index, 'pass_recipient_position'] = position_dict[int(row['pass_recipient_id'])]
  match_subset['pass_recipient_position'] = np.nan
  match_subset.at[index, 'pass_recipient_position'] = position_dict[int(row['pass_recipient_id'])]
  match_subset['pass_recipient_position'] = np.nan
  match_subset.at[index, 'pass_recipient_position

In [18]:
#dictionary for the graphs
match_graph_dict = {}
#iterate through the dfs
for match in df_dict.values():

    match_id = match['match_id'].iloc[0]
    max_minute = match['minute'].max()
    #iterate from minute 0 to 15 minutes before end of game (it will not include the last 5 minutes, only the targets), and iterate every 5 minutes
    for start_minute in range(0, 85, 5):
        if start_minute == 40:
            pass
        else:
            end_minute = start_minute + 4 
            if start_minute < 44:
                period = 1
            else:
                period = 2
            interval_df = match[(match['minute'] >= start_minute) & (match['minute'] <= end_minute) & (match['period'] == period)]
            graph = nx.DiGraph()
            edges = []

        #iterate through the passings of the match
        for node in pd.concat([interval_df['position'], interval_df['pass_recipient_position']], axis=0):
            if str(node) not in list(graph.nodes):
                #add the node to the graph
                graph.add_node(str(node))

        for passing in interval_df[['position', 'pass_recipient_position']].itertuples():
            edge = (str(passing.position), str(passing.pass_recipient_position))
            edges.append(edge)

        #count the frequencies
        edges_counter = Counter(edges)

        edge_and_count = [(edge[0], edge[1], edges_counter[edge]) for edge in edges]

        #add weighted edges to graph
        if edge_and_count:
            graph.add_weighted_edges_from(edge_and_count)

        #add finished graph to dictionary. add 1 minute to the end of the time window to match the minute the prediction window starts (so the ids match)
        match_graph_dict[str(match_id)+str(end_minute+1)] = [graph]

In [25]:
match_graph_dict.keys()

dict_keys(['38953025', '389530210', '389530215', '389530220', '389530225', '389530230', '389530235', '389530240', '389530250', '389530255', '389530260', '389530265', '389530270', '389530275', '389530280', '389530285', '38952925', '389529210', '389529215', '389529220', '389529225', '389529230', '389529235', '389529240', '389529250', '389529255', '389529260', '389529265', '389529270', '389529275', '389529280', '389529285', '38953335', '389533310', '389533315', '389533320', '389533325', '389533330', '389533335', '389533340', '389533350', '389533355', '389533360', '389533365', '389533370', '389533375', '389533380', '389533385', '38953405', '389534010', '389534015', '389534020', '389534025', '389534030', '389534035', '389534040', '389534050', '389534055', '389534060', '389534065', '389534070', '389534075', '389534080', '389534085', '38953485', '389534810', '389534815', '389534820', '389534825', '389534830', '389534835', '389534840', '389534850', '389534855', '389534860', '389534865', '38953

In [20]:
targets = pd.read_csv("/Users/MathildeStouby/Desktop/P5 GitHub/5-semester/momentum_data.csv")

In [21]:
#the graphs are in a list so just remove the list
for key, value in match_graph_dict.items():
    if isinstance(value, list) and len(value) == 1:
        match_graph_dict[key] = value[0] 

In [23]:
ids = targets["id"].to_list()
momentums = targets["momentum"].to_list()

#join the momentum value on each graph, matching the "match_id + time_interval" value
for i in ids:
    for key, value in match_graph_dict.items():
        if key == str(i):
            index = ids.index(i)
            value.graph["momentum"] = momentums[index]

In [24]:
import pickle
with open("Momentum graphs.pkl", "wb") as file:
    pickle.dump(match_graph_dict, file)