In [557]:
import pandas as pd
import ast

In [558]:
# Load the raw CSV inputs. Every path is relative ('../'), so the files are
# resolved against the parent of the kernel's working directory.
events_world_cup =pd.read_csv('../events_World_Cup.csv')
teams = pd.read_csv('../teams.csv')
tags2name = pd.read_csv('../tags2name.csv')
playerrank = pd.read_csv('../playerank.csv')
players = pd.read_csv('../players.csv')
matches_world_cup = pd.read_csv('../matches_World_Cup.csv')

Let's drop the columns we don't need.

In [559]:
# Columns kept from each input table; everything else is discarded.
event_columns = ['subEventName', 'tags', 'playerId', 'matchId', 'eventName', 'teamId', 'eventSec', 'matchPeriod']
player_columns = ['wyId', 'shortName']
tag_columns = ['Tag', 'Description']
team_columns = ['wyId', 'officialName', 'type']
playerrank_columns = ['playerId', 'roleCluster']

events_world_cup = events_world_cup.loc[:, event_columns]
players = players.loc[:, player_columns]
tags2name = tags2name.loc[:, tag_columns]
teams = teams.loc[:, team_columns]
playerrank = playerrank.loc[:, playerrank_columns]


Start filtering the data

In [560]:
# Only national teams
teams = teams[teams['type'] == 'national']

# Only matches where France has played. na=False makes a missing label count
# as "not a France match" (consistent with the later Match filters) instead of
# producing a NaN mask that cannot be used for boolean indexing.
matches_world_cup = matches_world_cup[matches_world_cup['label'].str.contains("France", na=False)]

# Only events from the matches where France has played.
events_world_cup = events_world_cup[events_world_cup['matchId'].isin(matches_world_cup['wyId'])]

events_world_cup

Unnamed: 0,subEventName,tags,playerId,matchId,eventName,teamId,eventSec,matchPeriod
18813,Simple pass,[{'id': 1801}],238055,2057966,Pass,8493,1.435354,1H
18814,High pass,[{'id': 1802}],61395,2057966,Pass,8493,3.978396,1H
18815,Throw in,[{'id': 1801}],340646,2057966,Free Kick,4418,15.608867,1H
18816,Simple pass,[{'id': 1801}],209091,2057966,Pass,4418,16.385084,1H
18817,Launch,[{'id': 1802}],340646,2057966,Pass,4418,17.214485,1H
...,...,...,...,...,...,...,...,...
101751,High pass,[{'id': 1802}],69396,2058017,Pass,9598,2964.715715,2H
101752,Clearance,[{'id': 1802}],3309,2058017,Others on the ball,4418,2967.926784,2H
101753,Throw in,[{'id': 1801}],69968,2058017,Free Kick,9598,2972.985039,2H
101754,Simple pass,[{'id': 1801}],3476,2058017,Pass,9598,2978.301867,2H


In [561]:
# Attach each event's team record (official name, type) by joining teamId -> wyId.
# Default inner join: events whose teamId is not among the national teams are dropped.
data = events_world_cup.merge(teams, left_on='teamId', right_on='wyId')
data

Unnamed: 0,subEventName,tags,playerId,matchId,eventName,teamId,eventSec,matchPeriod,wyId,officialName,type
0,Simple pass,[{'id': 1801}],238055,2057966,Pass,8493,1.435354,1H,8493,Australia,national
1,High pass,[{'id': 1802}],61395,2057966,Pass,8493,3.978396,1H,8493,Australia,national
2,Head pass,[{'id': 1802}],61425,2057966,Pass,8493,19.920463,1H,8493,Australia,national
3,High pass,[{'id': 1801}],62389,2057966,Pass,8493,26.371362,1H,8493,Australia,national
4,Air duel,"[{'id': 701}, {'id': 1802}]",16151,2057966,Duel,8493,27.942092,1H,8493,Australia,national
...,...,...,...,...,...,...,...,...,...,...,...
10838,Touch,[],69396,2058017,Others on the ball,9598,2960.803153,2H,9598,Croatia,national
10839,High pass,[{'id': 1802}],69396,2058017,Pass,9598,2964.715715,2H,9598,Croatia,national
10840,Throw in,[{'id': 1801}],69968,2058017,Free Kick,9598,2972.985039,2H,9598,Croatia,national
10841,Simple pass,[{'id': 1801}],3476,2058017,Pass,9598,2978.301867,2H,9598,Croatia,national


In [562]:
# Attach the player's short name by joining playerId -> wyId.
# Default inner join: events whose playerId has no row in `players` are dropped.
data = data.merge(players, left_on='playerId', right_on='wyId')
data

Unnamed: 0,subEventName,tags,playerId,matchId,eventName,teamId,eventSec,matchPeriod,wyId_x,officialName,type,wyId_y,shortName
0,Simple pass,[{'id': 1801}],238055,2057966,Pass,8493,1.435354,1H,8493,Australia,national,238055,A. Nabbout
1,Simple pass,[{'id': 1801}],238055,2057966,Pass,8493,318.452504,1H,8493,Australia,national,238055,A. Nabbout
2,Ground loose ball duel,"[{'id': 701}, {'id': 1802}]",238055,2057966,Duel,8493,622.800480,1H,8493,Australia,national,238055,A. Nabbout
3,Head pass,[{'id': 1801}],238055,2057966,Pass,8493,1089.881235,1H,8493,Australia,national,238055,A. Nabbout
4,Air duel,"[{'id': 701}, {'id': 1802}]",238055,2057966,Duel,8493,1310.158067,1H,8493,Australia,national,238055,A. Nabbout
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10838,Simple pass,[{'id': 1801}],69411,2058017,Pass,9598,2888.451480,2H,9598,Croatia,national,69411,A. Kramari\u0107
10839,Ground attacking duel,"[{'id': 502}, {'id': 703}, {'id': 1801}]",135810,2058017,Duel,9598,2398.932619,2H,9598,Croatia,national,135810,M. Pjaca
10840,Simple pass,[{'id': 1802}],135810,2058017,Pass,9598,2400.002448,2H,9598,Croatia,national,135810,M. Pjaca
10841,Simple pass,[{'id': 1801}],135810,2058017,Pass,9598,2567.478676,2H,9598,Croatia,national,135810,M. Pjaca


In [563]:
# Merge tags

def map_tags_to_desc(tag_list):
    """Translate a list of ``{'id': ...}`` tag dicts into tag descriptions.

    Each id is looked up in the module-level ``tags2name`` frame (columns
    ``Tag`` and ``Description``). The first matching description is used, and
    ids with no match are silently skipped. Input order is preserved.
    """
    # Lazily evaluated, one lookup per tag, in input order.
    lookups = (
        tags2name.loc[tags2name['Tag'] == entry['id'], 'Description'].values
        for entry in tag_list
    )
    return [found[0] for found in lookups if len(found) > 0]

# Parse each stringified tag list, then translate its ids into descriptions.
data['tags'] = data['tags'].map(ast.literal_eval).map(map_tags_to_desc)
data

Unnamed: 0,subEventName,tags,playerId,matchId,eventName,teamId,eventSec,matchPeriod,wyId_x,officialName,type,wyId_y,shortName
0,Simple pass,[Accurate],238055,2057966,Pass,8493,1.435354,1H,8493,Australia,national,238055,A. Nabbout
1,Simple pass,[Accurate],238055,2057966,Pass,8493,318.452504,1H,8493,Australia,national,238055,A. Nabbout
2,Ground loose ball duel,"[Lost, Not accurate]",238055,2057966,Duel,8493,622.800480,1H,8493,Australia,national,238055,A. Nabbout
3,Head pass,[Accurate],238055,2057966,Pass,8493,1089.881235,1H,8493,Australia,national,238055,A. Nabbout
4,Air duel,"[Lost, Not accurate]",238055,2057966,Duel,8493,1310.158067,1H,8493,Australia,national,238055,A. Nabbout
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10838,Simple pass,[Accurate],69411,2058017,Pass,9598,2888.451480,2H,9598,Croatia,national,69411,A. Kramari\u0107
10839,Ground attacking duel,"[Free space left, Won, Accurate]",135810,2058017,Duel,9598,2398.932619,2H,9598,Croatia,national,135810,M. Pjaca
10840,Simple pass,[Not accurate],135810,2058017,Pass,9598,2400.002448,2H,9598,Croatia,national,135810,M. Pjaca
10841,Simple pass,[Accurate],135810,2058017,Pass,9598,2567.478676,2H,9598,Croatia,national,135810,M. Pjaca


In [564]:
# Drop the join keys, swap the numeric match id for the readable match label,
# and give the remaining columns presentation-friendly names.
match_labels = matches_world_cup[['wyId', 'label']]

data = (
    data
    .drop(columns=['playerId', 'teamId', 'type', 'wyId_y', 'wyId_x'])
    .merge(match_labels, left_on='matchId', right_on='wyId', how='left')
    .assign(matchId=lambda frame: frame['label'])  # overwrite in place, keeps column position
    .drop(columns=['label', 'wyId'])
    .rename(columns={
        'officialName': 'Team',
        'shortName': 'Player',
        'matchId': 'Match',
        'eventName': 'Event',
        'tags': 'Tags',
        'matchPeriod': 'MatchPeriod',
        'eventSec': 'EventSec',
        'subEventName': 'SubEvent',
    })
)
data


Unnamed: 0,SubEvent,Tags,Match,Event,EventSec,MatchPeriod,Team,Player
0,Simple pass,[Accurate],"France - Australia, 2 - 1",Pass,1.435354,1H,Australia,A. Nabbout
1,Simple pass,[Accurate],"France - Australia, 2 - 1",Pass,318.452504,1H,Australia,A. Nabbout
2,Ground loose ball duel,"[Lost, Not accurate]","France - Australia, 2 - 1",Duel,622.800480,1H,Australia,A. Nabbout
3,Head pass,[Accurate],"France - Australia, 2 - 1",Pass,1089.881235,1H,Australia,A. Nabbout
4,Air duel,"[Lost, Not accurate]","France - Australia, 2 - 1",Duel,1310.158067,1H,Australia,A. Nabbout
...,...,...,...,...,...,...,...,...
10838,Simple pass,[Accurate],"France - Croatia, 4 - 2",Pass,2888.451480,2H,Croatia,A. Kramari\u0107
10839,Ground attacking duel,"[Free space left, Won, Accurate]","France - Croatia, 4 - 2",Duel,2398.932619,2H,Croatia,M. Pjaca
10840,Simple pass,[Not accurate],"France - Croatia, 4 - 2",Pass,2400.002448,2H,Croatia,M. Pjaca
10841,Simple pass,[Accurate],"France - Croatia, 4 - 2",Pass,2567.478676,2H,Croatia,M. Pjaca


In [565]:
def _tags_to_text(tags):
    """Join a list of tag descriptions with ', '; return any non-list value unchanged."""
    if isinstance(tags, list):
        return ', '.join(tags)
    return tags

data['Tags'] = data['Tags'].map(_tags_to_text)
data

Unnamed: 0,SubEvent,Tags,Match,Event,EventSec,MatchPeriod,Team,Player
0,Simple pass,Accurate,"France - Australia, 2 - 1",Pass,1.435354,1H,Australia,A. Nabbout
1,Simple pass,Accurate,"France - Australia, 2 - 1",Pass,318.452504,1H,Australia,A. Nabbout
2,Ground loose ball duel,"Lost, Not accurate","France - Australia, 2 - 1",Duel,622.800480,1H,Australia,A. Nabbout
3,Head pass,Accurate,"France - Australia, 2 - 1",Pass,1089.881235,1H,Australia,A. Nabbout
4,Air duel,"Lost, Not accurate","France - Australia, 2 - 1",Duel,1310.158067,1H,Australia,A. Nabbout
...,...,...,...,...,...,...,...,...
10838,Simple pass,Accurate,"France - Croatia, 4 - 2",Pass,2888.451480,2H,Croatia,A. Kramari\u0107
10839,Ground attacking duel,"Free space left, Won, Accurate","France - Croatia, 4 - 2",Duel,2398.932619,2H,Croatia,M. Pjaca
10840,Simple pass,Not accurate,"France - Croatia, 4 - 2",Pass,2400.002448,2H,Croatia,M. Pjaca
10841,Simple pass,Accurate,"France - Croatia, 4 - 2",Pass,2567.478676,2H,Croatia,M. Pjaca


In [566]:
# Number of events recorded for each match
events_per_match = data.groupby('Match').size()
grouped = events_per_match.reset_index(name='count')

grouped

Unnamed: 0,Match,count
0,"Denmark - France, 0 - 0",1581
1,"France - Argentina, 4 - 3",1426
2,"France - Australia, 2 - 1",1514
3,"France - Belgium, 1 - 0",1588
4,"France - Croatia, 4 - 2",1459
5,"France - Peru, 1 - 0",1668
6,"Uruguay - France, 0 - 2",1607


In [567]:
# Distinct players who have at least one event for France, in order of first appearance
is_france_event = data['Team'] == 'France'
french_players = data.loc[is_france_event, 'Player'].unique().tolist()
french_players

['B. Pavard',
 'C. Tolisso',
 'R. Varane',
 'H. Lloris',
 'P. Pogba',
 'S. Umtiti',
 'A. Griezmann',
 'N. Kant\\u00e9',
 'K. Mbapp\\u00e9',
 'L. Hern\\u00e1ndez',
 'O. Demb\\u00e9l\\u00e9',
 'N. Fekir',
 'O. Giroud',
 'B. Matuidi',
 "S. N'Zonzi",
 'D. Sidib\\u00e9',
 'P. Kimpembe',
 'T. Lemar',
 'S. Mandanda',
 'B. Mendy',
 'F. Thauvin']

In [568]:
# Manually map the players to their respective positions.
# Keys must match data['Player'] character for character. The loaded names
# contain the literal escape text (backslash + "u00e9") rather than the
# accented character, which is why the accented names use a doubled backslash.
player_position_mapping = {
    'B. Pavard': 'RB',
    'C. Tolisso': 'CM',
    'R. Varane': 'CB',
    'H. Lloris': 'GKP',
    'P. Pogba': 'CM',
    'S. Umtiti': 'CB',
    'A. Griezmann': 'AM',
    'N. Kant\\u00e9': 'CDM',
    'K. Mbapp\\u00e9': 'CF',
    'L. Hern\\u00e1ndez': 'LB',
    'O. Demb\\u00e9l\\u00e9': 'RW',
    'N. Fekir': 'AM',
    'O. Giroud': 'CF',
    'B. Matuidi': 'LM',
    "S. N'Zonzi": 'CM',
    # NOTE(review): this accented spelling looks unused, since the data carries
    # the escaped spelling listed at the end of the dict — confirm before removing.
    'D. Sidibé': 'RB',
    'P. Kimpembe': 'CB',
    'T. Lemar': 'LW',
    'S. Mandanda': 'GKP',
    'B. Mendy': 'LB',
    'F. Thauvin': 'RW',
    'D. Sidib\\u00e9': 'RB',
}

# Map the Position based on Player names; players absent from the mapping
# (e.g. the opponents) get NaN.
data['Position'] = data['Player'].map(player_position_mapping)

data[data['Team'] == 'France']

Unnamed: 0,SubEvent,Tags,Match,Event,EventSec,MatchPeriod,Team,Player,Position
700,Throw in,Accurate,"France - Australia, 2 - 1",Free Kick,15.608867,1H,France,B. Pavard,RB
701,Launch,Not accurate,"France - Australia, 2 - 1",Pass,17.214485,1H,France,B. Pavard,RB
702,Simple pass,Accurate,"France - Australia, 2 - 1",Pass,32.884277,1H,France,B. Pavard,RB
703,Simple pass,Accurate,"France - Australia, 2 - 1",Pass,54.866585,1H,France,B. Pavard,RB
704,Simple pass,Accurate,"France - Australia, 2 - 1",Pass,75.441589,1H,France,B. Pavard,RB
...,...,...,...,...,...,...,...,...,...
6020,Simple pass,Accurate,"Denmark - France, 0 - 0",Pass,2848.941221,2H,France,B. Mendy,LB
6021,Cross,"Left foot, Blocked, Not accurate","Denmark - France, 0 - 0",Pass,2853.514985,2H,France,B. Mendy,LB
6022,Throw in,Accurate,"Denmark - France, 0 - 0",Free Kick,2859.488457,2H,France,B. Mendy,LB
6023,Simple pass,Accurate,"France - Argentina, 4 - 3",Pass,2613.230156,2H,France,F. Thauvin,RW


In [569]:
# Replace missing SubEvent values with an empty string.
data['SubEvent'] = data['SubEvent'].fillna('')

In [570]:
def _set_status(frame, match, period, status, after_minute=None):
    """Set ``Status`` for a slice of one match, in place.

    Rows of ``frame`` whose ``Match`` contains ``match`` and whose
    ``MatchPeriod`` equals ``period`` are labelled ``status``. When
    ``after_minute`` is given, only rows with ``EventSec`` strictly greater
    than ``after_minute * 60`` are touched; ``None`` relabels the whole period.
    """
    mask = frame['Match'].str.contains(match, na=False) & (frame['MatchPeriod'] == period)
    if after_minute is not None:
        mask &= frame['EventSec'] > after_minute * 60
    frame.loc[mask, 'Status'] = status


# Every event starts as DRAW, which is already correct for Denmark - France (0 - 0).
data["Status"] = "DRAW"

# Hand-entered score changes from France's point of view:
# (match, period, minute within that period, status from then on).
# A minute of None relabels the whole period. Order matters: within a period,
# later entries overwrite the tail of earlier ones.
status_changes = [
    # France vs Australia
    ('France - Australia', '2H', 13, 'LEADING'),   # first goal scored by France
    ('France - Australia', '2H', 17, 'DRAW'),      # goal conceded by France
    ('France - Australia', '2H', 36, 'LEADING'),   # France back in front
    # France vs Peru
    ('France - Peru', '1H', 35, 'LEADING'),        # first goal scored by France
    # Fix: France still led 1 - 0 for the whole second half, which was left as DRAW.
    ('France - Peru', '2H', None, 'LEADING'),
    # France vs Argentina
    ('France - Argentina', '1H', 14, 'LEADING'),   # scored
    ('France - Argentina', '1H', 42, 'DRAW'),      # conceded
    ('France - Argentina', '2H', 4, 'LOSING'),     # conceded
    ('France - Argentina', '2H', 13, 'DRAW'),      # scored
    ('France - Argentina', '2H', 20, 'LEADING'),   # scored
    # Uruguay vs France
    # Fix: this first-half mask used to be computed and then overwritten
    # without ever being applied, so the end of the first half stayed DRAW.
    ('Uruguay - France', '1H', 40, 'LEADING'),     # scored
    ('Uruguay - France', '2H', None, 'LEADING'),
    # France vs Belgium
    ('France - Belgium', '2H', 7, 'LEADING'),      # scored
    # France vs Croatia
    ('France - Croatia', '1H', 20, 'LEADING'),     # scored
    ('France - Croatia', '1H', 29, 'DRAW'),        # conceded
    ('France - Croatia', '1H', 39, 'LEADING'),     # scored
    ('France - Croatia', '2H', None, 'LEADING'),   # France led throughout the second half
]

for match_name, period, minute, status in status_changes:
    _set_status(data, match_name, period, status, after_minute=minute)

### Events in the given number of minutes before each goal

In [571]:
def events_before_goal_by_minutes(data, opponent_team, minutes=1):
    """Collect the events in the ``minutes`` leading up to every goal of a match.

    Parameters
    ----------
    data : pandas.DataFrame
        Event table with at least the columns ``Match``, ``Tags``,
        ``EventSec`` and ``MatchPeriod``.
    opponent_team : str
        Pattern matched against ``Match`` with ``str.contains`` (pandas treats
        it as a regular expression by default).
    minutes : int or float, default 1
        Length of the window before each goal, in minutes.

    Returns
    -------
    pandas.DataFrame
        For each goal, all events of the matching match(es) in the same period
        with ``goal_time - minutes * 60 <= EventSec <= goal_time`` (the goal
        itself included), concatenated with a fresh index and then sorted by
        ``MatchPeriod`` and ``EventSec``. An event that falls inside the
        windows of two goals appears twice. An empty frame with ``data``'s
        columns is returned when no goal is found.

    Notes
    -----
    A goal is any event whose ``Tags`` contain both ``'Goal,'`` and
    ``'Accurate'`` (case-sensitive). There is no team filter, so goals by
    either side are included.
    """
    # The match filter is identical for the goal search and for every window,
    # so build it once instead of once per goal.
    in_match = data['Match'].str.contains(opponent_team, na=False)

    tags = data['Tags']
    is_goal = tags.str.contains('Goal,', na=False) & tags.str.contains('Accurate', na=False)
    goal_times = data.loc[in_match & is_goal, ['EventSec', 'MatchPeriod']]

    if goal_times.empty:
        # Return an empty DataFrame if no goals are found
        return pd.DataFrame(columns=data.columns)

    window_seconds = minutes * 60
    windows = []
    for goal_time, goal_period in zip(goal_times['EventSec'], goal_times['MatchPeriod']):
        # Clamp at 0 so a goal early in the period does not produce a negative bound.
        window_start = max(0, goal_time - window_seconds)
        in_window = (
            in_match
            & (data['MatchPeriod'] == goal_period)
            & (data['EventSec'] >= window_start)
            & (data['EventSec'] <= goal_time)
        )
        windows.append(data[in_window])

    # Concatenate all the windows, then order them chronologically per period.
    combined = pd.concat(windows, ignore_index=True)
    return combined.sort_values(by=['MatchPeriod', 'EventSec'])

France - Denmark

In [572]:
# Events in the minute before each goal of the Denmark match
events_denmark = events_before_goal_by_minutes(data, opponent_team='Denmark', minutes=1)
events_denmark

Unnamed: 0,SubEvent,Tags,Match,Event,EventSec,MatchPeriod,Team,Player,Position,Status


France - Argentina

In [573]:
# Events in the minute before each goal of the Argentina match; show France's only
events_argentina = events_before_goal_by_minutes(data, opponent_team='Argentina', minutes=1)
events_argentina.loc[events_argentina['Team'] == 'France']

Unnamed: 0,SubEvent,Tags,Match,Event,EventSec,MatchPeriod,Team,Player,Position,Status
23,Penalty,"Goal, Left foot, Position: Goal low center, Ac...","France - Argentina, 4 - 3",Free Kick,761.289657,1H,France,A. Griezmann,AM,DRAW
65,Ground defending duel,"Free space left, Neutral, Accurate","France - Argentina, 4 - 3",Duel,2391.157729,1H,France,N. Kant\u00e9,CDM,LEADING
63,Air duel,"Won, Accurate","France - Argentina, 4 - 3",Duel,2412.972955,1H,France,B. Pavard,RB,LEADING
64,Air duel,"Won, Accurate","France - Argentina, 4 - 3",Duel,2413.911393,1H,France,P. Pogba,CM,LEADING
55,Air duel,"Won, Accurate","France - Argentina, 4 - 3",Duel,151.127278,2H,France,R. Varane,CB,DRAW
56,Clearance,"Interception, Not accurate","France - Argentina, 4 - 3",Others on the ball,151.562032,2H,France,P. Pogba,CM,DRAW
57,Ground defending duel,"Free space right, Lost, Not accurate","France - Argentina, 4 - 3",Duel,153.96022,2H,France,B. Matuidi,LM,DRAW
15,Air duel,"Lost, Not accurate","France - Argentina, 4 - 3",Duel,650.822751,2H,France,L. Hern\u00e1ndez,LB,LOSING
4,Hand pass,Accurate,"France - Argentina, 4 - 3",Pass,655.959008,2H,France,H. Lloris,GKP,LOSING
2,Simple pass,Accurate,"France - Argentina, 4 - 3",Pass,660.094663,2H,France,R. Varane,CB,LOSING


In [574]:
# France-only slice of the Argentina pre-goal events
events_argentina_france = events_argentina.loc[events_argentina['Team'] == 'France']

Uruguay - France

In [575]:
# Events in the minute before each goal of the Uruguay match
events_uruguay = events_before_goal_by_minutes(data, opponent_team='Uruguay', minutes=1)
events_uruguay

Unnamed: 0,SubEvent,Tags,Match,Event,EventSec,MatchPeriod,Team,Player,Position,Status
1,Free kick cross,"Assist, High, Accurate","Uruguay - France, 0 - 2",Free Kick,2369.447264,1H,France,A. Griezmann,AM,DRAW
2,Air duel,"Lost, Not accurate","Uruguay - France, 0 - 2",Duel,2370.574308,1H,Uruguay,C. Stuani,,DRAW
0,Shot,"Goal, Head/body, Opportunity, Position: Goal l...","Uruguay - France, 0 - 2",Shot,2370.699238,1H,France,R. Varane,CB,DRAW
14,Simple pass,"Fairplay, Not accurate","Uruguay - France, 0 - 2",Pass,911.317722,2H,Uruguay,L. Su\u00e1rez,,LEADING
6,Simple pass,Accurate,"Uruguay - France, 0 - 2",Pass,917.104962,2H,France,R. Varane,CB,LEADING
7,Launch,Accurate,"Uruguay - France, 0 - 2",Pass,920.558108,2H,France,H. Lloris,GKP,LEADING
18,Air duel,"Won, Accurate","Uruguay - France, 0 - 2",Duel,923.857204,2H,Uruguay,J. Gim\u00e9nez,,LEADING
12,Air duel,"Lost, Not accurate","Uruguay - France, 0 - 2",Duel,923.882755,2H,France,O. Giroud,CF,LEADING
4,Head pass,Not accurate,"Uruguay - France, 0 - 2",Pass,926.139298,2H,France,C. Tolisso,CM,LEADING
22,Acceleration,Accurate,"Uruguay - France, 0 - 2",Others on the ball,927.620581,2H,Uruguay,C. Rodr\u00edguez,,LEADING


In [576]:
# France-only slice of the Uruguay pre-goal events
events_uruguay_france = events_uruguay.loc[events_uruguay['Team'] == 'France']

Belgium - France

In [577]:
# Events in the minute before each goal of the Belgium match
events_belgium = events_before_goal_by_minutes(data, opponent_team='Belgium', minutes=1)
events_belgium

Unnamed: 0,SubEvent,Tags,Match,Event,EventSec,MatchPeriod,Team,Player,Position,Status
2,Simple pass,Accurate,"France - Belgium, 1 - 0",Pass,278.729058,2H,France,S. Umtiti,CB,DRAW
12,Simple pass,Accurate,"France - Belgium, 1 - 0",Pass,279.349046,2H,France,B. Matuidi,LM,DRAW
3,Simple pass,Accurate,"France - Belgium, 1 - 0",Pass,281.916241,2H,France,S. Umtiti,CB,DRAW
1,Simple pass,Accurate,"France - Belgium, 1 - 0",Pass,284.663701,2H,France,H. Lloris,GKP,DRAW
0,Simple pass,Accurate,"France - Belgium, 1 - 0",Pass,288.55872,2H,France,R. Varane,CB,DRAW
8,Simple pass,Accurate,"France - Belgium, 1 - 0",Pass,293.119569,2H,France,N. Kant\u00e9,CDM,DRAW
4,Simple pass,Accurate,"France - Belgium, 1 - 0",Pass,296.252103,2H,France,S. Umtiti,CB,DRAW
9,Simple pass,Accurate,"France - Belgium, 1 - 0",Pass,297.398289,2H,France,L. Hern\u00e1ndez,LB,DRAW
13,Simple pass,Accurate,"France - Belgium, 1 - 0",Pass,300.399799,2H,France,B. Matuidi,LM,DRAW
10,Ground attacking duel,"Free space right, Won, Accurate","France - Belgium, 1 - 0",Duel,302.21031,2H,France,O. Giroud,CF,DRAW


In [578]:
# France-only slice of the Belgium pre-goal events
events_belgium_france = events_belgium.loc[events_belgium['Team'] == 'France']

France - Croatia

In [579]:
# Events in the minute before each goal of the Croatia match
events_croatia = events_before_goal_by_minutes(data, opponent_team='Croatia', minutes=1)
events_croatia

Unnamed: 0,SubEvent,Tags,Match,Event,EventSec,MatchPeriod,Team,Player,Position,Status
82,Ground defending duel,"Lost, Not accurate","France - Croatia, 4 - 2",Duel,1616.441157,1H,France,N. Kant\u00e9,CDM,LEADING
89,Ground attacking duel,"Won, Accurate","France - Croatia, 4 - 2",Duel,1616.441157,1H,Croatia,I. Peri\u0161i\u0107,,LEADING
83,Foul,Yellow card,"France - Croatia, 4 - 2",Foul,1618.274416,1H,France,N. Kant\u00e9,CDM,LEADING
88,Free kick cross,"High, Accurate","France - Croatia, 4 - 2",Free Kick,1667.389939,1H,Croatia,L. Modri\u0107,,LEADING
86,Head pass,Accurate,"France - Croatia, 4 - 2",Pass,1671.715305,1H,Croatia,M. Mand\u017euki\u0107,,LEADING
...,...,...,...,...,...,...,...,...,...,...
66,Simple pass,"Interception, Accurate","France - Croatia, 4 - 2",Pass,1381.101172,2H,France,L. Hern\u00e1ndez,LB,LEADING
65,Simple pass,Accurate,"France - Croatia, 4 - 2",Pass,1385.191539,2H,France,S. Umtiti,CB,LEADING
61,Ground attacking duel,"Dangerous ball lost, Free space left, Lost, No...","France - Croatia, 4 - 2",Duel,1388.157139,2H,France,H. Lloris,GKP,LEADING
73,Ground defending duel,"Free space right, Won, Accurate","France - Croatia, 4 - 2",Duel,1388.157139,2H,Croatia,M. Mand\u017euki\u0107,,LEADING


In [580]:
# France-only slice of the Croatia pre-goal events
events_croatia_france = events_croatia.loc[events_croatia['Team'] == 'France']

France - Australia

In [581]:
# Events in the minute before each goal of the Australia match
events_australia = events_before_goal_by_minutes(data, opponent_team='Australia', minutes=1)
events_australia

Unnamed: 0,SubEvent,Tags,Match,Event,EventSec,MatchPeriod,Team,Player,Position,Status
1,Penalty,"Goal, Left foot, Position: Goal center right, ...","France - Australia, 2 - 1",Free Kick,730.009243,2H,France,A. Griezmann,AM,DRAW
0,Penalty,"Goal, Right foot, Position: Goal low right, Ac...","France - Australia, 2 - 1",Free Kick,976.718211,2H,Australia,M. Jedinak,,LEADING


In [582]:
# France-only slice of the Australia pre-goal events
events_australia_france = events_australia.loc[events_australia['Team'] == 'France']

France - Peru

In [583]:
# Events in the minute before each goal of the Peru match
events_peru = events_before_goal_by_minutes(data, opponent_team='Peru', minutes=1)
events_peru

Unnamed: 0,SubEvent,Tags,Match,Event,EventSec,MatchPeriod,Team,Player,Position,Status
5,Ground loose ball duel,"Won, Accurate","France - Peru, 1 - 0",Duel,1965.524366,1H,France,N. Kant\u00e9,CDM,DRAW
25,Ground loose ball duel,"Lost, Not accurate","France - Peru, 1 - 0",Duel,1966.549858,1H,Peru,\u00c9. Flores,,DRAW
6,Simple pass,Accurate,"France - Peru, 1 - 0",Pass,1967.647707,1H,France,N. Kant\u00e9,CDM,DRAW
14,Simple pass,Accurate,"France - Peru, 1 - 0",Pass,1969.370714,1H,France,B. Matuidi,LM,DRAW
11,Simple pass,Accurate,"France - Peru, 1 - 0",Pass,1971.975232,1H,France,L. Hern\u00e1ndez,LB,DRAW
19,Ground loose ball duel,"Sliding tackle, Lost, Not accurate","France - Peru, 1 - 0",Duel,1972.539213,1H,Peru,L. Adv\u00edncula,,DRAW
0,High pass,"Key pass, Through, Accurate","France - Peru, 1 - 0",Pass,1977.211491,1H,France,P. Pogba,CM,DRAW
9,Shot,"Right foot, Opportunity, Position: Goal center...","France - Peru, 1 - 0",Shot,1978.651572,1H,France,K. Mbapp\u00e9,CF,DRAW
26,Reflexes,"Position: Goal center, Accurate","France - Peru, 1 - 0",Save attempt,1980.363205,1H,Peru,P. Gallese,,DRAW
16,Simple pass,Accurate,"France - Peru, 1 - 0",Pass,2005.047435,1H,Peru,A. Rodr\u00edguez,,DRAW


In [584]:
# France-only slice of the Peru pre-goal events
events_peru_france = events_peru.loc[events_peru['Team'] == 'France']

In [585]:
# Stack the per-match pre-goal windows into one frame with a fresh 0..n-1 index.
per_match_windows = [
    events_argentina,
    events_uruguay,
    events_croatia,
    events_australia,
    events_belgium,
    events_peru,
]
all_events_before_goals = pd.concat(per_match_windows, ignore_index=True)
all_events_before_goals

Unnamed: 0,SubEvent,Tags,Match,Event,EventSec,MatchPeriod,Team,Player,Position,Status
0,Penalty,"Goal, Left foot, Position: Goal low center, Ac...","France - Argentina, 4 - 3",Free Kick,761.289657,1H,France,A. Griezmann,AM,DRAW
1,Simple pass,Accurate,"France - Argentina, 4 - 3",Pass,2385.262866,1H,Argentina,G. Mercado,,LEADING
2,Ground attacking duel,"Free space right, Neutral, Accurate","France - Argentina, 4 - 3",Duel,2389.505394,1H,Argentina,L. Messi,,LEADING
3,Ground defending duel,"Free space left, Neutral, Accurate","France - Argentina, 4 - 3",Duel,2391.157729,1H,France,N. Kant\u00e9,CDM,LEADING
4,Corner,"High, Accurate","France - Argentina, 4 - 3",Free Kick,2411.425619,1H,Argentina,L. Messi,,LEADING
...,...,...,...,...,...,...,...,...,...,...
261,Ground loose ball duel,"Sliding tackle, Lost, Not accurate","France - Peru, 1 - 0",Duel,2019.978058,1H,Peru,P. Guerrero,,DRAW
262,Smart pass,"Key pass, Through, Accurate","France - Peru, 1 - 0",Pass,2022.354070,1H,France,P. Pogba,CM,DRAW
263,Shot,"Left foot, Blocked, Opportunity, Not accurate","France - Peru, 1 - 0",Shot,2023.580214,1H,France,O. Giroud,CF,DRAW
264,Touch,Interception,"France - Peru, 1 - 0",Others on the ball,2024.690943,1H,Peru,A. Rodr\u00edguez,,DRAW


In [586]:
# Drop duels whose tags mark them as "Lost"; every other event is kept.
is_duel = all_events_before_goals['Event'] == 'Duel'
tagged_lost = all_events_before_goals['Tags'].str.contains("Lost")
filtered_events = all_events_before_goals[~(is_duel & tagged_lost)]

filtered_events

Unnamed: 0,SubEvent,Tags,Match,Event,EventSec,MatchPeriod,Team,Player,Position,Status
0,Penalty,"Goal, Left foot, Position: Goal low center, Ac...","France - Argentina, 4 - 3",Free Kick,761.289657,1H,France,A. Griezmann,AM,DRAW
1,Simple pass,Accurate,"France - Argentina, 4 - 3",Pass,2385.262866,1H,Argentina,G. Mercado,,LEADING
2,Ground attacking duel,"Free space right, Neutral, Accurate","France - Argentina, 4 - 3",Duel,2389.505394,1H,Argentina,L. Messi,,LEADING
3,Ground defending duel,"Free space left, Neutral, Accurate","France - Argentina, 4 - 3",Duel,2391.157729,1H,France,N. Kant\u00e9,CDM,LEADING
4,Corner,"High, Accurate","France - Argentina, 4 - 3",Free Kick,2411.425619,1H,Argentina,L. Messi,,LEADING
...,...,...,...,...,...,...,...,...,...,...
260,Ground loose ball duel,"Won, Accurate","France - Peru, 1 - 0",Duel,2019.807640,1H,France,P. Pogba,CM,DRAW
262,Smart pass,"Key pass, Through, Accurate","France - Peru, 1 - 0",Pass,2022.354070,1H,France,P. Pogba,CM,DRAW
263,Shot,"Left foot, Blocked, Opportunity, Not accurate","France - Peru, 1 - 0",Shot,2023.580214,1H,France,O. Giroud,CF,DRAW
264,Touch,Interception,"France - Peru, 1 - 0",Others on the ball,2024.690943,1H,Peru,A. Rodr\u00edguez,,DRAW


In [587]:
# Matches that contribute at least one pre-goal event
pd.unique(all_events_before_goals['Match'])

array(['France - Argentina, 4 - 3', 'Uruguay - France, 0 - 2',
       'France - Croatia, 4 - 2', 'France - Australia, 2 - 1',
       'France - Belgium, 1 - 0', 'France - Peru, 1 - 0'], dtype=object)

## The sub-events of most interest for now are: shot, free kick shot, cross, free kick cross and corner.

In [588]:
def sort_by_match(data):
    """Return a copy of ``data`` ordered by match, then period, then time within the period."""
    ordering = ['Match', 'MatchPeriod', 'EventSec']
    return data.sort_values(by=ordering)

# Per-match chronological ordering of both the full and the filtered event sets
all_events_before_goals_sorted, filtered_events_sorted = map(
    sort_by_match, (all_events_before_goals, filtered_events)
)


We have finished our data preparation. Time to create a structured XES file.

In [589]:
from datetime import datetime, timedelta

def create_non_filter_xes_file(data, filename="output.xes", checkOn :str = "org:resource"):

    def has_allowed_subevent(trace):
        return True

    def safe_str(value):
        """Safely convert value to string, handling NaN"""
        if pd.isna(value):
            return ""
        return str(value)

    base_datetime = datetime(2020, 1, 1) # Arbitrary date for the timestamp

    # Open file to write
    with open(filename, 'w') as f:
        # Write XML declaration and log opening tag
        f.write('<?xml version="1.0" encoding="UTF-8" ?>\n')
        f.write('<log xes.version="1.0" xes.features="nested-attributes" openxes.version="1.0RC7" xmlns="http://www.xes-standard.org/">\n')
        f.write(f'<classifier name="Activity_Resource" keys="{checkOn}"/>\n')
        f.write('<classifier name="Default" keys="concept:name"/>\n')
        
        # Initialize variables for tracking traces
        current_trace = []
        in_possession = False
        last_opponent_action = None  # To keep track of the last action by the opponent
        
        # Iterate over each row in the data
        for idx, row in data.iterrows():
            # Check if the team is "France"
            if row['Team'] == 'France':
                # Start a new trace if we're not in possession
                if not in_possession:
                    # If there was a previous trace, write it to the file
                    if current_trace:
                        # Only write trace if it contains an allowed subevent
                        if has_allowed_subevent(current_trace):
                            end_reason = current_trace[-1]['Event']
                            start_reason = last_opponent_action  # Set start reason to last opponent action
                            
                            # Write trace to file
                            f.write('  <trace>\n')
                            f.write(f'    <string key="custom:startReason" value="{safe_str(start_reason)}" />\n')
                            f.write(f'    <string key="custom:endReason" value="{safe_str(end_reason)}" />\n')
                            
                            # Write each event in the trace
                            for event in current_trace:
                                event_datetime = base_datetime + timedelta(seconds=event["EventSec"])
                                event_timestamp = event_datetime.isoformat()  # ISO format for timestamp

                                f.write('    <event>\n')
                                f.write(f'      <string key="org:resource" value="{safe_str(event["Position"])}" />\n')
                                f.write(f'      <string key="concept:name" value="{safe_str(event["Player"])}" />\n')
                                f.write(f'      <string key="custom:action" value="{safe_str(event["Event"])}" />\n')
                                f.write(f'      <date key="time:timestamp" value="{event_timestamp}" />\n')
                                f.write(f'      <string key="custom:subevent" value="{safe_str(event["SubEvent"])}" />\n')
                                f.write(f'      <string key="custom:matchperiod" value="{safe_str(event["MatchPeriod"])}" />\n')
                                f.write(f'      <string key="custom:tags" value="{safe_str(event["Tags"])}" />\n')
                                f.write(f'      <string key="custom:match" value="{safe_str(event["Match"])}" />\n')
                                f.write(f'      <string key="custom:status" value="{event["Status"]}" />\n')

                                f.write('    </event>\n')
                            
                            # Close trace
                            f.write('  </trace>\n')
                    
                    # Reset trace variables
                    current_trace = []
                    in_possession = True
                
                # Add the current event to the trace
                current_trace.append({
                    'Player': row['Player'],
                    'Position': row['Position'],
                    'Event': row['Event'],
                    'EventSec': row['EventSec'],
                    'SubEvent': row['SubEvent'],
                    'MatchPeriod': row['MatchPeriod'],
                    'Tags': row['Tags'],
                    "Match": row['Match'],
                    "Status": row['Status']
                })
                
            else:
                # If team is not France, complete the current trace
                if in_possession:
                    if current_trace:
                        # Only write trace if it contains an allowed subevent
                        if has_allowed_subevent(current_trace):
                            end_reason = current_trace[-1]['Event']
                            
                            # Write trace to file
                            f.write('  <trace>\n')
                            f.write(f'    <string key="custom:startReason" value="{safe_str(last_opponent_action)}" />\n')
                            f.write(f'    <string key="custom:endReason" value="{safe_str(end_reason)}" />\n')
                            
                            # Write each event in the trace
                            for event in current_trace:
                                event_datetime = base_datetime + timedelta(seconds=event["EventSec"])
                                event_timestamp = event_datetime.isoformat()  # ISO format for timestamp

                                f.write('    <event>\n')
                                f.write(f'      <string key="concept:name" value="{safe_str(event["Player"])}" />\n')
                                f.write(f'      <string key="org:resource" value="{safe_str(event["Position"])}" />\n')
                                f.write(f'      <string key="custom:action" value="{safe_str(event["Event"])}" />\n')
                                f.write(f'      <date key="time:timeStamp" value="{event_timestamp}" />\n')
                                f.write(f'      <string key="custom:subevent" value="{safe_str(event["SubEvent"])}" />\n')
                                f.write(f'      <string key="custom:matchperiod" value="{safe_str(event["MatchPeriod"])}" />\n')
                                f.write(f'      <string key="custom:tags" value="{safe_str(event["Tags"])}" />\n')
                                f.write(f'      <string key="custom:match" value="{safe_str(event["Match"])}" />\n')
                                f.write(f'      <string key="custom:status" value="{event["Status"]}" />\n')

                                f.write('    </event>\n')
                            
                            # Close trace
                            f.write('  </trace>\n')
                    
                    # Reset trace variables
                    current_trace = []
                    in_possession = False
                
                # Keep track of the last action of the opponent
                last_opponent_action = row['Event']  # Update the last opponent action
        
        # Close the remaining trace if still in possession
        if current_trace:
            # Only write trace if it contains an allowed subevent
            if has_allowed_subevent(current_trace):
                end_reason = current_trace[-1]['Event']
                start_reason = last_opponent_action  # Use last opponent action as start reason
                
                f.write('  <trace>\n')
                f.write(f'    <string key="custom:startReason" value="{safe_str(start_reason)}" />\n')
                f.write(f'    <string key="custom:endReason" value="{safe_str(end_reason)}" />\n')
                
                # Write each event in the trace
                for event in current_trace:
                    event_datetime = base_datetime + timedelta(seconds=event["EventSec"])
                    event_timestamp = event_datetime.isoformat() 
                    f.write('    <event>\n')
                    f.write(f'      <string key="org:resource" value="{safe_str(event["Position"])}" />\n')
                    f.write(f'      <string key="concept:name" value="{safe_str(event["Player"])}" />\n')
                    f.write(f'      <string key="custom:action" value="{safe_str(event["Event"])}" />\n')
                    f.write(f'      <date key="time:timeStamp" value="{event_timestamp}" />\n')
                    f.write(f'      <string key="custom:subevent" value="{safe_str(event["SubEvent"])}" />\n')
                    f.write(f'      <string key="custom:matchperiod" value="{safe_str(event["MatchPeriod"])}" />\n')
                    f.write(f'      <string key="custom:tags" value="{safe_str(event["Tags"])}" />\n')
                    f.write(f'      <string key="custom:match" value="{safe_str(event["Match"])}" />\n')
                    f.write(f'      <string key="custom:status" value="{event["Status"]}" />\n')

                    f.write('    </event>\n')
                
                f.write('  </trace>\n')
        
        # Close log
        f.write('</log>\n')

In [590]:
from datetime import datetime, timedelta

def create_attack_xes_data(data, filename="output.xes", checkOn :str = "org:resource"):
    """Write France's attacking possessions from ``data`` to an XES event log.

    Rows are consumed in the order given. Consecutive rows whose ``Team`` is
    ``'France'`` form one possession (one XES ``<trace>``); the first row of
    any other team closes it. A possession is written only when at least one
    of its events carries a ``SubEvent`` listed in ``ALLOWED_SUBEVENTS``.

    Each trace records ``custom:startReason`` (the ``Event`` of the last
    non-France row seen before the possession, empty if there was none) and
    ``custom:endReason`` (the ``Event`` of the possession's last row).

    Args:
        data: DataFrame with the columns ``Team``, ``Player``, ``Position``,
            ``Event``, ``EventSec``, ``SubEvent``, ``MatchPeriod``, ``Tags``,
            ``Match`` and ``Status``.
        filename: Path of the XES file to create (overwritten if present).
        checkOn: Attribute key used for the ``Activity_Resource`` classifier.

    Returns:
        None. The log is written to ``filename`` as UTF-8.

    Note:
        Possessions are split only by opponent rows, so the caller must pass
        rows already ordered by match and time; a France possession that runs
        across a match boundary in ``data`` is emitted as a single trace.
    """
    # Imported locally so the function does not depend on another notebook cell.
    from xml.sax.saxutils import escape

    # Define the allowed subevents
    ALLOWED_SUBEVENTS = {
        "Shot",
        "Free kick shot",
        "Cross",
        "Free kick cross",
        "Corner"
    }

    def has_allowed_subevent(trace):
        """Check if any event in the trace has an allowed subevent"""
        for event in trace:
            subevent = event['SubEvent']
            # NaN / None / non-text labels can never match
            if not isinstance(subevent, str):
                continue

            # Compare the whole label first: splitting on spaces alone can
            # never match multi-word entries such as "Free kick shot".
            if subevent.strip() in ALLOWED_SUBEVENTS:
                return True

            # Keep the per-word check so single-word entries still match
            # inside longer labels, as before.
            if any(token in ALLOWED_SUBEVENTS for token in subevent.split(' ')):
                return True
        return False

    def safe_str(value):
        """Safely convert value to string, handling NaN"""
        if pd.isna(value):
            return ""
        return str(value)

    def xml_attr(text):
        """Escape text for use inside a double-quoted XML attribute."""
        return escape(text, {'"': '&quot;'})

    base_datetime = datetime(2020, 1, 1) # Arbitrary date for the timestamp

    def write_trace(out, trace, start_reason):
        """Write one possession as a <trace>, if it contains an allowed subevent."""
        if not trace or not has_allowed_subevent(trace):
            return

        end_reason = trace[-1]['Event']
        out.write('  <trace>\n')
        out.write(f'    <string key="custom:startReason" value="{xml_attr(safe_str(start_reason))}" />\n')
        out.write(f'    <string key="custom:endReason" value="{xml_attr(safe_str(end_reason))}" />\n')

        for event in trace:
            # float() also accepts numpy integer seconds, which timedelta rejects
            event_datetime = base_datetime + timedelta(seconds=float(event['EventSec']))
            event_timestamp = event_datetime.isoformat()  # ISO format for timestamp

            # Status is written verbatim (a missing value stays "nan"), only escaped
            status = xml_attr(str(event['Status']))

            out.write('    <event>\n')
            out.write(f'      <string key="org:resource" value="{xml_attr(safe_str(event["Position"]))}" />\n')
            out.write(f'      <string key="concept:name" value="{xml_attr(safe_str(event["Player"]))}" />\n')
            out.write(f'      <string key="custom:action" value="{xml_attr(safe_str(event["Event"]))}" />\n')
            # XES keys are case-sensitive: the standard key is "time:timestamp"
            out.write(f'      <date key="time:timestamp" value="{event_timestamp}" />\n')
            out.write(f'      <string key="custom:subevent" value="{xml_attr(safe_str(event["SubEvent"]))}" />\n')
            out.write(f'      <string key="custom:matchperiod" value="{xml_attr(safe_str(event["MatchPeriod"]))}" />\n')
            out.write(f'      <string key="custom:tags" value="{xml_attr(safe_str(event["Tags"]))}" />\n')
            out.write(f'      <string key="custom:match" value="{xml_attr(safe_str(event["Match"]))}" />\n')
            out.write(f'      <string key="custom:status" value="{status}" />\n')
            out.write('    </event>\n')

        out.write('  </trace>\n')

    # The header declares UTF-8, so write UTF-8 regardless of the platform default
    with open(filename, 'w', encoding='utf-8') as f:
        # Write XML declaration and log opening tag
        f.write('<?xml version="1.0" encoding="UTF-8" ?>\n')
        f.write('<log xes.version="1.0" xes.features="nested-attributes" openxes.version="1.0RC7" xmlns="http://www.xes-standard.org/">\n')
        f.write(f'<classifier name="Activity_Resource" keys="{checkOn}"/>\n')
        f.write('<classifier name="Default" keys="concept:name"/>\n')

        # A non-empty current_trace means France is currently in possession
        current_trace = []
        last_opponent_action = None  # To keep track of the last action by the opponent

        # Iterate over each row in the data
        for _, row in data.iterrows():
            if row['Team'] == 'France':
                # Add the current event to the running possession
                current_trace.append({
                    'Player': row['Player'],
                    'Position': row['Position'],
                    'Event': row['Event'],
                    'EventSec': row['EventSec'],
                    'SubEvent': row['SubEvent'],
                    'MatchPeriod': row['MatchPeriod'],
                    'Tags': row['Tags'],
                    'Match': row['Match'],
                    'Status': row['Status']
                })
            else:
                # An opponent event ends the possession. The start reason is the
                # opponent action seen *before* it, so flush before updating.
                write_trace(f, current_trace, last_opponent_action)
                current_trace = []
                last_opponent_action = row['Event']

        # Close the remaining trace if the data ends while France has the ball
        write_trace(f, current_trace, last_opponent_action)

        # Close log
        f.write('</log>\n')

In [591]:
# Disabled single-match (Denmark) export, kept for reference:
# vs_denmark = data[data['Match'].str.contains('Denmark - France', na=False)]
# create_attack_xes_data(sort_by_match(vs_denmark), filename="vs_denmark_attack.xes")
# create_non_filter_xes_file(sort_by_match(vs_denmark), filename="vs_denmark_all.xes")

# Tournament-wide tables: write the unfiltered log first, then the attack-only log.
for _export_frame, _export_all_name, _export_attack_name in (
    (all_events_before_goals_sorted, "all.xes", "all_attack.xes"),
    (filtered_events_sorted, "filtered_all.xes", "filtered_all_attack.xes"),
):
    create_non_filter_xes_file(_export_frame, filename=_export_all_name)
    create_attack_xes_data(_export_frame, filename=_export_attack_name)

# Per-opponent tables: unfiltered logs only, each sorted by match beforehand.
# (The Belgium attack-only export is disabled:
#  create_attack_xes_data(sort_by_match(events_belgium), filename="belgium_attack.xes"))
for _opponent_events, _opponent_log_name in (
    (events_belgium, "belgium_non_filtered.xes"),
    (events_argentina, "argentina_non_filtered.xes"),
    (events_uruguay, "uruguay_non_filtered.xes"),
    (events_croatia, "croatia_non_filtered.xes"),
    (events_australia, "australia_non_filtered.xes"),
    (events_peru, "peru_non_filtered.xes"),
):
    create_non_filter_xes_file(sort_by_match(_opponent_events), filename=_opponent_log_name)





