In [1]:
import json
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso

#Reading Data

In [2]:
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


##Events

In [3]:
list=[
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/events_England.json',
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/events_France.json',
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/events_Spain.json',
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/events_Italy.json',
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/events_Germany.json',
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/events_World_Cup.json',
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/events_European_Championship.json'
]

Events=pd.DataFrame([])

for x in list:

  # Load JSON data
  with open(x) as json_file:
      data = json.load(json_file)

  # Extracting headers
  headers = [
      'eventId', 'eventName', 'subEventId', 'subEventName', 'tags',
      'eventSec', 'id', 'matchId', 'matchPeriod', 'playerId', 'position_x', 'position_y', 'teamId'
  ]

  # Create a list to store event dictionaries
  events_list = []

  # Iterate through each event
  for event in data:
      # Extract x and y coordinates from the 'positions' field
      if event['positions']:
          position_x, position_y = event['positions'][0]['x'], event['positions'][0]['y']
      else:
          position_x, position_y = None, None

      # Extract tag names from the 'tags' field
      tag_names = [tag['id'] for tag in event['tags']]

      # Create a dictionary for the event
      event_dict = {
          'eventId': event['eventId'],
          'eventName': event['eventName'],
          'subEventId': event['subEventId'],
          'subEventName': event['subEventName'],
          'tags': tag_names,
          'eventSec': event['eventSec'],
          'id': event['id'],
          'matchId': event['matchId'],
          'matchPeriod': event['matchPeriod'],
          'playerId': event['playerId'],
          'position_x': position_x,
          'position_y': position_y,
          'teamId': event['teamId'],
      }

      # Append the event dictionary to the list
      events_list.append(event_dict)

  # Create a DataFrame from the list of event dictionaries
  df = pd.DataFrame(events_list)
  Events=pd.concat([Events,df],ignore_index=True,axis=0)

# Display the DataFrame
Events

Unnamed: 0,eventId,eventName,subEventId,subEventName,tags,eventSec,id,matchId,matchPeriod,playerId,position_x,position_y,teamId
0,8,Pass,85,Simple pass,[1801],2.758649,177959171,2499719,1H,25413,49,49,1609
1,8,Pass,83,High pass,[1801],4.946850,177959172,2499719,1H,370224,31,78,1609
2,8,Pass,82,Head pass,[1801],6.542188,177959173,2499719,1H,3319,51,75,1609
3,8,Pass,82,Head pass,[1801],8.143395,177959174,2499719,1H,120339,35,71,1609
4,8,Pass,85,Simple pass,[1801],10.302366,177959175,2499719,1H,167145,41,95,1609
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3251289,6,Offside,,,[],984.816517,90589035,1694440,E2,7936,89,28,4418
3251290,2,Foul,26,Time lost foul,[1702],1026.418188,90589357,1694440,E2,70134,10,61,9905
3251291,3,Free Kick,31,Free Kick,[1801],1031.553667,90589358,1694440,E2,70134,9,63,9905
3251292,1,Duel,10,Air duel,"[701, 1802]",1035.342605,90589359,1694440,E2,70410,67,78,9905


##Matches

In [4]:
list=[
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/matches_England.json',
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/matches_France.json',
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/matches_Spain.json',
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/matches_Italy.json',
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/matches_Germany.json',
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/matches_World_Cup.json',
    '/content/drive/MyDrive/Predicting Matches Using In-game Events/matches_European_Championship.json'
]

Matches=pd.DataFrame([])

for x in list:

  with open(x, 'r') as file:
      data = json.load(file)


  matches_data = []

  for match in data:
      for team_side, team in match['teamsData'].items():
          match_info = {
              'competitionId': match['competitionId'],
              'date': match['date'],
              'dateutc': match['dateutc'],
              'duration': match['duration'],
              'gameweek': match['gameweek'],
              'label': match['label'],
              'roundID': match.get('roundID', None),
              'seasonId': match['seasonId'],
              'status': match['status'],
              'venue': match['venue'],
              'winner': match['winner'],
              'wyId': match['wyId'],
              'team_side': team_side,  # 'home' or 'away'
              'hasFormation': team['hasFormation'],
              'score': team['score'],
              'scoreET': team['scoreET'],
              'scoreHT': team['scoreHT'],
              'scoreP': team['scoreP'],
              'teamId': team['teamId'],
              'coachId': team['coachId'],
          }

          # Add lineup information if available
          if 'lineup' in team:
              # Extract goals, own goals, and cards information from 'lineup'
              lineup_info = team['lineup']
              lineup_prefix = f'{team_side}_lineup'
              match_info.update({f'{lineup_prefix}_{key}': value for key, value in lineup_info.items() if key in ('goals', 'own_goals', 'cards')})

          # Add bench information if available
          if 'bench' in team:
              # Extract goals, own goals, and cards information from 'bench'
              bench_info = team['bench']
              bench_prefix = f'{team_side}_bench'
              match_info.update({f'{bench_prefix}_{key}': value for key, value in bench_info.items() if key in ('goals', 'own_goals', 'cards')})

          # Add substitutions information if available
          if 'substitutions' in team:
              # Extract goals, own goals, and cards information from 'substitutions'
              substitutions_info = team['substitutions']
              subs_prefix = f'{team_side}_substitutions'
              match_info.update({f'{subs_prefix}_{key}': value for key, value in substitutions_info.items() if key in ('goals', 'own_goals', 'cards')})

          matches_data.append(match_info)

  # Create a DataFrame
  df = pd.DataFrame(matches_data)
  Matches=pd.concat([Matches,df],ignore_index=True,axis=0)

# Display the DataFrame
Matches = Matches.rename(columns={'wyId':'matchId'})
Matches

Unnamed: 0,competitionId,date,dateutc,duration,gameweek,label,roundID,seasonId,status,venue,winner,matchId,team_side,hasFormation,score,scoreET,scoreHT,scoreP,teamId,coachId
0,364,"May 13, 2018 at 4:00:00 PM GMT+2",2018-05-13 14:00:00,Regular,38,"Burnley - AFC Bournemouth, 1 - 2",,181150,Played,Turf Moor,1659,2500089,1646,1,1,0,1,0,1646,8880
1,364,"May 13, 2018 at 4:00:00 PM GMT+2",2018-05-13 14:00:00,Regular,38,"Burnley - AFC Bournemouth, 1 - 2",,181150,Played,Turf Moor,1659,2500089,1659,1,2,0,0,0,1659,8934
2,364,"May 13, 2018 at 4:00:00 PM GMT+2",2018-05-13 14:00:00,Regular,38,"Crystal Palace - West Bromwich Albion, 2 - 0",,181150,Played,Selhurst Park,1628,2500090,1628,1,2,0,0,0,1628,8357
3,364,"May 13, 2018 at 4:00:00 PM GMT+2",2018-05-13 14:00:00,Regular,38,"Crystal Palace - West Bromwich Albion, 2 - 0",,181150,Played,Selhurst Park,1628,2500090,1627,1,0,0,0,0,1627,0
4,364,"May 13, 2018 at 4:00:00 PM GMT+2",2018-05-13 14:00:00,Regular,38,"Huddersfield Town - Arsenal, 0 - 1",,181150,Played,The John Smith's Stadium,1609,2500091,1609,1,1,0,1,0,1609,7845
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3877,102,"June 11, 2016 at 6:00:00 PM GMT+2",2016-06-11 16:00:00,Regular,1,"Wales - Slovakia, 2 - 1",,9291,Played,Stade Matmut-Atlantique,10682,1694396,14496,1,1,0,0,0,14496,268998
3878,102,"June 11, 2016 at 3:00:00 PM GMT+2",2016-06-11 13:00:00,Regular,1,"Albania - Switzerland, 0 - 1",,9291,Played,Stade Bollaert-Delelis,6697,1694391,6697,1,1,0,1,0,6697,210701
3879,102,"June 11, 2016 at 3:00:00 PM GMT+2",2016-06-11 13:00:00,Regular,1,"Albania - Switzerland, 0 - 1",,9291,Played,Stade Bollaert-Delelis,6697,1694391,8731,1,0,0,0,0,8731,135480
3880,102,"June 10, 2016 at 9:00:00 PM GMT+2",2016-06-10 19:00:00,Regular,1,"France - Romania, 2 - 1",,9291,Played,Stade de France,4418,1694390,11944,1,1,0,0,0,11944,360105


##Tags

In [5]:
tags_df = pd.read_csv('/content/drive/MyDrive/Predicting Matches Using In-game Events/tags2name.csv')
tags_df

Unnamed: 0,Tag,Label,Description
0,101,Goal,Goal
1,102,own_goal,Own goal
2,301,assist,Assist
3,302,keyPass,Key pass
4,1901,counter_attack,Counter attack
5,401,Left,Left foot
6,402,Right,Right foot
7,403,head/body,Head/body
8,1101,direct,Direct
9,1102,indirect,Indirect


#Dataset Design

##Aggregating Match Statistics on the level of Sub Events

In [6]:
Events1 = pd.get_dummies(Events,columns = ['subEventName'],prefix = None)
Events1_1H = pd.get_dummies(Events[Events['matchPeriod']=="1H"],columns = ['subEventName'],prefix = None)#first half stats
Events2 = Events1.groupby(by = ['matchId','teamId']).sum()
Events2_1H = Events1_1H.groupby(by = ['matchId','teamId']).sum()
Events2.drop(['eventId','eventSec','id','playerId','position_x','position_y','subEventName_'],inplace=True,axis=1)
Events2_1H.drop(['eventId','eventSec','id','playerId','position_x','position_y','subEventName_'],inplace=True,axis=1)
Events2.reset_index(inplace=True)
Events2_1H.reset_index(inplace=True)
Events2

  Events2 = Events1.groupby(by = ['matchId','teamId']).sum()
  Events2_1H = Events1_1H.groupby(by = ['matchId','teamId']).sum()


Unnamed: 0,matchId,teamId,subEventName_Acceleration,subEventName_Air duel,subEventName_Ball out of the field,subEventName_Clearance,subEventName_Corner,subEventName_Cross,subEventName_Foul,subEventName_Free Kick,...,subEventName_Save attempt,subEventName_Shot,subEventName_Simple pass,subEventName_Simulation,subEventName_Smart pass,subEventName_Throw in,subEventName_Time lost foul,subEventName_Touch,subEventName_Violent Foul,subEventName_Whistle
0,1694390,4418,7,52,1,23,5,18,6,11,...,0,14,396,0,13,33,0,45,0,0
1,1694390,11944,5,54,0,21,4,15,11,6,...,4,8,159,0,5,36,0,42,1,0
2,1694391,6697,8,30,0,17,4,12,16,5,...,1,14,431,0,32,10,0,25,0,0
3,1694391,8731,4,32,0,9,4,21,8,10,...,3,7,279,0,9,11,0,26,1,0
4,1694392,6697,3,47,0,19,7,26,11,13,...,2,14,399,0,13,22,0,33,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3877,2576336,3315,13,38,80,20,5,12,13,12,...,4,14,248,0,11,30,0,45,0,1
3878,2576337,3164,5,29,68,7,3,14,7,10,...,1,9,362,0,8,24,0,70,0,1
3879,2576337,3204,9,29,3,18,5,8,10,7,...,1,14,287,0,2,22,0,45,0,1
3880,2576338,3185,11,30,0,16,3,12,11,10,...,1,4,271,0,8,14,0,57,0,0


In [7]:
Events2_1H

Unnamed: 0,matchId,teamId,subEventName_Acceleration,subEventName_Air duel,subEventName_Ball out of the field,subEventName_Clearance,subEventName_Corner,subEventName_Cross,subEventName_Foul,subEventName_Free Kick,...,subEventName_Save attempt,subEventName_Shot,subEventName_Simple pass,subEventName_Simulation,subEventName_Smart pass,subEventName_Throw in,subEventName_Time lost foul,subEventName_Touch,subEventName_Violent Foul,subEventName_Whistle
0,1694390,4418,2,22,1,10,4,10,2,5,...,0,8,214,0,5,14,0,23,0,0
1,1694390,11944,1,23,0,9,2,5,4,3,...,1,3,93,0,1,17,0,24,1,0
2,1694391,6697,1,16,0,12,3,7,7,3,...,1,7,197,0,16,5,0,12,0,0
3,1694391,8731,1,17,0,8,3,11,3,5,...,1,4,119,0,4,5,0,12,1,0
4,1694392,6697,0,21,0,12,5,15,5,6,...,0,7,206,0,8,12,0,14,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3877,2576336,3315,5,20,42,11,1,2,7,8,...,2,8,99,0,5,15,0,24,0,0
3878,2576337,3164,4,17,34,5,2,5,4,5,...,0,3,208,0,3,11,0,45,0,1
3879,2576337,3204,4,17,3,7,4,3,6,4,...,0,5,105,0,2,12,0,21,0,1
3880,2576338,3185,5,16,0,1,2,8,4,5,...,1,1,172,0,5,10,0,28,0,0


##Adding Half-Time Scores and Winner to the Aggregated Data

In [8]:
#Creating Unique ID for Events2 and Matches Datasets
Events2['UID'] = Events2['matchId'].astype(str) + Events2['teamId'].astype(str)
Events2_1H['UID'] = Events2_1H['matchId'].astype(str) + Events2_1H['teamId'].astype(str)
Matches['UID'] = Matches['matchId'].astype(str) + Matches['teamId'].astype(str)

# Now, you can perform the merge based on the 'UID' column
Data = pd.merge(Events2, Matches, on = ['UID'], how = 'outer')
Data_1H = pd.merge(Events2_1H, Matches, on = ['UID'], how = 'outer')

# Print the result
Data.drop(['competitionId', 'date', 'dateutc', 'duration', 'gameweek', 'label',
       'roundID', 'seasonId', 'status', 'venue', 'matchId_y',
       'team_side', 'hasFormation', 'scoreET', 'scoreP',
       'teamId_y', 'coachId'],inplace=True,axis=1)
Data_1H.drop(['competitionId', 'date', 'dateutc', 'duration', 'gameweek', 'label',
       'roundID', 'seasonId', 'status', 'venue', 'matchId_y',
       'team_side', 'hasFormation', 'scoreET', 'scoreP',
       'teamId_y', 'coachId'],inplace=True,axis=1)

#Transforming 'winner' into label feature
for i in range(Data.shape[0]):
  if str(Data.at[i,'teamId_x'])==str(Data.at[i,'winner']):
    Data.at[i,'Result']='win'
  elif str(Data.at[i,'winner'])=='0':
    Data.at[i,'Result']='draw'
  else:
    Data.at[i,'Result']='loss'

for i in range(Data_1H.shape[0]):
  if str(Data_1H.at[i,'teamId_x'])==str(Data_1H.at[i,'winner']):
    Data_1H.at[i,'Result']='win'
  elif str(Data_1H.at[i,'winner'])=='0':
    Data_1H.at[i,'Result']='draw'
  else:
    Data_1H.at[i,'Result']='loss'

Data.drop('winner',inplace=True,axis=1)
Data_1H.drop('winner',inplace=True,axis=1)
Data

Unnamed: 0,matchId_x,teamId_x,subEventName_Acceleration,subEventName_Air duel,subEventName_Ball out of the field,subEventName_Clearance,subEventName_Corner,subEventName_Cross,subEventName_Foul,subEventName_Free Kick,...,subEventName_Smart pass,subEventName_Throw in,subEventName_Time lost foul,subEventName_Touch,subEventName_Violent Foul,subEventName_Whistle,UID,score,scoreHT,Result
0,1694390,4418,7,52,1,23,5,18,6,11,...,13,33,0,45,0,0,16943904418,2,0,win
1,1694390,11944,5,54,0,21,4,15,11,6,...,5,36,0,42,1,0,169439011944,1,0,loss
2,1694391,6697,8,30,0,17,4,12,16,5,...,32,10,0,25,0,0,16943916697,1,1,win
3,1694391,8731,4,32,0,9,4,21,8,10,...,9,11,0,26,1,0,16943918731,0,0,loss
4,1694392,6697,3,47,0,19,7,26,11,13,...,13,22,0,33,0,0,16943926697,1,0,draw
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3877,2576336,3315,13,38,80,20,5,12,13,12,...,11,30,0,45,0,1,25763363315,0,0,loss
3878,2576337,3164,5,29,68,7,3,14,7,10,...,8,24,0,70,0,1,25763373164,1,0,loss
3879,2576337,3204,9,29,3,18,5,8,10,7,...,2,22,0,45,0,1,25763373204,3,1,win
3880,2576338,3185,11,30,0,16,3,12,11,10,...,8,14,0,57,0,0,25763383185,2,1,win


In [9]:
Data_1H

Unnamed: 0,matchId_x,teamId_x,subEventName_Acceleration,subEventName_Air duel,subEventName_Ball out of the field,subEventName_Clearance,subEventName_Corner,subEventName_Cross,subEventName_Foul,subEventName_Free Kick,...,subEventName_Smart pass,subEventName_Throw in,subEventName_Time lost foul,subEventName_Touch,subEventName_Violent Foul,subEventName_Whistle,UID,score,scoreHT,Result
0,1694390,4418,2,22,1,10,4,10,2,5,...,5,14,0,23,0,0,16943904418,2,0,win
1,1694390,11944,1,23,0,9,2,5,4,3,...,1,17,0,24,1,0,169439011944,1,0,loss
2,1694391,6697,1,16,0,12,3,7,7,3,...,16,5,0,12,0,0,16943916697,1,1,win
3,1694391,8731,1,17,0,8,3,11,3,5,...,4,5,0,12,1,0,16943918731,0,0,loss
4,1694392,6697,0,21,0,12,5,15,5,6,...,8,12,0,14,0,0,16943926697,1,0,draw
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3877,2576336,3315,5,20,42,11,1,2,7,8,...,5,15,0,24,0,0,25763363315,0,0,loss
3878,2576337,3164,4,17,34,5,2,5,4,5,...,3,11,0,45,0,1,25763373164,1,0,loss
3879,2576337,3204,4,17,3,7,4,3,6,4,...,2,12,0,21,0,1,25763373204,3,1,win
3880,2576338,3185,5,16,0,1,2,8,4,5,...,5,10,0,28,0,0,25763383185,2,1,win


##Embedding Tags

###Expanding Tags Vector in Events Data

In [10]:
max_length = Events['tags'].apply(len).max()

# Create a new DataFrame with columns based on the maximum length
tags_df_1 = pd.DataFrame(Events['tags'].tolist(), columns=[f'tag_{i+1}' for i in range(max_length)])

# Concatenate the new DataFrame with the original DataFrame
test1 = pd.concat([Events, tags_df_1], axis=1)

# Drop the unrelated columns
test1 = test1.drop(['tags','eventId','subEventId', 'subEventName', 'eventSec', 'id', 'playerId', 'position_x', 'position_y'], axis=1)

# First Half Data Only
test1_1H=test1[test1['matchPeriod']=='1H']
test1_1H.drop('matchPeriod',inplace=True,axis=1)
test1_1H.reset_index(inplace=True,drop=True)
test1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1_1H.drop('matchPeriod',inplace=True,axis=1)


Unnamed: 0,eventName,matchId,matchPeriod,teamId,tag_1,tag_2,tag_3,tag_4,tag_5,tag_6
0,Pass,2499719,1H,1609,1801.0,,,,,
1,Pass,2499719,1H,1609,1801.0,,,,,
2,Pass,2499719,1H,1609,1801.0,,,,,
3,Pass,2499719,1H,1609,1801.0,,,,,
4,Pass,2499719,1H,1609,1801.0,,,,,
...,...,...,...,...,...,...,...,...,...,...
3251289,Offside,1694440,E2,4418,,,,,,
3251290,Foul,1694440,E2,9905,1702.0,,,,,
3251291,Free Kick,1694440,E2,9905,1801.0,,,,,
3251292,Duel,1694440,E2,9905,701.0,1802.0,,,,


In [11]:
test1_1H

Unnamed: 0,eventName,matchId,teamId,tag_1,tag_2,tag_3,tag_4,tag_5,tag_6
0,Pass,2499719,1609,1801.0,,,,,
1,Pass,2499719,1609,1801.0,,,,,
2,Pass,2499719,1609,1801.0,,,,,
3,Pass,2499719,1609,1801.0,,,,,
4,Pass,2499719,1609,1801.0,,,,,
...,...,...,...,...,...,...,...,...,...
1628454,Pass,1694440,4418,1801.0,,,,,
1628455,Pass,1694440,4418,1801.0,,,,,
1628456,Pass,1694440,4418,1801.0,,,,,
1628457,Pass,1694440,4418,1801.0,,,,,


###Identifying Unique Tags for Each EventName

In [12]:
#Function to find unique elements in a list
def unique(list1):
    unique_list = []
    for x in list1:
        if x not in unique_list:
            unique_list.append(x)
    return unique_list

events=test1['eventName'].value_counts().index.tolist()
for x in events:
  y=test1[test1['eventName']==x]
  uniqueTags=[]
  for i in range(6):
    title='tag_'+str(i+1)
    for j in range(len(y[title].value_counts().index.tolist())):
      uniqueTags.append(y[title].value_counts().index[j])
  print(x,':',unique(uniqueTags))

Pass : [1801.0, 1802.0, 1401.0, 402.0, 901.0, 1901.0, 401.0, 302.0, 301.0, 2001.0, 1001.0, 403.0, 102.0, 801.0, 2101.0, 201.0, 504.0, 502.0]
Duel : [701.0, 703.0, 702.0, 502.0, 501.0, 504.0, 503.0, 1601.0, 602.0, 601.0, 1901.0, 1401.0, 2001.0, 1802.0, 302.0, 201.0, 301.0, 402.0, 1702.0, 1703.0, 1801.0]
Others on the ball : [1401.0, 1801.0, 1802.0, 1302.0, 1901.0, 1301.0, 201.0, 2001.0, 102.0, 1001.0, 302.0, 301.0, 1702.0]
Free Kick : [1801.0, 801.0, 1802.0, 302.0, 402.0, 401.0, 101.0, 1001.0, 301.0, 2101.0, 1901.0, 201.0, 403.0, 1101.0, 1102.0, 1205.0, 1204.0, 1202.0, 1201.0, 1203.0, 1206.0, 1215.0, 1219.0, 1221.0, 1214.0, 1216.0, 1212.0, 1220.0, 1210.0, 1222.0, 1223.0, 1218.0, 1207.0, 1213.0, 1209.0, 1208.0, 1211.0, 1217.0]
Interruption : [1901.0]
Foul : [1702.0, 1701.0, 1703.0, 1901.0]
Shot : [402.0, 401.0, 403.0, 101.0, 1901.0, 302.0, 301.0, 201.0, 2101.0, 1216.0, 1215.0, 1212.0, 1210.0, 1214.0, 1401.0, 1213.0, 1211.0, 1201.0, 1203.0, 1205.0, 1202.0, 1206.0, 1204.0, 1218.0, 1209.0, 

In [13]:
#copy the above result
tags_lists = {
'Pass' : [1801.0, 1802.0, 1401.0, 402.0, 901.0, 1901.0, 401.0, 302.0, 301.0, 2001.0, 1001.0, 403.0, 102.0, 801.0, 2101.0, 201.0, 504.0, 502.0],
'Duel' : [701.0, 703.0, 702.0, 502.0, 501.0, 504.0, 503.0, 1601.0, 602.0, 601.0, 1901.0, 1401.0, 2001.0, 1802.0, 302.0, 201.0, 301.0, 402.0, 1703.0, 1702.0, 1801.0],
'Others on the ball' : [1401.0, 1801.0, 1802.0, 1302.0, 1901.0, 1301.0, 201.0, 2001.0, 102.0, 1001.0, 302.0, 301.0, 1702.0],
'Free Kick' : [1801.0, 801.0, 1802.0, 302.0, 402.0, 401.0, 101.0, 1001.0, 301.0, 2101.0, 1901.0, 201.0, 403.0, 1101.0, 1102.0, 1205.0, 1204.0, 1202.0, 1201.0, 1203.0, 1206.0, 1215.0, 1219.0, 1214.0, 1221.0, 1216.0, 1212.0, 1220.0, 1223.0, 1210.0, 1222.0, 1213.0, 1218.0, 1207.0, 1209.0, 1208.0, 1211.0, 1217.0],
'Interruption' : [1901.0],
'Foul' : [1702.0, 1701.0, 1703.0, 1901.0],
'Shot' : [402.0, 401.0, 403.0, 101.0, 1901.0, 302.0, 301.0, 201.0, 2101.0, 1216.0, 1215.0, 1212.0, 1210.0, 1214.0, 1401.0, 1213.0, 1211.0, 1201.0, 1203.0, 1205.0, 1202.0, 1206.0, 1204.0, 1218.0, 1209.0, 1208.0, 1217.0, 1221.0, 1207.0, 1222.0, 1802.0, 1223.0, 1801.0, 1219.0, 1220.0],
'Save attempt' : [101.0, 1201.0, 1203.0, 1205.0, 1202.0, 1204.0, 1206.0, 1801.0, 1207.0, 1209.0, 1208.0, 1901.0, 1802.0, 1001.0],
'Offside' : [1901.0],
'Goalkeeper leaving line' : [1901.0]
}
tag_mapping = dict(zip(tags_df['Tag'], tags_df['Description']))

# Create a new dictionary with event names and their corresponding meanings
event_meanings = {event: [tag_mapping[tag] for tag in tags_list if tag in tag_mapping] for event, tags_list in tags_lists.items()}

# Display the result
for event, meanings in event_meanings.items():
    print(f'{event} : {meanings}')

Pass : ['Accurate', 'Not accurate', 'Interception', 'Right foot', 'Through', 'Counter attack', 'Left foot', 'Key pass', 'Assist', 'Dangerous ball lost', 'Fairplay', 'Head/body', 'Own goal', 'High', 'Blocked', 'Opportunity', 'Take on right', 'Free space left']
Duel : ['Lost', 'Won', 'Neutral', 'Free space left', 'Free space right', 'Take on right', 'Take on left', 'Sliding tackle', 'Anticipation', 'Anticipated', 'Counter attack', 'Interception', 'Dangerous ball lost', 'Not accurate', 'Key pass', 'Opportunity', 'Assist', 'Right foot', 'Second yellow card', 'Yellow card', 'Accurate']
Others on the ball : ['Interception', 'Accurate', 'Not accurate', 'Missed ball', 'Counter attack', 'Feint', 'Opportunity', 'Dangerous ball lost', 'Own goal', 'Fairplay', 'Key pass', 'Assist', 'Yellow card']
Free Kick : ['Accurate', 'High', 'Not accurate', 'Key pass', 'Right foot', 'Left foot', 'Goal', 'Fairplay', 'Assist', 'Blocked', 'Counter attack', 'Opportunity', 'Head/body', 'Direct', 'Indirect', 'Positio

###Embedding

In [14]:
#Based on the results above we are now able to embed those tags into numeric form (score) representing their influence on winning
score=[3,-3,3,2,1,0,0,0,1,-1,-2,-1,0,0,1,1,1,1,-1,1,1,1,1,0,1,-1,-2,-1,-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,-1,0,1,1,-1]
tags_df['score'] = score
tags_df

Unnamed: 0,Tag,Label,Description,score
0,101,Goal,Goal,3
1,102,own_goal,Own goal,-3
2,301,assist,Assist,3
3,302,keyPass,Key pass,2
4,1901,counter_attack,Counter attack,1
5,401,Left,Left foot,0
6,402,Right,Right foot,0
7,403,head/body,Head/body,0
8,1101,direct,Direct,1
9,1102,indirect,Indirect,-1


##Constructing Event-Influence Scores based on Tag Embeddings

###Replacing Tag IDs with assigned scores in 'test1' data

In [15]:
#This takes around 2 hours to run (be careful)
def replace_ids_with_scores(df, tags_df, columns_to_replace):
    result_df = df.copy()

    for column in columns_to_replace:
        result_df[column] = result_df[column].apply(lambda tag_id: tags_df.loc[tags_df['Tag'] == tag_id, 'score'].values[0] if any(tags_df['Tag'] == tag_id) else np.nan)

    return result_df

tag_columns = test1.columns[test1.columns.str.startswith('tag_')]
result_df = replace_ids_with_scores(test1, tags_df, tag_columns)
result_df_1H = replace_ids_with_scores(test1_1H, tags_df, tag_columns)
result_df

Unnamed: 0,eventName,matchId,matchPeriod,teamId,tag_1,tag_2,tag_3,tag_4,tag_5,tag_6
0,Pass,2499719,1H,1609,1.0,,,,,
1,Pass,2499719,1H,1609,1.0,,,,,
2,Pass,2499719,1H,1609,1.0,,,,,
3,Pass,2499719,1H,1609,1.0,,,,,
4,Pass,2499719,1H,1609,1.0,,,,,
...,...,...,...,...,...,...,...,...,...,...
3251289,Offside,1694440,E2,4418,,,,,,
3251290,Foul,1694440,E2,9905,-1.0,,,,,
3251291,Free Kick,1694440,E2,9905,1.0,,,,,
3251292,Duel,1694440,E2,9905,-1.0,-1.0,,,,


In [16]:
result_df_1H

Unnamed: 0,eventName,matchId,teamId,tag_1,tag_2,tag_3,tag_4,tag_5,tag_6
0,Pass,2499719,1609,1.0,,,,,
1,Pass,2499719,1609,1.0,,,,,
2,Pass,2499719,1609,1.0,,,,,
3,Pass,2499719,1609,1.0,,,,,
4,Pass,2499719,1609,1.0,,,,,
...,...,...,...,...,...,...,...,...,...
1628454,Pass,1694440,4418,1.0,,,,,
1628455,Pass,1694440,4418,1.0,,,,,
1628456,Pass,1694440,4418,1.0,,,,,
1628457,Pass,1694440,4418,1.0,,,,,


###Creating Scores

In [17]:
test2 = result_df.copy()
test2_1H = result_df_1H.copy()
tag_columns_to_sum = ['tag_1','tag_2','tag_3','tag_4','tag_5','tag_6']
test2['Score'] = test2[tag_columns_to_sum].sum(axis = 1)
test2_1H['Score'] = test2_1H[tag_columns_to_sum].sum(axis = 1)
test2 = test2[['matchId','teamId','Score','eventName']]
test2_1H = test2_1H[['matchId','teamId','Score','eventName']]
test2 = test2.groupby(by=['matchId','teamId','eventName']).sum()
test2_1H = test2_1H.groupby(by=['matchId','teamId','eventName']).sum()
test2.reset_index(inplace=True)
test2_1H.reset_index(inplace=True)
pivot_df = test2.pivot(index = ['matchId','teamId'],columns='eventName', values='Score')
pivot_df_1H = test2_1H.pivot(index = ['matchId','teamId'],columns='eventName', values='Score')
pivot_df.reset_index(inplace=True)
pivot_df_1H.reset_index(inplace=True)
pivot_df.fillna(0,inplace = True)
pivot_df_1H.fillna(0,inplace = True)
pivot_df

eventName,matchId,teamId,Duel,Foul,Free Kick,Goalkeeper leaving line,Interruption,Offside,Others on the ball,Pass,Save attempt,Shot
0,1694390,4418,248.0,-1.0,34.0,0.0,0.0,0.0,22.0,389.0,3.0,13.0
1,1694390,11944,48.0,-3.0,30.0,2.0,0.0,0.0,11.0,170.0,9.0,-2.0
2,1694391,6697,150.0,-1.0,10.0,1.0,0.0,0.0,31.0,465.0,3.0,7.0
3,1694391,8731,60.0,-6.0,13.0,0.0,0.0,0.0,12.0,313.0,8.0,-2.0
4,1694392,6697,191.0,-2.0,35.0,0.0,0.0,0.0,20.0,427.0,4.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...
3877,2576336,3315,63.0,-1.0,33.0,0.0,0.0,0.0,33.0,213.0,8.0,-5.0
3878,2576337,3164,120.0,-5.0,30.0,0.0,0.0,0.0,8.0,339.0,9.0,8.0
3879,2576337,3204,74.0,-1.0,35.0,0.0,0.0,0.0,20.0,296.0,4.0,7.0
3880,2576338,3185,97.0,-4.0,16.0,0.0,0.0,0.0,11.0,259.0,3.0,9.0


In [18]:
pivot_df_1H

eventName,matchId,teamId,Duel,Foul,Free Kick,Goalkeeper leaving line,Interruption,Offside,Others on the ball,Pass,Save attempt,Shot
0,1694390,4418,118.0,0.0,13.0,0.0,0.0,0.0,0.0,206.0,1.0,-4.0
1,1694390,11944,13.0,-2.0,12.0,2.0,0.0,0.0,9.0,104.0,2.0,2.0
2,1694391,6697,43.0,-1.0,6.0,0.0,0.0,0.0,16.0,197.0,2.0,3.0
3,1694391,8731,43.0,-3.0,7.0,0.0,0.0,0.0,6.0,135.0,4.0,-2.0
4,1694392,6697,94.0,0.0,20.0,0.0,0.0,0.0,6.0,209.0,2.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...
3877,2576336,3315,17.0,0.0,16.0,0.0,0.0,0.0,19.0,91.0,5.0,1.0
3878,2576337,3164,73.0,-4.0,12.0,0.0,0.0,0.0,8.0,192.0,2.0,2.0
3879,2576337,3204,33.0,0.0,22.0,0.0,0.0,0.0,6.0,99.0,1.0,-8.0
3880,2576338,3185,52.0,-2.0,6.0,0.0,0.0,0.0,1.0,167.0,1.0,5.0


##Finalizing Dataset

###Merging Aggregated Sub-event Statistics Data with Event-influence Scores Data

In [19]:
pivot_df['UID'] = pivot_df['matchId'].astype(str) + pivot_df['teamId'].astype(str)
pivot_df_1H['UID'] = pivot_df_1H['matchId'].astype(str) + pivot_df_1H['teamId'].astype(str)


# Now, you can perform the merge based on the 'UID' column
final_df = pd.merge(Data, pivot_df, on=['UID'], how='left')
final_df_1H = pd.merge(Data_1H, pivot_df_1H, on=['UID'], how='left')

# Transform scores into Z-scors
scaler= StandardScaler()
final_df[['Duel', 'Foul', 'Free Kick', 'Interruption', 'Offside',
       'Others on the ball', 'Pass', 'Save attempt', 'Shot',
       'Goalkeeper leaving line']]= scaler.fit_transform(final_df[['Duel', 'Foul', 'Free Kick', 'Interruption', 'Offside',
       'Others on the ball', 'Pass', 'Save attempt', 'Shot',
       'Goalkeeper leaving line']])
final_df_1H[['Duel', 'Foul', 'Free Kick', 'Interruption', 'Offside',
       'Others on the ball', 'Pass', 'Save attempt', 'Shot',
       'Goalkeeper leaving line']]= scaler.fit_transform(final_df_1H[['Duel', 'Foul', 'Free Kick', 'Interruption', 'Offside',
       'Others on the ball', 'Pass', 'Save attempt', 'Shot',
       'Goalkeeper leaving line']])

final_df

Unnamed: 0,matchId_x,teamId_x,subEventName_Acceleration,subEventName_Air duel,subEventName_Ball out of the field,subEventName_Clearance,subEventName_Corner,subEventName_Cross,subEventName_Foul,subEventName_Free Kick,...,Duel,Foul,Free Kick,Goalkeeper leaving line,Interruption,Offside,Others on the ball,Pass,Save attempt,Shot
0,1694390,4418,7,52,1,23,5,18,6,11,...,3.083120,0.699337,0.421886,-0.255593,-0.022704,-0.194763,-0.015755,0.562124,-0.821780,0.816255
1,1694390,11944,5,54,0,21,4,15,11,6,...,-1.541381,-0.598381,-0.022012,7.232796,-0.022704,-0.194763,-1.217134,-1.164710,0.853572,-0.987492
2,1694391,6697,8,30,0,17,4,12,16,5,...,0.817114,0.699337,-2.241500,3.488601,-0.022704,-0.194763,0.967192,1.161391,-0.821780,0.094756
3,1694391,8731,4,32,0,9,4,21,8,10,...,-1.263911,-2.544958,-1.908577,-0.255593,-0.022704,-0.194763,-1.107918,-0.037142,0.574347,-0.987492
4,1694392,6697,3,47,0,19,7,26,11,13,...,1.765137,0.050478,0.532860,-0.255593,-0.022704,-0.194763,-0.234188,0.861758,-0.542555,0.335256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3877,2576336,3315,13,38,80,20,5,12,13,12,...,-1.194544,0.699337,0.310911,-0.255593,-0.022704,-0.194763,1.185624,-0.825651,0.574347,-1.348241
3878,2576337,3164,5,29,68,7,3,14,7,10,...,0.123439,-1.896099,-0.022012,-0.255593,-0.022704,-0.194763,-1.544783,0.167870,0.853572,0.215006
3879,2576337,3204,9,29,3,18,5,8,10,7,...,-0.940196,0.699337,0.532860,-0.255593,-0.022704,-0.194763,-0.234188,-0.171189,-0.542555,0.094756
3880,2576338,3185,11,30,0,16,3,12,11,10,...,-0.408379,-1.247240,-1.575653,-0.255593,-0.022704,-0.194763,-1.217134,-0.462937,-0.821780,0.335256


In [20]:
final_df_1H

Unnamed: 0,matchId_x,teamId_x,subEventName_Acceleration,subEventName_Air duel,subEventName_Ball out of the field,subEventName_Clearance,subEventName_Corner,subEventName_Cross,subEventName_Foul,subEventName_Free Kick,...,Duel,Foul,Free Kick,Goalkeeper leaving line,Interruption,Offside,Others on the ball,Pass,Save attempt,Shot
0,1694390,4418,2,22,1,10,4,10,2,5,...,2.431148,0.822211,-0.379764,-0.172591,0.0,-0.13752,-1.873804,0.613936,-0.758407,-1.293123
1,1694390,11944,1,23,0,9,2,5,4,3,...,-1.640812,-1.489029,-0.560543,11.183288,0.0,-0.13752,-0.288609,-0.871285,-0.301385,-0.128277
2,1694391,6697,1,16,0,12,3,7,7,3,...,-0.477395,-0.333409,-1.645211,-0.172591,0.0,-0.13752,0.944320,0.482887,-0.301385,0.065864
3,1694391,8731,1,17,0,8,3,11,3,5,...,-0.477395,-2.644650,-1.464433,-0.172591,0.0,-0.13752,-0.817007,-0.419894,0.612659,-0.904841
4,1694392,6697,0,21,0,12,5,15,5,6,...,1.500414,0.822211,0.885682,-0.172591,0.0,-0.13752,-0.817007,0.657619,-0.301385,0.260005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3877,2576336,3315,5,20,42,11,1,2,7,8,...,-1.485689,0.822211,0.162570,-0.172591,0.0,-0.13752,1.472719,-1.060577,1.069681,-0.322418
3878,2576337,3164,4,17,34,5,2,5,4,5,...,0.686023,-3.800270,-0.560543,-0.172591,0.0,-0.13752,-0.464742,0.410082,-0.301385,-0.128277
3879,2576337,3204,4,17,3,7,4,3,6,4,...,-0.865200,0.822211,1.247239,-0.172591,0.0,-0.13752,-0.817007,-0.944090,-0.758407,-2.069687
3880,2576338,3185,5,16,0,1,2,8,4,5,...,-0.128369,-1.489029,-1.645211,-0.172591,0.0,-0.13752,-1.697671,0.046057,-0.758407,0.454146


In [21]:
final_df.columns

Index(['matchId_x', 'teamId_x', 'subEventName_Acceleration',
       'subEventName_Air duel', 'subEventName_Ball out of the field',
       'subEventName_Clearance', 'subEventName_Corner', 'subEventName_Cross',
       'subEventName_Foul', 'subEventName_Free Kick',
       'subEventName_Free kick cross', 'subEventName_Free kick shot',
       'subEventName_Goal kick', 'subEventName_Goalkeeper leaving line',
       'subEventName_Ground attacking duel',
       'subEventName_Ground defending duel',
       'subEventName_Ground loose ball duel', 'subEventName_Hand foul',
       'subEventName_Hand pass', 'subEventName_Head pass',
       'subEventName_High pass', 'subEventName_Late card foul',
       'subEventName_Launch', 'subEventName_Out of game foul',
       'subEventName_Penalty', 'subEventName_Protest', 'subEventName_Reflexes',
       'subEventName_Save attempt', 'subEventName_Shot',
       'subEventName_Simple pass', 'subEventName_Simulation',
       'subEventName_Smart pass', 'subEventName

###Splitting Final Datasets into X and Y

In [41]:
Y=final_df['Result']
X=final_df.drop(['matchId_x', 'teamId_x', 'UID','Result', 'matchId', 'teamId','score'],axis=1)
X

Unnamed: 0,subEventName_Acceleration,subEventName_Air duel,subEventName_Ball out of the field,subEventName_Clearance,subEventName_Corner,subEventName_Cross,subEventName_Foul,subEventName_Free Kick,subEventName_Free kick cross,subEventName_Free kick shot,...,Duel,Foul,Free Kick,Goalkeeper leaving line,Interruption,Offside,Others on the ball,Pass,Save attempt,Shot
0,7,52,1,23,5,18,6,11,3,0,...,3.083120,0.699337,0.421886,-0.255593,-0.022704,-0.194763,-0.015755,0.562124,-0.821780,0.816255
1,5,54,0,21,4,15,11,6,2,0,...,-1.541381,-0.598381,-0.022012,7.232796,-0.022704,-0.194763,-1.217134,-1.164710,0.853572,-0.987492
2,8,30,0,17,4,12,16,5,0,2,...,0.817114,0.699337,-2.241500,3.488601,-0.022704,-0.194763,0.967192,1.161391,-0.821780,0.094756
3,4,32,0,9,4,21,8,10,2,0,...,-1.263911,-2.544958,-1.908577,-0.255593,-0.022704,-0.194763,-1.107918,-0.037142,0.574347,-0.987492
4,3,47,0,19,7,26,11,13,4,3,...,1.765137,0.050478,0.532860,-0.255593,-0.022704,-0.194763,-0.234188,0.861758,-0.542555,0.335256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3877,13,38,80,20,5,12,13,12,0,0,...,-1.194544,0.699337,0.310911,-0.255593,-0.022704,-0.194763,1.185624,-0.825651,0.574347,-1.348241
3878,5,29,68,7,3,14,7,10,5,0,...,0.123439,-1.896099,-0.022012,-0.255593,-0.022704,-0.194763,-1.544783,0.167870,0.853572,0.215006
3879,9,29,3,18,5,8,10,7,3,0,...,-0.940196,0.699337,0.532860,-0.255593,-0.022704,-0.194763,-0.234188,-0.171189,-0.542555,0.094756
3880,11,30,0,16,3,12,11,10,1,0,...,-0.408379,-1.247240,-1.575653,-0.255593,-0.022704,-0.194763,-1.217134,-0.462937,-0.821780,0.335256


In [42]:
Y_1H=final_df_1H['Result']
X_1H=final_df_1H.drop(['matchId_x', 'teamId_x', 'UID','Result', 'matchId', 'teamId','score'],axis=1)
X_1H

Unnamed: 0,subEventName_Acceleration,subEventName_Air duel,subEventName_Ball out of the field,subEventName_Clearance,subEventName_Corner,subEventName_Cross,subEventName_Foul,subEventName_Free Kick,subEventName_Free kick cross,subEventName_Free kick shot,...,Duel,Foul,Free Kick,Goalkeeper leaving line,Interruption,Offside,Others on the ball,Pass,Save attempt,Shot
0,2,22,1,10,4,10,2,5,2,0,...,2.431148,0.822211,-0.379764,-0.172591,0.0,-0.13752,-1.873804,0.613936,-0.758407,-1.293123
1,1,23,0,9,2,5,4,3,0,0,...,-1.640812,-1.489029,-0.560543,11.183288,0.0,-0.13752,-0.288609,-0.871285,-0.301385,-0.128277
2,1,16,0,12,3,7,7,3,0,1,...,-0.477395,-0.333409,-1.645211,-0.172591,0.0,-0.13752,0.944320,0.482887,-0.301385,0.065864
3,1,17,0,8,3,11,3,5,2,0,...,-0.477395,-2.644650,-1.464433,-0.172591,0.0,-0.13752,-0.817007,-0.419894,0.612659,-0.904841
4,0,21,0,12,5,15,5,6,3,1,...,1.500414,0.822211,0.885682,-0.172591,0.0,-0.13752,-0.817007,0.657619,-0.301385,0.260005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3877,5,20,42,11,1,2,7,8,0,0,...,-1.485689,0.822211,0.162570,-0.172591,0.0,-0.13752,1.472719,-1.060577,1.069681,-0.322418
3878,4,17,34,5,2,5,4,5,2,0,...,0.686023,-3.800270,-0.560543,-0.172591,0.0,-0.13752,-0.464742,0.410082,-0.301385,-0.128277
3879,4,17,3,7,4,3,6,4,2,0,...,-0.865200,0.822211,1.247239,-0.172591,0.0,-0.13752,-0.817007,-0.944090,-0.758407,-2.069687
3880,5,16,0,1,2,8,4,5,1,0,...,-0.128369,-1.489029,-1.645211,-0.172591,0.0,-0.13752,-1.697671,0.046057,-0.758407,0.454146


In [43]:
Yscore=final_df['score']
Xscore=final_df.drop(['matchId_x', 'teamId_x', 'UID','Result', 'matchId', 'teamId','score'],axis=1)
Xscore

Unnamed: 0,subEventName_Acceleration,subEventName_Air duel,subEventName_Ball out of the field,subEventName_Clearance,subEventName_Corner,subEventName_Cross,subEventName_Foul,subEventName_Free Kick,subEventName_Free kick cross,subEventName_Free kick shot,...,Duel,Foul,Free Kick,Goalkeeper leaving line,Interruption,Offside,Others on the ball,Pass,Save attempt,Shot
0,7,52,1,23,5,18,6,11,3,0,...,3.083120,0.699337,0.421886,-0.255593,-0.022704,-0.194763,-0.015755,0.562124,-0.821780,0.816255
1,5,54,0,21,4,15,11,6,2,0,...,-1.541381,-0.598381,-0.022012,7.232796,-0.022704,-0.194763,-1.217134,-1.164710,0.853572,-0.987492
2,8,30,0,17,4,12,16,5,0,2,...,0.817114,0.699337,-2.241500,3.488601,-0.022704,-0.194763,0.967192,1.161391,-0.821780,0.094756
3,4,32,0,9,4,21,8,10,2,0,...,-1.263911,-2.544958,-1.908577,-0.255593,-0.022704,-0.194763,-1.107918,-0.037142,0.574347,-0.987492
4,3,47,0,19,7,26,11,13,4,3,...,1.765137,0.050478,0.532860,-0.255593,-0.022704,-0.194763,-0.234188,0.861758,-0.542555,0.335256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3877,13,38,80,20,5,12,13,12,0,0,...,-1.194544,0.699337,0.310911,-0.255593,-0.022704,-0.194763,1.185624,-0.825651,0.574347,-1.348241
3878,5,29,68,7,3,14,7,10,5,0,...,0.123439,-1.896099,-0.022012,-0.255593,-0.022704,-0.194763,-1.544783,0.167870,0.853572,0.215006
3879,9,29,3,18,5,8,10,7,3,0,...,-0.940196,0.699337,0.532860,-0.255593,-0.022704,-0.194763,-0.234188,-0.171189,-0.542555,0.094756
3880,11,30,0,16,3,12,11,10,1,0,...,-0.408379,-1.247240,-1.575653,-0.255593,-0.022704,-0.194763,-1.217134,-0.462937,-0.821780,0.335256


In [44]:
Yscore_1H=final_df_1H['score']
Xscore_1H=final_df_1H.drop(['matchId_x', 'teamId_x', 'UID','Result', 'matchId', 'teamId','score'],axis=1)
Xscore_1H

Unnamed: 0,subEventName_Acceleration,subEventName_Air duel,subEventName_Ball out of the field,subEventName_Clearance,subEventName_Corner,subEventName_Cross,subEventName_Foul,subEventName_Free Kick,subEventName_Free kick cross,subEventName_Free kick shot,...,Duel,Foul,Free Kick,Goalkeeper leaving line,Interruption,Offside,Others on the ball,Pass,Save attempt,Shot
0,2,22,1,10,4,10,2,5,2,0,...,2.431148,0.822211,-0.379764,-0.172591,0.0,-0.13752,-1.873804,0.613936,-0.758407,-1.293123
1,1,23,0,9,2,5,4,3,0,0,...,-1.640812,-1.489029,-0.560543,11.183288,0.0,-0.13752,-0.288609,-0.871285,-0.301385,-0.128277
2,1,16,0,12,3,7,7,3,0,1,...,-0.477395,-0.333409,-1.645211,-0.172591,0.0,-0.13752,0.944320,0.482887,-0.301385,0.065864
3,1,17,0,8,3,11,3,5,2,0,...,-0.477395,-2.644650,-1.464433,-0.172591,0.0,-0.13752,-0.817007,-0.419894,0.612659,-0.904841
4,0,21,0,12,5,15,5,6,3,1,...,1.500414,0.822211,0.885682,-0.172591,0.0,-0.13752,-0.817007,0.657619,-0.301385,0.260005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3877,5,20,42,11,1,2,7,8,0,0,...,-1.485689,0.822211,0.162570,-0.172591,0.0,-0.13752,1.472719,-1.060577,1.069681,-0.322418
3878,4,17,34,5,2,5,4,5,2,0,...,0.686023,-3.800270,-0.560543,-0.172591,0.0,-0.13752,-0.464742,0.410082,-0.301385,-0.128277
3879,4,17,3,7,4,3,6,4,2,0,...,-0.865200,0.822211,1.247239,-0.172591,0.0,-0.13752,-0.817007,-0.944090,-0.758407,-2.069687
3880,5,16,0,1,2,8,4,5,1,0,...,-0.128369,-1.489029,-1.645211,-0.172591,0.0,-0.13752,-1.697671,0.046057,-0.758407,0.454146


#Model Fitting - Win/Loss Classification

##KNN

####Full-time Data

In [26]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Initialize the KNN Classifier
knn = KNeighborsClassifier()

# Train the model on the training data
knn.fit(X_train, y_train)

# Make predictions on the test data
y_pred = knn.predict(X_test)

# Print a classification report for more detailed metrics
print("Classification Report:")
print(classification_report(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

        draw       0.31      0.31      0.31       206
        loss       0.45      0.58      0.50       268
         win       0.54      0.39      0.46       303

    accuracy                           0.44       777
   macro avg       0.43      0.43      0.42       777
weighted avg       0.45      0.44      0.43       777



####Half-time Data

In [27]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_1H, Y_1H, test_size=0.2, random_state=42)

# Initialize the KNN Classifier
knn = KNeighborsClassifier()

# Train the model on the training data
knn.fit(X_train, y_train)

# Make predictions on the test data
y_pred = knn.predict(X_test)

# Print a classification report for more detailed metrics
print("Classification Report:")
print(classification_report(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

        draw       0.28      0.27      0.27       206
        loss       0.45      0.56      0.50       268
         win       0.50      0.40      0.44       303

    accuracy                           0.42       777
   macro avg       0.41      0.41      0.41       777
weighted avg       0.42      0.42      0.42       777



##Random Forest

####Full-time Data

In [28]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on the training data
rf_classifier.fit(X_train_scaled, y_train)

# Predictions on the training set
y_pred_train = rf_classifier.predict(X_train_scaled)

# Predictions on the test set
y_pred_test = rf_classifier.predict(X_test_scaled)

# Compute accuracy scores
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

# Print the accuracies
print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)
# Make predictions on the test data
y_pred = rf_classifier.predict(X_test_scaled)

# Print a classification report for more detailed metrics
print("Classification Report:")
print(classification_report(y_test, y_pred))

Training Accuracy: 1.0
Test Accuracy: 0.6872586872586872
Classification Report:
              precision    recall  f1-score   support

        draw       0.51      0.26      0.34       206
        loss       0.69      0.86      0.77       268
         win       0.74      0.83      0.78       303

    accuracy                           0.69       777
   macro avg       0.65      0.65      0.63       777
weighted avg       0.66      0.69      0.66       777



####Half-time Data

In [29]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_1H, Y_1H, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on the training data
rf_classifier.fit(X_train_scaled, y_train)

# Predictions on the training set
y_pred_train = rf_classifier.predict(X_train_scaled)

# Predictions on the test set
y_pred_test = rf_classifier.predict(X_test_scaled)

# Compute accuracy scores
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

# Print the accuracies
print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)
# Make predictions on the test data
y_pred = rf_classifier.predict(X_test_scaled)

# Print a classification report for more detailed metrics
print("Classification Report:")
print(classification_report(y_test, y_pred))

Training Accuracy: 1.0
Test Accuracy: 0.5701415701415702
Classification Report:
              precision    recall  f1-score   support

        draw       0.30      0.09      0.13       206
        loss       0.57      0.73      0.64       268
         win       0.61      0.76      0.68       303

    accuracy                           0.57       777
   macro avg       0.49      0.52      0.48       777
weighted avg       0.51      0.57      0.52       777



##Multi-layered Perceptron

####Full-time Data

In [30]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.24, random_state=42)

# Initialize the XGB Classifier
gb_classifier = GradientBoostingClassifier()

# Train the model on the training data
gb_classifier.fit(X_train, y_train)

# Make predictions on the test data
y_pred = gb_classifier.predict(X_test)

# Print a classification report for more detailed metrics
print("Classification Report:")
print(classification_report(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

        draw       0.57      0.44      0.50       251
        loss       0.75      0.84      0.79       328
         win       0.78      0.81      0.79       353

    accuracy                           0.72       932
   macro avg       0.70      0.70      0.69       932
weighted avg       0.71      0.72      0.71       932



####Half-time Data

In [31]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_1H, Y_1H, test_size=0.24, random_state=42)

# Initialize the XGB Classifier
gb_classifier = GradientBoostingClassifier()

# Train the model on the training data
gb_classifier.fit(X_train, y_train)

# Make predictions on the test data
y_pred = gb_classifier.predict(X_test)

# Print a classification report for more detailed metrics
print("Classification Report:")
print(classification_report(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

        draw       0.34      0.14      0.19       251
        loss       0.59      0.73      0.65       328
         win       0.62      0.75      0.68       353

    accuracy                           0.58       932
   macro avg       0.52      0.54      0.51       932
weighted avg       0.53      0.58      0.54       932



##XGBoost

####Full-time Data

In [32]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Initialize the XGB Classifier
gb_classifier = GradientBoostingClassifier()

# Train the model on the training data
gb_classifier.fit(X_train, y_train)

# Make predictions on the test data
y_pred = gb_classifier.predict(X_test)

# Print a classification report for more detailed metrics
print("Classification Report:")
print(classification_report(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

        draw       0.54      0.45      0.49       206
        loss       0.75      0.83      0.79       268
         win       0.79      0.81      0.80       303

    accuracy                           0.72       777
   macro avg       0.69      0.70      0.69       777
weighted avg       0.71      0.72      0.71       777



####Half-time Data

In [33]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_1H, Y_1H, test_size=0.2, random_state=42)

# Initialize the XGB Classifier
gb_classifier = GradientBoostingClassifier()

# Train the model on the training data
gb_classifier.fit(X_train, y_train)

# Make predictions on the test data
y_pred = gb_classifier.predict(X_test)

# Print a classification report for more detailed metrics
print("Classification Report:")
print(classification_report(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

        draw       0.32      0.16      0.21       206
        loss       0.60      0.72      0.65       268
         win       0.64      0.74      0.69       303

    accuracy                           0.58       777
   macro avg       0.52      0.54      0.52       777
weighted avg       0.54      0.58      0.55       777



#Model Fitting - Score Prediction

##Linear Regression

####Full-time Data

In [49]:
# Perform train-test split
X_train, X_test, y_train, y_test = train_test_split(Xscore, Yscore, test_size=0.2, random_state=42)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Optionally, you can also print the coefficients and intercept
print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)

Mean Squared Error: 0.4478311314396557
R-squared: 0.7355709638569141
Coefficients: [ 4.18707757e-03 -1.42648287e-03 -1.16121084e-03 -1.22216472e-03
 -1.11527312e-02 -4.39080679e-03 -3.42602509e-03 -1.01508324e-02
 -2.70606459e-02  4.88063374e-02 -8.91765873e-03  6.35202801e-03
 -1.01095792e-04 -1.12315424e-03  1.05550843e-03  1.13118653e-02
 -4.18256007e-03 -1.47942937e-03 -2.55814765e-03  3.20237254e-03
 -1.72758367e-03  6.67458925e-02  3.20186503e-01 -1.23360407e-02
 -2.23390005e-02 -1.54275984e-02  2.90700933e-02 -2.70581217e-04
  5.54520567e-02  7.02765359e-05 -6.84532523e-03  2.29586217e-02
 -1.53750107e-03 -5.95056555e-02 -1.01411247e-02  5.87302387e-01
 -7.57849732e-03  1.70886209e-02  5.51728178e-02 -8.02053564e-03
  8.90297840e-03  2.20384002e-03 -1.80682919e-02  1.60641985e-02
 -8.71225336e-03  6.18821271e-01]
Intercept: 1.6888929972892004


####Half-time Data

In [50]:
# Perform train-test split
X_train, X_test, y_train, y_test = train_test_split(Xscore_1H, Yscore_1H, test_size=0.2, random_state=42)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Optionally, you can also print the coefficients and intercept
print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)

Mean Squared Error: 0.759695686433195
R-squared: 0.55142556195279
Coefficients: [ 1.13357765e-02 -1.80610813e-03 -2.93047218e-03 -1.04104164e-02
  1.71140717e-02  2.73090025e-03  4.10218463e-03 -6.95868135e-04
  3.97528463e-03  9.83633992e-02 -1.47254377e-03  1.74010095e-02
 -1.44082974e-03 -6.82180716e-04 -4.39986251e-04 -1.32529617e-02
  8.22557931e-03  2.31982009e-03 -6.43819653e-03 -5.29250892e-02
 -1.11509489e-02 -1.18579169e-01  2.93719538e-02  5.24663668e-02
 -5.50281953e-02 -3.83852967e-02  1.36451109e-02  1.04241723e-03
  1.62603862e-01  2.01010127e-02  8.74598462e-04 -2.40906450e-01
  1.68986369e-03 -1.93834889e-01  4.53335202e-03  1.03352299e+00
  3.00702623e-02 -1.21776378e-03  2.63663481e-02  2.12747208e-03
  5.82867088e-16  3.03426764e-03 -2.78757082e-04  1.42434020e-03
  5.40745470e-02  1.24705315e-02]
Intercept: 0.6926277159861004


##Random Forest Regressor

####Full-time Data

In [36]:
# Perform train-test split
X_train, X_test, y_train, y_test = train_test_split(Xscore, Yscore, test_size=0.2, random_state=42)

# Create a Random Forest Regressor model
rf_model = RandomForestRegressor(n_estimators=1000, random_state=42)

# Fit the model on the training data
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Feature Importance
feature_importance = rf_model.feature_importances_
print('Feature Importance:')
for feature, importance in zip(Xscore.columns, feature_importance):
    print(f'{feature}: {importance}')

# Optionally, you can also print other model diagnostics or hyperparameters
print('Number of Estimators:', rf_model.n_estimators)
print('Max Depth:', rf_model.max_depth)
# Add more as needed

# Other model diagnostics or visualizations can be added as needed
# For example, you might want to visualize the predicted vs. actual values, or residuals.

Mean Squared Error: 0.43334877091377094
R-squared: 0.7441223046773376
Feature Importance:
subEventName_Acceleration: 0.00870166753810036
subEventName_Air duel: 0.010253791718363839
subEventName_Ball out of the field: 0.005946473302377013
subEventName_Clearance: 0.008035918549215296
subEventName_Corner: 0.005952700596083749
subEventName_Cross: 0.007744607893828495
subEventName_Foul: 0.007699183593742654
subEventName_Free Kick: 0.007710784321210909
subEventName_Free kick cross: 0.0052628377386768075
subEventName_Free kick shot: 0.0036036830629307803
subEventName_Goal kick: 0.006942388198794756
subEventName_Goalkeeper leaving line: 0.003996759983372201
subEventName_Ground attacking duel: 0.00907503057703437
subEventName_Ground defending duel: 0.01081167086940117
subEventName_Ground loose ball duel: 0.009431207024307175
subEventName_Hand foul: 0.0026829264418091743
subEventName_Hand pass: 0.006323525319817767
subEventName_Head pass: 0.008374513695847852
subEventName_High pass: 0.0097265369

####Half-time Data

In [37]:
# Perform train-test split
X_train, X_test, y_train, y_test = train_test_split(Xscore_1H, Yscore_1H, test_size=0.2, random_state=42)

# Create a Random Forest Regressor model
rf_model = RandomForestRegressor(n_estimators=1000, random_state=42)

# Fit the model on the training data
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Feature Importance
feature_importance = rf_model.feature_importances_
print('Feature Importance:')
for feature, importance in zip(Xscore.columns, feature_importance):
    print(f'{feature}: {importance}')

# Optionally, you can also print other model diagnostics or hyperparameters
print('Number of Estimators:', rf_model.n_estimators)
print('Max Depth:', rf_model.max_depth)
# Add more as needed

# Other model diagnostics or visualizations can be added as needed
# For example, you might want to visualize the predicted vs. actual values, or residuals.

Mean Squared Error: 0.7913443127413127
R-squared: 0.532738125634987
Feature Importance:
subEventName_Acceleration: 0.011896855687478112
subEventName_Air duel: 0.01780625389734717
subEventName_Ball out of the field: 0.011257710287448075
subEventName_Clearance: 0.015610231610079952
subEventName_Corner: 0.01279901886767687
subEventName_Cross: 0.019973461419897028
subEventName_Foul: 0.015116477370533921
subEventName_Free Kick: 0.01237779643321076
subEventName_Free kick cross: 0.006964974666892523
subEventName_Free kick shot: 0.005504178031819294
subEventName_Goal kick: 0.010798281100588523
subEventName_Goalkeeper leaving line: 0.006957422844098695
subEventName_Ground attacking duel: 0.01785719355234606
subEventName_Ground defending duel: 0.023232453506986783
subEventName_Ground loose ball duel: 0.01583591562715956
subEventName_Hand foul: 0.003464567172216853
subEventName_Hand pass: 0.010968203183481537
subEventName_Head pass: 0.0154418590325096
subEventName_High pass: 0.016235645854642232


##Lasso Regression

###Full-time Data

In [38]:
# Perform train-test split
X_train, X_test, y_train, y_test = train_test_split(Xscore, Yscore, test_size=0.2, random_state=42)

# Create a Lasso Regression model
lasso_model = Lasso(alpha=0.01)  # You can adjust the alpha (penalty) parameter

# Fit the model on the training data
lasso_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = lasso_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Print the coefficients (some might be exactly zero due to L1 regularization)
print('Coefficients:', lasso_model.coef_)
print('Intercept:', lasso_model.intercept_)

Mean Squared Error: 0.44491933161942315
R-squared: 0.7372902825148879
Coefficients: [ 2.32061919e-03 -1.30207982e-03 -1.30741463e-03 -7.76926737e-04
 -7.00285074e-03 -4.85015375e-03 -2.75034044e-03 -7.82107690e-03
 -2.14620035e-02  3.40798238e-02 -7.26067831e-03  0.00000000e+00
 -2.98494693e-04 -1.30779423e-03  7.92560216e-04  0.00000000e+00
 -2.32298360e-03 -1.22045046e-03 -2.55085750e-03 -0.00000000e+00
 -1.03917456e-03  0.00000000e+00  2.77816395e-01 -0.00000000e+00
 -2.32541611e-02 -1.46129073e-02  2.95075898e-02 -1.74485219e-05
  0.00000000e+00  3.14654053e-04 -5.12700956e-03  0.00000000e+00
 -1.61741352e-03 -0.00000000e+00 -0.00000000e+00  5.79847537e-01
 -0.00000000e+00  0.00000000e+00  3.48619338e-02 -0.00000000e+00
  0.00000000e+00  0.00000000e+00 -0.00000000e+00  0.00000000e+00
 -0.00000000e+00  6.14489172e-01]
Intercept: 1.5534733608258606


###Half-time Data

In [53]:
# Perform train-test split
X_train, X_test, y_train, y_test = train_test_split(Xscore_1H, Yscore_1H, test_size=0.2, random_state=42)

# Create a Lasso Regression model
lasso_model = Lasso(alpha=0.01)  # You can adjust the alpha (penalty) parameter

# Fit the model on the training data
lasso_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = lasso_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Print the coefficients (some might be exactly zero due to L1 regularization)
print('Coefficients:', lasso_model.coef_)
print('Intercept:', lasso_model.intercept_)

Mean Squared Error: 0.7598343557219427
R-squared: 0.5513436824589542
Coefficients: [ 1.01284026e-02 -1.18076433e-03 -2.99943367e-03 -1.05819066e-02
  1.51111030e-02  3.42785542e-03  1.04730488e-03 -0.00000000e+00
  0.00000000e+00  6.31510001e-02 -2.35391378e-03  4.72485967e-03
 -0.00000000e+00 -8.96906976e-04 -1.49161611e-05 -0.00000000e+00
  3.54449122e-03  1.32575923e-03 -5.70515437e-03 -0.00000000e+00
 -1.12755879e-02 -0.00000000e+00  0.00000000e+00  0.00000000e+00
 -1.58778791e-02 -1.99637892e-03  1.60688247e-02  1.12295410e-03
  0.00000000e+00  2.00527413e-02  8.45345260e-04 -0.00000000e+00
  1.31879801e-03 -0.00000000e+00  0.00000000e+00  1.02431507e+00
  1.58517644e-02  0.00000000e+00  1.68664119e-02  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -0.00000000e+00  0.00000000e+00
 -0.00000000e+00  6.76663796e-03]
Intercept: 0.5876955616256095
