In [1]:
# Import libraries
import pandas as pd
from statsbombpy import sb
import requests
import http.client
import json
import ScraperFC as sfc

Match Information

In [107]:
# Get all matches 
df_all_matches = sb.matches(competition_id=55, season_id=282)

# Get England's matches
df_england = df_all_matches[(df_all_matches['home_team'] == 'England') | (df_all_matches['away_team'] == 'England')]

# Euros Final 
match_id = 3943043

# Get match information
df = df_england[df_england['match_id'] == match_id]

drop_colns = ['kick_off','match_status','match_status_360','last_updated','last_updated_360','match_week',
              'data_version','shot_fidelity_version','xy_fidelity_version']

matchInfo = df.drop(drop_colns, axis=1).reset_index(drop=True)

# Column to assign home away team
def assign_home_away(row):
    if row['team'] == matchInfo['home_team'][0]:
        return 'home'
    else: return 'away'

# Export
# matchInfo.to_csv(r'./Match Info.csv', index=False)




In [3]:
homeTeam = matchInfo['home_team'][0]
awayTeam = matchInfo['away_team'][0]
homeManager = matchInfo['home_managers'][0]
awayManager = matchInfo['away_managers'][0]

Match Sheet

In [156]:
# Concat England and Spain lineup into one
players_list = []
teams = [homeTeam, awayTeam]

for i in range(len(teams)):
    lineup = sb.lineups(match_id=match_id)[teams[i]]
    fullLineup = lineup[['player_id','player_name','jersey_number','country','cards','positions']]
    fullLineup['team'] = teams[i]
    players_list.append(fullLineup)

players_df = pd.concat(players_list, axis=0, ignore_index=True).reset_index(drop=True)

players_df['home_away'] = players_df.apply(assign_home_away, axis=1)

# Get extract positions into a different column
def player_position(row):
    if row['positions']:
        return row['positions'][0]['position']
    return None

players_df['player_position'] = players_df.apply(player_position, axis=1)

# Get substitution status
def bench(row):
    
    if row['positions'] == []:
        return 'Bench'
    elif row['positions'][0]['from'] == '00:00':
        return 'Starter'
    else: return 'Substitute'

players_df['bench_status'] = players_df.apply(bench, axis=1)

# Add manager
def manager(row):
    if row['country'] == homeTeam:
        return homeManager
    else:
        return awayManager
    
players_df['team_manager'] = players_df.apply(manager, axis=1)

# Add cards
def cards(row):
    if row['cards']:
        return row['cards'][0]['time']
    return None

players_df['cards'] = players_df.apply(cards, axis=1)

# Filter
drop_colns = ['positions']
df_lineup = players_df.drop(drop_colns,axis=1).reset_index(drop=True)

# Export
#df_lineup.to_csv(r'./Match Lineups.csv', index=False)



Positions

In [5]:
# Get match events
matchEvents = sb.events(match_id=match_id)

# Filter columns
df_position = matchEvents[['id','match_id','player_id','location','player', 'position','team','minute']].sort_values(by=['minute'], 
                                                                                                  ascending=True).reset_index(drop=True)

# Split columns
df_position[['location_x', 'location_y']] = df_position['location'].apply(pd.Series)

# Assign a goal status by different time
timeFirstGoal = 46
timeSecondGoal = 72
timeThirdGoal = 85

def goalstatus(row):
    if row['minute'] <= timeFirstGoal:
        return '1-46'
    elif row['minute'] <= timeSecondGoal:
        return '46-72'
    elif row['minute'] <= timeThirdGoal:
        return '72-85'
    else:
        return '85-94'
    
df_position['goal_status'] = df_position.apply(goalstatus, axis=1)

df_position_team = df_position.groupby(['player']).agg({'location_x': ['mean'], 'location_y':['mean','count']}).reset_index()
df_position_status = df_position.groupby(['player','team','goal_status']).agg({'location_x': ['mean'], 'location_y':['mean']}).reset_index()

# Export
# df_position_status.to_csv(r'./Players Position Per Goal.csv', index=False)
# df_position_team.to_csv(r'./Players Position.csv', index=False)




In [6]:
# Split columns
matchEvents[['location_x', 'location_y']] = matchEvents['location'].apply(pd.Series)
matchEvents[['pass_end_location_x', 'pass_end_location_y']] = matchEvents['pass_end_location'].apply(pd.Series)
matchEvents['shot_end_location'] = matchEvents['shot_end_location'].astype(str)
matchEvents[['shot_end_location_x', 'shot_end_location_y', 'shot_end_location_z']] = matchEvents['shot_end_location'].str.strip('[]').str.split(', ', expand=True)

In [7]:
matchEvents[matchEvents['shot_outcome']=='Goal']['minute']

3236    46
3247    72
3249    85
Name: minute, dtype: int64

xG Dynamics

In [8]:
shotEvents = matchEvents[matchEvents['type'] == 'Shot']
df_xg = shotEvents[['id','period','match_id','player_id','location','player', 'position','team','minute',
             'shot_statsbomb_xg','shot_outcome']].reset_index(drop=True)

# Export
#df_xg.to_csv(r'./xG Dyanamics.csv', index=False)

Match Momentum

In [10]:
# Get the match id from the URL
matchURL = 'https://www.sofascore.com/football/match/england-spain/YTbsnUb#id:11874023' # input('Insert Sofascore matchURL: ')
sofamatchID = matchURL.split(':')[-1]
mainURL = matchURL.split('#')[0]
print(sofamatchID)
print(mainURL)


# Scrape data
url = "https://sofascore.p.rapidapi.com/matches/get-graph"
querystring = {"matchId":f"{sofamatchID}"}
headers = {
	"X-RapidAPI-Key": "02f78f4685mshc10883d2824f85fp15d8f7jsn4a10a9bc47ae",
	"X-RapidAPI-Host": "sofascore.p.rapidapi.com"
}

try:
    response = requests.get(url, headers=headers, params=querystring)
    if response.status_code == 200:
        # Resource has been modified, process response data
        matchMomentum = response.json() 
    else:
        # Handle other status codes
        print("Unexpected status code:", response.status_code)

except requests.RequestException as e:
    print("Error:", e)

match_momentum_df = pd.DataFrame(matchMomentum['graphPoints'])


# Create a distinct column for each row entry 

# Function to assign values based on conditions
def assign_home_team(row):
    if row['value'] > 0:
        return homeTeam
    else:
        return None

def assign_away_team(row):
    if row['value'] < 0:
        return awayTeam
    else:
        return None

# Apply the function to create the new column
match_momentum_df['homeTeam'] = match_momentum_df.apply(assign_home_team, axis=1)
match_momentum_df['awayTeam'] = match_momentum_df.apply(assign_away_team, axis=1)

# Export
#match_momentum_df.to_csv(r'./Match Momentum.csv', index=False)

11874023
https://www.sofascore.com/football/match/england-spain/YTbsnUb


Team Stats

In [11]:
sofascore = sfc.Sofascore()
matchStats= sofascore.scrape_team_match_stats(matchURL)

matchStats = matchStats[matchStats['period'] == 'ALL'].reset_index(drop=True)
matchStats = matchStats[['name','homeValue', 'awayValue']]

# Export
#matchStats.to_csv(r'./Match Stats.csv', index=False)

Running


In [12]:
statstry = matchStats.transpose().reset_index()
statstry.columns = statstry.iloc[0]
statstry = statstry.drop(0)

# Add a column to assign team name
def assign_team(row):
    if 'home' in row['name']:
        return homeTeam
    elif 'away' in row['name']:
        return awayTeam
    else: return None

statstry['Team'] = statstry.apply(assign_team, axis=1)

# ASSING GOAL
def assign_score(row):
    if 'home' in row['name']:
        return matchInfo['home_score'][0]
    else: return matchInfo['away_score'][0]

statstry['Goal'] = statstry.apply(assign_score, axis=1)


# Export
#statstry.to_csv(r'./Match Stats New.csv', index=False)

Player Stats

In [13]:
def get_player_stats(home_away):

    fbref = sfc.FBref()
    fbref_match_url = 'https://fbref.com/en/matches/e307ecc0/Spain-England-July-14-2024-UEFA-Euro-2024'
    a = fbref.scrape_match(fbref_match_url)

    # Get all the needed stats we want
    stats = ['Summary','Passing','Passing types','Defense','Possession','Misc','Keeper']
    
    summary = a[f'{home_away} Player Stats'][0][stats[0]]
    passing = a[f'{home_away} Player Stats'][0][stats[1]]
    passTypes = a[f'{home_away} Player Stats'][0][stats[2]]
    defense = a[f'{home_away} Player Stats'][0][stats[3]]
    possession = a[f'{home_away} Player Stats'][0][stats[4]]
    misc = a[f'{home_away} Player Stats'][0][stats[5]]
    keeper = a[f'{home_away} Player Stats'][0][stats[6]]

    # summary.columns = summary.columns.droplevel(0)
    # passing.columns = passing.columns.droplevel(0)
    # passTypes.columns = passTypes.columns.droplevel(0)
        
    # Join the dataframe
    totalTable = pd.concat([summary,passing,passTypes,defense,possession,misc], axis=1)
    totalTable['Team'] = home_away
    keeper['Team'] = home_away

    return totalTable, keeper

homePlayers = get_player_stats('Home')
homePlayers_Stats = homePlayers[0]
homePlayers_Stats.columns = ['_'.join(col).lower() for col in homePlayers_Stats.columns]
homeKeeper = homePlayers[1]
homeKeeper.columns = ['_'.join(col).lower() for col in homeKeeper.columns]

awayPlayers = get_player_stats('Away')
awayPlayers_Stats = awayPlayers[0]
awayPlayers_Stats.columns = ['_'.join(col).lower() for col in awayPlayers_Stats.columns]
awayKeeper = awayPlayers[1]
awayKeeper.columns = ['_'.join(col).lower() for col in awayKeeper.columns]

# Join the tablea on the rows
fullPlayerStats = pd.concat([homePlayers_Stats, awayPlayers_Stats],axis=0)
fullKeeperStats = pd.concat([homeKeeper, awayKeeper],axis=0)

# Export to cs
# homePlayers_Stats.to_csv(r'./Home Players Stats.csv', index=False)
# homeKeeper.to_csv(r'./Home Keeper Stats.csv', index=False)
# awayPlayers_Stats.to_csv(r'./Away Players Stats.csv', index=False)
# awayKeeper.to_csv(r'./Away Keeper Stats.csv', index=False)
#fullKeeperStats.to_csv(r'./Keepers Stats.csv', index=False)
#fullPlayerStats.to_csv(r'./Players Stats.csv', index=False)


Goalkeeper Shots Stats

In [174]:
home_keeper_name = df_lineup[(df_lineup['player_position'] == 'Goalkeeper') & (df_lineup['bench_status'] == 'Starter') & 
          (df_lineup['home_away'] == 'home')].reset_index(drop=True)['player_name'][0]
away_keeper_name = df_lineup[(df_lineup['player_position'] == 'Goalkeeper') & (df_lineup['bench_status'] == 'Starter') & 
          (df_lineup['home_away'] == 'away')].reset_index(drop=True)['player_name'][0]

In [175]:
def keeper_stats(home_away):
    
    if home_away == 'Home':
        team = awayTeam
        keeperName = home_keeper_name #'Unai Simón Mendibil'
    elif home_away == 'Away':
        team = homeTeam
        keeperName = away_keeper_name #'Jordan Pickford'

    # Get shots faced
    shotsFaced = matchEvents[(matchEvents['type'] == 'Shot') & (matchEvents['team'] == team)].reset_index(drop=True)

    # Filter shots columns
    shotsFaced = shotsFaced[['match_id','id','minute','period','player','player_id','shot_body_part',
        'shot_outcome', 'shot_statsbomb_xg','location_x', 'location_y','shot_end_location_x',
        'shot_end_location_y', 'shot_end_location_z','team', 'team_id']]
    shotsFaced['home_away'] = home_away
    # Crosses Against
    crossesAgainst = matchEvents[(matchEvents['pass_cross'] == True) & (matchEvents['team'] == team)].reset_index(drop=True)
    crossesAgainst = crossesAgainst[['match_id','id','minute','player','player_id',
                                        'location_x', 'location_y','pass_end_location_x',
                                          'pass_end_location_y','pass_outcome','goalkeeper_type','team', 'team_id']]
    crossesAgainst['home_away'] = home_away
    # Keeper Passes
    # All Passes - Then sort to passes insie and beyond own third
    keeperPasses = matchEvents[(matchEvents['type'] == 'Pass') & (matchEvents['player'] == keeperName)].reset_index(drop=True)
    keeperPasses = keeperPasses[['id','match_id','period','minute','player','player_id','position',
                'location_x','location_y','pass_recipient','pass_recipient_id',
                'pass_end_location_x','pass_end_location_y','pass_type',
                'pass_length','pass_outcome','pass_body_part','team']]
    keeperPasses['home_away'] = home_away
    return shotsFaced, crossesAgainst, keeperPasses


# Get home keeper stats
home_Keeper = keeper_stats('Home')
home_Keeper_shotsFaced = home_Keeper[0]
home_crossesAgainst = home_Keeper[1]
home_keeperPasses = home_Keeper[2]

# Get away keeper stats
away_Keeper = keeper_stats('Away')
away_Keeper_shotsFaced = away_Keeper[0]
away_crossesAgainst = away_Keeper[1]
away_keeperPasses = away_Keeper[2]

# Join the tablea on the rows
shotsFaced = pd.concat([home_Keeper_shotsFaced, away_Keeper_shotsFaced],axis=0)
crossesAgainst = pd.concat([home_crossesAgainst, away_crossesAgainst],axis=0)
keeperPasses = pd.concat([home_keeperPasses, away_keeperPasses],axis=0)


# Export
# shotsFaced.to_csv(r'./Keeper Shots Faced.csv', index=False)
#crossesAgainst.to_csv(r'./Keeper Crosses Against.csv', index=False)
# keeperPasses.to_csv(r'./Keeper Passes.csv', index=False)

Shots

In [15]:
# Filte columns from the shotevents dataframe
shotmapDF = shotEvents[['id','match_id','minute','player','player_id','shot_body_part',
        'shot_outcome', 'shot_statsbomb_xg','location_x', 'location_y','shot_end_location_x',
        'shot_end_location_y', 'shot_end_location_z','team', 'team_id']].reset_index(drop=True)


shotmapDF['home_away'] = shotmapDF.apply(assign_home_away, axis=1)

# Export
#shotmapDF.to_csv(r'./Teams Shotmap.csv', index=False)

Pass Network and Heatmap

In [16]:
passEvent = matchEvents[matchEvents['type'] == 'Pass'].reset_index(drop=True)

# Filter
passNetworkDF = passEvent[['id','match_id','period','minute','player','player_id','position','period',
                'location_x','location_y','pass_recipient','pass_recipient_id',
                'pass_end_location_x','pass_end_location_y','pass_type','pass_cross','shot_key_pass_id',
                'pass_height','pass_length','pass_outcome','pass_goal_assist','pass_shot_assist','team']]

passNetworkDF['home_away'] = passNetworkDF.apply(assign_home_away, axis=1)

def passoutcome(row):
    if pd.isna(row['pass_outcome']):
        return 'complete'
    elif row['pass_outcome'] == 'Incomplete':
        return 'incomplete'
    else:
        return None
    

# Applky the function and create a new column
passNetworkDF['pass_success'] = passNetworkDF.apply(passoutcome, axis = 1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passNetworkDF['home_away'] = passNetworkDF.apply(assign_home_away, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passNetworkDF['pass_success'] = passNetworkDF.apply(passoutcome, axis = 1)


In [17]:
# Completed/Succesful Passes
completePass = passNetworkDF[passNetworkDF['pass_outcome'].isna()].reset_index(drop=True)

avgLocation = completePass.groupby(['player']).agg({'location_x': ['mean'], 'location_y':['mean','count']})
avgLocation.columns = ['location_x','location_y','count']


passBetween = completePass.groupby(['player','pass_recipient','team', 'home_away']).id.count().reset_index()
passBetween.rename(columns={'id':'pass_count'}, inplace=True)

passNetwork = passBetween.merge(avgLocation, left_on = ['player'], right_index=True)
passNetwork = passNetwork.merge(avgLocation, left_on= ['pass_recipient'], right_index=True, suffixes=['','_end'])

passNetwork = passNetwork[passNetwork['pass_count'] > 3]

# Export
#passBetween.to_csv(r'./Successful Passes Between.csv',  index=False)
#passNetwork.to_csv(r'./GamePassNetwork.csv', index=False)


Incomplete Passes

In [18]:
incompletePass = passNetworkDF[passNetworkDF['pass_success'] == 'incomplete'].reset_index(drop=True)

# Export
#incompletePass.to_csv(r'./Incomplete Passes.csv', index=False)

Recoveries

In [19]:
recoveryEvents = ['Duel','Interception','Ball Recovery']
recoveries = matchEvents[matchEvents['type'].isin(recoveryEvents)].reset_index(drop=True)

def ballrecovery(row):
    if row['ball_recovery_recovery_failure'] == True:
        return 'Failed'
    else: return 'Ball Recovered'

recoveries['ball_recovery_outcome'] = recoveries.apply(ballrecovery, axis=1)
recoveries['home_away'] = recoveries.apply(assign_home_away, axis=1)

recoveryDF = recoveries[['id','match_id','period','minute','player','player_id','position','period','type','home_away',
            'location_x','location_y','team','ball_recovery_outcome','duel_outcome','interception_outcome']].reset_index(drop=True)

# Export
#recoveryDF.to_csv(r'./Recoveris.csv', index=False)

Dribbles

In [20]:
dribbles = matchEvents[matchEvents['type'] == 'Dribble'].reset_index(drop=True)

dribbles['home_away'] = dribbles.apply(assign_home_away, axis=1)

# Filter colns
dribblesDF = dribbles[['id','match_id','period','minute','player','player_id','position','period','type','home_away',
            'location_x','location_y','team','dribble_outcome']].reset_index(drop=True)

# Export
#dribblesDF.to_csv(r'./Dribbles.csv', index=False)

Key Passes

In [78]:
keypass = matchEvents[matchEvents['shot_key_pass_id'].isna() == False].reset_index(drop=True)

keypass['home_away'] = keypass.apply(assign_home_away, axis=1)

# Filter Data
keyPassDF = keypass[['id','match_id','period','minute','player','player_id','position','period','home_away',
                'location_x','location_y','shot_key_pass_id','team']]

# a function that takes the 'shot_key_pass_id' from the keypassdf
keypassids = []
for i in range(len(keyPassDF)):
    keypassids.append(keyPassDF['shot_key_pass_id'][i])

# passes the id into the match events dataframe
idEvents = pd.DataFrame(columns=['id','location_x','location_y'])

for i in keypassids:

    # Pass ids into matchevent
    eventData = matchEvents[matchEvents['id'] == i]

    # Filter to get the location_x and location_y
    filterEvent = eventData[['id','location_x','location_y']].reset_index(drop=True)

    idEvents = pd.concat([idEvents, filterEvent], ignore_index=True)

# gets the location_x and location_y then replace it with the pass_end_location_x and y in the key pass df
joinedEvents = keyPassDF.merge(idEvents, left_on = ['shot_key_pass_id'], right_on=['id'], suffixes=['','_end'])


# Export
#joinedEvents.to_csv(r'./Key Passes.csv', index=False)

  idEvents = pd.concat([idEvents, filterEvent], ignore_index=True)


Crosses

In [85]:
# Crosses Against
crosses = matchEvents[matchEvents['pass_cross'] == True].reset_index(drop=True)

crosses['home_away'] = crosses.apply(assign_home_away, axis=1)

crossesDF = crosses[['id','minute','player','player_id','location_x', 'location_y','pass_end_location_x',
                    'pass_end_location_y','pass_outcome','period','team', 'home_away']]
#crossesDF.to_csv(r'./Crosses.csv', index=False)

Fouls

In [90]:
fouls = matchEvents[matchEvents['type'] == 'Foul Committed'].reset_index(drop=True)

fouls['home_away'] = fouls.apply(assign_home_away, axis=1)

foulsDF = fouls[['id','minute','player','player_id','location_x', 'location_y',
                 'period','team', 'home_away']]

# Export
#foulsDF.to_csv(r'./Fouls.csv', index=False)