In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json 
import time

# FotMob league IDs whose 2023/2024 fixtures are collected.
# Author's labels: Eng1,2 Esp1,2 Pt1,2 Ita1 Ale1 Fra1 Bra1 Arg1 Bel1 Tur1 Sco1 Den1 Ned1
# NOTE(review): the label order has not been checked against the ID order below - confirm.
league_ids = [47,87,55,54,53,57,61,48,268,112,40,71,140,185,64,46]

# Base link for league fixtures
league_fixtures_base_url = 'https://www.fotmob.com/api/fixtures'

# Seconds to wait for a fixtures response; without a timeout a stalled
# connection would block the whole cell indefinitely.
FIXTURES_TIMEOUT_SECONDS = 30

# Flat list of match IDs gathered from every league, in fetch order
match_ids = []

for league_id in league_ids:
    # Fixtures endpoint for this league; the season "2023/2024" is URL-encoded
    complete_url = f"{league_fixtures_base_url}?id={league_id}&season=2023%2F2024"

    try:
        response = requests.get(
            complete_url,
            headers={'User-Agent': 'Chrome/115.0.0.0'},
            timeout=FIXTURES_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        # A network failure for one league should not discard the others
        print(f"Failed to fetch data for league_id={league_id}: {exc}")
        continue

    if response.status_code == 200:
        json_response = response.json()
        # Each item is expected to carry the match ID under 'id';
        # items with a missing or empty ID are skipped.
        for item in json_response:
            id_value = item.get('id')
            if id_value:
                match_ids.append(id_value)
    else:
        print(f"Failed to fetch data for league_id={league_id}")

print(match_ids)

['4193450', '4193451', '4193452', '4193453', '4193454', '4193455', '4193456', '4193457', '4193458', '4193459', '4193476', '4193472', '4193473', '4193479', '4193477', '4193475', '4193470', '4193478', '4193471', '4193485', '4193480', '4193481', '4193482', '4193486', '4193487', '4193483', '4193484', '4193489', '4193488', '4193497', '4193499', '4193491', '4193493', '4193494', '4193498', '4193492', '4193495', '4193496', '4193490', '4193509', '4193501', '4193503', '4193504', '4193507', '4193508', '4193505', '4193500', '4193502', '4193506', '4193515', '4193517', '4193518', '4193511', '4193513', '4193510', '4193512', '4193514', '4193516', '4193519', '4193521', '4193520', '4193522', '4193524', '4193525', '4193528', '4193529', '4193527', '4193526', '4193523', '4193474', '4193536', '4193532', '4193534', '4193535', '4193537', '4193533', '4193531', '4193538', '4193539', '4193530', '4193544', '4193540', '4193542', '4193545', '4193546', '4193547', '4193543', '4193548', '4193541', '4193549', '4193555'

In [2]:
### Scraping every match to get its shotmap ###

# Base URL of the match-details endpoint; the match ID is appended to it
base_url = 'https://www.fotmob.com/api/matchDetails?matchId='

# User-Agent header
headers = {'User-Agent': 'Chrome/115.0.0.0'}

# Seconds to wait for a match-details response before giving up on that match
MATCH_TIMEOUT_SECONDS = 30

# One entry per successfully fetched match; each entry is that match's list
# of shot dicts (possibly empty).
all_match_data = []

for match_id in match_ids:
    # Construct the URL for the current match ID
    url = f'{base_url}{match_id}'

    try:
        response_ = requests.get(url, headers=headers, timeout=MATCH_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        # Keep going: one network failure should not abort the whole run
        print(f"Failed to fetch data for match ID {match_id}: {exc}")
        continue

    if response_.status_code != 200:
        # Report the status of THIS request (response_), not a leftover
        # `response` object from an earlier cell.
        print(f"Failed to fetch data for match ID {match_id}. Status code: {response_.status_code}")
        continue

    match_details = response_.json()

    # `or {}` also covers keys that are present but null in the JSON,
    # which a plain .get(key, {}) default would not.
    general = match_details.get('general') or {}

    # Use the ID reported by the API, falling back to the requested ID.
    # A separate name is used so the loop variable is not overwritten.
    reported_match_id = general.get('matchId') or match_id
    home_team_name = (general.get('homeTeam') or {}).get('name')
    away_team_name = (general.get('awayTeam') or {}).get('name')

    # Shots live under content -> shotmap -> shots
    content = match_details.get('content') or {}
    shotmap = content.get('shotmap') or {}
    shots_data = shotmap.get('shots') or []

    # Tag every shot with its match ID and the two team names
    for shot in shots_data:
        shot['matchId'] = reported_match_id
        shot['homeTeamName'] = home_team_name
        shot['awayTeamName'] = away_team_name

    # Append this match's shots (one list per match) to all_match_data
    all_match_data.append(shots_data)

# Compare the number of requested matches with the number of per-match
# entries stored in all_match_data (one entry per successful fetch).
num_match_ids = len(match_ids)
num_shots_data_entries = len(all_match_data)

if num_match_ids == num_shots_data_entries:
    print(f"Number of match IDs and shot data entries match: {num_match_ids}")
else:
    print(f"Number of match IDs and shot data entries do not match: {num_match_ids} vs {num_shots_data_entries}")

Number of match IDs and shot data entries match: 5393


In [4]:
# Flatten the per-match shot lists into one record per shot and build a
# pandas DataFrame from the records.

# DataFrame column -> key read from each shot dict. Every column uses the
# same key as its name except the two team-name columns set afterwards.
shot_columns = {
    name: name
    for name in (
        'matchId', 'id', 'eventType', 'teamId', 'playerId', 'playerName',
        'x', 'y', 'min', 'minAdded',
        'isBlocked', 'isOnTarget', 'blockedX', 'blockedY',
        'goalCrossedY', 'goalCrossedZ',
        'expectedGoals', 'expectedGoalsOnTarget',
        'shotType', 'situation', 'period',
        'isOwnGoal', 'isSavedOffLine',
        'firstName', 'lastName', 'fullName', 'teamColor',
    )
}
shot_columns['homeTeam'] = 'homeTeamName'
shot_columns['awayTeam'] = 'awayTeamName'

flattened_data = []
for match_data in all_match_data:
    for shot in match_data:
        # Keys absent from a shot become None in its record
        record = {column: shot.get(key) for column, key in shot_columns.items()}
        flattened_data.append(record)

df = pd.DataFrame(flattened_data)

print(df)

       matchId          id     eventType  teamId  playerId       playerName  \
0      4193450  2575439147          Goal    8456    737066   Erling Haaland   
1      4193450  2575440765          Miss    8456    169200  Kevin De Bruyne   
2      4193450  2575447047          Miss    8456    737066   Erling Haaland   
3      4193450  2575450695          Miss    8191   1368318    Luca Koleosho   
4      4193450  2575454289  AttemptSaved    8191   1067168     Zeki Amdouni   
...        ...         ...           ...     ...       ...              ...   
56734  4192376  2622068895          Miss    8113    891678         Paulinho   
56735  4192376  2622070291          Goal    8113   1374500       Franculino   
56736  4192376  2622074933          Miss    8113   1329007     Dario Osorio   
56737  4192376  2622077687  AttemptSaved    8113   1374500       Franculino   
56738  4192376  2622078157  AttemptSaved    8113   1026781     Gue-Sung Cho   

                x          y  min  minAdded  ...   

In [5]:
# Save the shot table as an Excel workbook, without the DataFrame index column
excel_path = 'League_shotmaps.xlsx'
df.to_excel(excel_path, index=False)