In [14]:
import httpx
from dataclasses import dataclass, asdict
import pandas as pd
from openpyxl.workbook import Workbook
import os

In [2]:
@dataclass
class ClubStats:
    """Per-club statistics record made of eleven integer counters.

    parse_ClubStats fills every field from the raw stat item whose 'name'
    equals the key noted beside the field, falling back to 0 when no such
    item is present.  Field order is both the positional constructor order
    and the key order of asdict(), so it should not be rearranged.
    """
    cleanSheets: int  # raw stat 'clean_sheet'
    ownGoals: int  # raw stat 'own_goals'
    yellowCards: int  # raw stat 'total_yel_card'
    redCards: int  # raw stat 'total_red_card'
    fouls: int  # raw stat 'attempted_tackle_foul'
    offsides: int  # raw stat 'total_offside'
    passes: int  # raw stat 'total_pass'
    shots: int  # raw stat 'total_scoring_att'
    shotsOnTarget: int  # raw stat 'ontarget_scoring_att'
    hitWoodwork: int  # raw stat 'hit_woodwork'
    penaltiesScored: int  # raw stat 'att_pen_goal'

In [3]:
@dataclass
class ClubData:
    """Flat standings record for one club.

    parse_ClubData builds one instance per standings entry: identity and
    ground details first, then the same seven counters for home matches,
    away matches and all matches combined.  Field order is both the
    positional constructor order and the key order of asdict().
    """
    name: str  # entry['team']['name']
    position: int  # entry['position']
    stadium: str  # entry['ground']['name']
    capacity: int  # entry['ground'] capacity; read with dict.get, so None when the key is absent
    city: str  # entry['ground']['city']
    # home_data: counters taken from entry['home']
    home_played: int
    home_won: int
    home_drawn: int
    home_lost: int
    home_goalsFor: int
    home_goalsAgainst: int
    home_points: int

    # away_data: counters taken from entry['away']
    away_played: int
    away_won: int
    away_drawn: int
    away_lost: int
    away_goalsFor: int
    away_goalsAgainst: int
    away_points: int

    # overall_data: counters taken from entry['overall']
    played: int
    won: int
    drawn: int
    lost: int
    goalsFor: int
    goalsAgainst: int
    points: int

In [4]:
def get_ClubData():
    """Fetch the standings entries from the Pulselive football API.

    Sends a single GET request to the standings endpoint and returns the
    'entries' list of the first table in the JSON payload.

    Returns:
        list[dict]: one dict per club (the response carries keys such as
        'team', 'position', 'overall', 'home', 'away' and 'ground').  An
        empty list is returned when the request times out, so callers can
        always iterate over the result.

    Raises:
        httpx.HTTPStatusError: the server answered with a non-2xx status.
        KeyError, IndexError: the payload has no tables[0]['entries'].
    """
    url = "https://footballapi.pulselive.com/football/standings"
    querystring = {
        "altIds": "true",
        "detail": "1",
        "FOOTBALL_COMPETITION": "1",
        "comps": "1",
    }
    # Three entries whose names started with '^' ("^if-none-match",
    # "^sec-ch-ua", "^sec-ch-ua-platform") were dropped: they look like
    # shell-escaping leftovers from a copied browser request and are not
    # valid names for the headers they were meant to be.
    headers = {
        "accept": "*/*",
        "accept-language": "en-US,en;q=0.9",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "origin": "https://www.premierleague.com",
        "referer": "https://www.premierleague.com/",
        "sec-ch-ua-mobile": "?0",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "cross-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0",
    }

    try:
        response = httpx.get(url, headers=headers, params=querystring, timeout=10)
    except httpx.TimeoutException:
        print("Request timed out.")
        # Explicit empty result instead of an implicit None, which made
        # every caller that loops over the entries fail with a TypeError.
        return []

    # Fail with a clear HTTP error rather than a confusing JSON/KeyError
    # when the server does not return the expected document.
    response.raise_for_status()
    return response.json()['tables'][0]['entries']

# Ad-hoc check of the fetch: as the last expression of the cell, the value
# returned by get_ClubData() is rendered in full as the cell output.
get_ClubData()

[{'team': {'name': 'Manchester United',
   'club': {'name': 'Manchester United',
    'shortName': 'Man Utd',
    'abbr': 'MUN',
    'id': 12},
   'teamType': 'FIRST',
   'shortName': 'Man Utd',
   'id': 12,
   'altIds': {'opta': 't1'}},
  'position': 1,
  'startingPosition': 1,
  'overall': {'played': 1221,
   'won': 741,
   'drawn': 267,
   'lost': 213,
   'goalsFor': 2288,
   'goalsAgainst': 1155,
   'goalsDifference': 1133,
   'points': 2490},
  'home': {'played': 610,
   'won': 425,
   'drawn': 115,
   'lost': 70,
   'goalsFor': 1273,
   'goalsAgainst': 460,
   'goalsDifference': 813,
   'points': 1390},
  'away': {'played': 611,
   'won': 316,
   'drawn': 152,
   'lost': 143,
   'goalsFor': 1015,
   'goalsAgainst': 695,
   'goalsDifference': 320,
   'points': 1100},
  'ground': {'name': 'Old Trafford',
   'city': 'Manchester',
   'capacity': 75635,
   'location': {'latitude': 53.4626, 'longitude': -2.29103},
   'source': 'OPTA',
   'id': 42}},
 {'team': {'name': 'Arsenal',
   'clu

In [5]:
def parse_ClubData(data):
    """Flatten raw standings entries into plain dicts, one per club.

    Each entry is validated by building a ClubData instance from it and is
    then converted with asdict(), so the keys of every returned dict follow
    the ClubData field order.

    Args:
        data: iterable of standings entries (dicts with 'team', 'position',
            'ground', 'home', 'away' and 'overall' keys).  None is treated
            as an empty input.

    Returns:
        list[dict]: one flat dict per entry that could be parsed.  Entries
        with a missing key (or a section that is not a mapping) are
        reported with print() and left out.
    """
    # (ClubData field prefix, key of the matching section in the raw entry)
    sections = (("home_", "home"), ("away_", "away"), ("", "overall"))
    counters = ("played", "won", "drawn", "lost",
                "goalsFor", "goalsAgainst", "points")

    clubs_data = []
    for clubs in data or []:
        try:
            ground = clubs['ground']
            # Builds home_played ... away_points ... played ... points in one go.
            tallies = {
                f"{prefix}{counter}": clubs[section][counter]
                for prefix, section in sections
                for counter in counters
            }
            new_club = ClubData(
                name=clubs['team']['name'],
                position=clubs['position'],
                stadium=ground['name'],
                # capacity is the only optional value: None when absent
                capacity=ground.get('capacity'),
                city=ground['city'],
                **tallies
            )
        except (KeyError, TypeError) as e:
            # The original bare `except:` never bound `e`, so this message
            # itself raised NameError instead of skipping the entry.
            print(f"Skipping entry due to missing key: {e}")
            continue
        clubs_data.append(asdict(new_club))
    return clubs_data

In [6]:
def get_ClubStats(data):
    """Fetch the raw stats list of every club in the standings entries.

    For each entry the club id is read from entry['team']['id'], printed as
    a progress marker, and used to request
    https://footballapi.pulselive.com/football/stats/team/<id>.

    Args:
        data: iterable of standings entries.  None is treated as empty.

    Returns:
        list[list]: exactly one element per input entry, in input order.
        Each element is the 'stats' list of the response, or an empty list
        when the request timed out or the club has no stats.  Keeping a
        placeholder (instead of dropping the club) keeps the result aligned
        row-for-row with the standings data it is later joined to.

    Raises:
        httpx.HTTPStatusError: a request was answered with a non-2xx status.
        KeyError: an entry has no team id or a response has no 'stats' key.
    """
    # Request parameters and headers are identical for every club, so they
    # are built once instead of on every loop iteration.
    querystring = {
        "altIds": "true",
        "detail": "1",
        "FOOTBALL_COMPETITION": "1",
        "comps": "1",
    }
    # Three entries whose names started with '^' ("^if-none-match",
    # "^sec-ch-ua", "^sec-ch-ua-platform") were dropped: they look like
    # shell-escaping leftovers from a copied browser request and are not
    # valid names for the headers they were meant to be.
    headers = {
        "accept": "*/*",
        "accept-language": "en-US,en;q=0.9",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "origin": "https://www.premierleague.com",
        "referer": "https://www.premierleague.com/",
        "sec-ch-ua-mobile": "?0",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "cross-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0",
    }

    club_stats = []
    for club in data or []:
        club_id = club['team']['id']  # renamed from `id` to stop shadowing the builtin
        print(club_id)
        url = f"https://footballapi.pulselive.com/football/stats/team/{club_id}"

        stats_json = []  # placeholder used when nothing could be fetched
        try:
            response = httpx.get(url, headers=headers, params=querystring, timeout=10)
        except httpx.TimeoutException:
            print("Request timed out.")
        else:
            response.raise_for_status()
            stats_json = response.json()['stats'] or []
        club_stats.append(stats_json)
    return club_stats

In [7]:
# ClubStats field name -> 'name' of the raw stat item that feeds it.
# NOTE(review): 'attempted_tackle_foul' is used for `fouls`; confirm this is
# the intended stat.
_CLUB_STAT_SOURCES = {
    'cleanSheets': 'clean_sheet',
    'ownGoals': 'own_goals',
    'yellowCards': 'total_yel_card',
    'redCards': 'total_red_card',
    'fouls': 'attempted_tackle_foul',
    'offsides': 'total_offside',
    'passes': 'total_pass',
    'shots': 'total_scoring_att',
    'shotsOnTarget': 'ontarget_scoring_att',
    'hitWoodwork': 'hit_woodwork',
    'penaltiesScored': 'att_pen_goal',
}


def parse_ClubStats(data):
    """Convert raw per-club stat lists into flat dicts of integer counters.

    Args:
        data: iterable with one element per club; each element is a list of
            stat items, i.e. dicts carrying a 'name' and a 'value'.

    Returns:
        list[dict]: one dict per club in ClubStats field order.  A counter
        whose stat item is absent is 0; when a name occurs more than once
        the first occurrence wins.  A club whose items are malformed
        (missing 'name'/'value', non-numeric value) is reported with
        print() and left out.
    """
    club_stats = []
    for clubs in data:
        try:
            # Index the items once instead of scanning the list eleven
            # times; setdefault keeps the first item seen for each name.
            first_by_name = {}
            for item in clubs:
                first_by_name.setdefault(item['name'], item)

            new_club = ClubStats(**{
                field: int(first_by_name[source]['value']) if source in first_by_name else 0
                for field, source in _CLUB_STAT_SOURCES.items()
            })
        except (KeyError, TypeError, ValueError) as e:
            # The original bare `except:` never bound `e`, so this message
            # itself raised NameError instead of skipping the entry.
            print(f"Skipping entry due to missing key: {e}")
            continue
        club_stats.append(asdict(new_club))
    return club_stats

In [11]:
def createExcel(data, stats, path='clubs_data.xlsx'):
    """Write standings and stats side by side into one Excel sheet.

    Args:
        data: list of per-club standings dicts (one row each).
        stats: list of per-club stats dicts (one row each).  Rows are
            paired with `data` purely by position.
        path: destination workbook; defaults to 'clubs_data.xlsx'.

    The file is rewritten on every call and always includes the column
    header row.
    """
    clubs_frame = pd.DataFrame(data)
    stats_frame = pd.DataFrame(stats)

    if len(clubs_frame) != len(stats_frame):
        # concat(axis=1) pairs rows by index, so a length mismatch means
        # some rows get blanks and pairing may no longer be per club.
        print(
            f"Warning: {len(clubs_frame)} club rows but {len(stats_frame)} "
            "stats rows; rows are paired by position."
        )

    df_merged = pd.concat([clubs_frame, stats_frame], axis=1)
    # to_excel replaces the file rather than appending to it, so tying the
    # header to "file does not exist yet" produced a header-less sheet on
    # every run after the first.  Always write the header.
    df_merged.to_excel(path, index=False, header=True)

In [15]:
def main():
    """Run the full pipeline: fetch standings, fetch stats, export to Excel.

    Stops early, without touching the workbook, when no standings entries
    could be fetched.
    """
    json_data = get_ClubData()
    if not json_data:
        # Nothing came back (e.g. the request timed out); parsing it would
        # fail and exporting would overwrite the workbook with no rows.
        print("No standings data retrieved; nothing to export.")
        return
    clubs_data = parse_ClubData(json_data)

    json_stats = get_ClubStats(json_data)
    clubs_stats = parse_ClubStats(json_stats)

    createExcel(clubs_data, clubs_stats)

if __name__ == "__main__":
    main()

12
1
10
4
21
11
7
23
2
25
20
3
26
9
34
13
6
29
27
36
42
38
5
14
19
22
31
43
39
131
45
17
15
35
37
127
33
28
18
8
41
130
40
16
46
32
159
44
30
24
163
