In [13]:
import pandas as pd
import numpy as np
import requests
import json
from src.config.secrets import settings

## Prelude: Sample coding

In [14]:
# base Url and API key
url = settings.API_BASE_URL
api_key = settings.API_KEY

# Headers
headers = {
    'x-apisports-key': api_key,
    }


# Qeury
league_param = {
    "id": "39",
    "name": "Premier League",
    "code": "GB-ENG",
    "season": 2024,
    "current": False,
    "type": "league",
    "country": "England"}


team_param = {
    "league": "39",
    "season": "2024"
}


team_stats_param = {
    "league": "39",
    "season": 2024,
    'team': "33"
}


players_param = {
    "league": "39",
    "season": "2024",
}

In [3]:
print("Request Url :" + url)

Request Url : https://v3.football.api-sports.io/


In [8]:
# League Request
response_l = requests.get(url= url+"leagues", headers=headers, params=league_param)
print(response_l.status_code)

200


In [9]:
league_data = response_l.json()
league_data.keys()

dict_keys(['get', 'parameters', 'errors', 'results', 'paging', 'response'])

In [10]:
league_data['response']

[{'league': {'id': 39,
   'name': 'Premier League',
   'type': 'League',
   'logo': 'https://media.api-sports.io/football/leagues/39.png'},
  'country': {'name': 'England',
   'code': 'GB-ENG',
   'flag': 'https://media.api-sports.io/flags/gb-eng.svg'},
  'seasons': [{'year': 2024,
    'start': '2024-08-16',
    'end': '2025-05-25',
    'current': False,
    'coverage': {'fixtures': {'events': True,
      'lineups': True,
      'statistics_fixtures': True,
      'statistics_players': True},
     'standings': True,
     'players': True,
     'top_scorers': True,
     'top_assists': True,
     'top_cards': True,
     'injuries': True,
     'predictions': True,
     'odds': False}}]}]

In [15]:
# Teams Request
response_t = requests.get(url= url+"teams", headers=headers, params=team_param)
print(response_t.status_code)

200


In [16]:
team_dict = response_t.json()
team_dict.keys()

dict_keys(['get', 'parameters', 'errors', 'results', 'paging', 'response'])

In [21]:
team_data = team_dict['response']
team_df = pd.DataFrame([team_data[i]['team'] for i in range(len(team_data))]).set_index('id')
team_df.index.tolist()

[33,
 34,
 35,
 36,
 39,
 40,
 41,
 42,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 55,
 57,
 65,
 66]

In [29]:
# Player data Request
response_ts = requests.get(url=  url+"teams/statistics", headers=headers, params=team_stats_param)
print(response_ts.status_code)

200


In [30]:
stats_data = response_ts.json()
stats_data.keys()

dict_keys(['get', 'parameters', 'errors', 'results', 'paging', 'response'])

In [33]:
stats = stats_data['errors']
print(type(stats))
stats

<class 'dict'>


{'plan': 'Free plans do not have access to this season, try from 2022 to 2024.'}

In [13]:
# For team id
stats['team']['id']

# for form
stats['form']

#for fixtures
stats['fixtures']['wins']['home']
stats['fixtures']['wins']['away']
stats['fixtures']['loses']['home']
stats['fixtures']['loses']['away']
stats['fixtures']['draws']['home']
stats['fixtures']['draws']['away']

# for goals
stats['goals']['for']['total']['home']
stats['goals']['for']['total']['away']
stats['goals']['against']['total']['home']
stats['goals']['against']['total']['away']

# for clean sheets
stats['clean_sheet']['home']
stats['clean_sheet']['away']

# for failed to score
stats['failed_to_score']['home']
stats['failed_to_score']['away']

# for penalties
stats['penalty']['scored']['total']
stats['penalty']['missed']['total']

# for cards
sum((bucket.get("total") or 0) for bucket in stats['cards']['yellow'].values())
sum((bucket.get("total") or 0) for bucket in stats['cards']['red'].values())

3

In [12]:
# stats is your dict, e.g. stats["cards"]["yellow"]["0-15"]["total"]

yellow_totals_sum = sum((bucket.get("total") or 0) for bucket in stats['cards']['yellow'].values())

print(yellow_totals_sum)

86


In [60]:
def get_teams(season, league=39):
    team_param = {
    "league": league,
    "season": season
    }
    response_t = requests.get(url= url+"teams", headers=headers, params=team_param)
    if response_t.status_code == 200:
        team_dict = response_t.json()
        team_data = team_dict['response']
        team_df = pd.DataFrame([team_data[i]['team'] for i in range(len(team_data))])
        return team_df
    else:
        print(f"Error: {response_t.status_code}")
        return None

In [None]:
def get_team_stats(team_id, season=2024, league_id=39):
    team_stats_param = {
        "league": "39",
        "season": season,
        'team': team_id
    }
    response_ts = requests.get(url= url+"teams/statistics", headers=headers, params=team_stats_param)
    if response_ts.status_code == 200:
        stats_data = response_ts.json()
        return stats_data['response']
    else:
        print(f"Failed to retrieve data for team ID {team_id}. Status code: {response_ts.status_code}")
        return None

In [2]:
import pickle
def load_data(file_path):
    with open(file_path, 'rb') as f:
        data = pickle.load(f)
    return data

In [None]:
# Loading Team data

team_24_df = load_data("../data/2024/teams_2024.pkl")
#team_23_df = load_data("../data/2023/teams_2023.pkl")
#team_22_df = load_data("../data/2022/teams_2022.pkl")


In [22]:
# Loading Team stats data
#team_stats_24_df = load_data("../data/2024/team_stats_2024.pkl")
team_stats_23_df = load_data("../data/2023/team_stats_2023.pkl")
#team_stats_22_df = load_data("../data/2022/team_stats_2022.pkl")

In [10]:
team_23_df.head()

NameError: name 'team_23_df' is not defined

In [23]:
team_stats_23_df.tail()

Unnamed: 0,id,form,fixture_hw,fixtures_aw,fixtures_hl,fixtures_al,fixtures_hd,fixtures_ad,goals_h,goals_a,conceded_h,conceded_a,clean_sheet_h,clean_sheet_a,fts_h,fts_a,penalty_scored,penalty_missed,cards_yellow,cards_red
15,55,DWDDLLDLWWWLLWLLLLLWLLWLLLDLLDDDWWLDWL,5,5,7,12,7,2,29,27,34,31,4,3,3,5,3,0,89,2
16,62,LLLDLLLLLLWDLLLWLDLLDLLWLLLDDLDLLLLLLL,2,1,13,15,4,3,19,16,57,47,1,0,7,8,5,0,101,5
17,65,LWLWDLDDDLWLLLLDLLWWLLDLWLLLDDWLDLLWLW,5,4,9,11,5,4,27,22,30,37,2,2,4,6,1,0,82,3
18,66,LWWLWWWDWWLWWDWWWDLWDLWLWWWLDWLDWWDLDL,12,8,3,7,4,4,48,28,28,33,4,4,1,5,4,0,94,2
19,1359,LLLLDWLLDLDLWLLLWWLDWDLLLLDLDLLWLLLDLL,4,2,11,13,4,4,28,24,37,48,2,0,1,4,5,0,71,0


In [24]:
team_stats_23_df.isna().sum()

id                0
form              0
fixture_hw        0
fixtures_aw       0
fixtures_hl       0
fixtures_al       0
fixtures_hd       0
fixtures_ad       0
goals_h           0
goals_a           0
conceded_h        0
conceded_a        0
clean_sheet_h     0
clean_sheet_a     0
fts_h             0
fts_a             0
penalty_scored    0
penalty_missed    0
cards_yellow      0
cards_red         0
dtype: int64