In [59]:
import requests
from dotenv import load_dotenv
import os
import pandas as pd

load_dotenv()

# access football-data.org API
API_KEY = os.getenv('FOOTBALL_DATA_API_KEY')

''' GET IDS and NAMES for EPL CLUBS '''
url = 'https://api.football-data.org/v4/competitions/PL/standings'

headers = {
    'X-Auth-Token': API_KEY
}

response = requests.get(url, headers=headers)
print(response)

if response.status_code == 200:
    print('Data retrieved from football-data API')
    data = response.json()
    clubs = data["standings"][0]["table"]
    club_data = pd.DataFrame([{
        'name': c["team"]["shortName"],
        'id': c["team"]["id"]
    } for c in clubs])
    print(club_data)

else:
    print(f"failed to retrieve data: {response.status_code}")

<Response [200]>
Data retrieved from football-data API
              name    id
0         Man City    65
1        Liverpool    64
2          Arsenal    57
3      Aston Villa    58
4          Chelsea    61
5    Brighton Hove   397
6       Nottingham   351
7        Tottenham    73
8        Brentford   402
9           Fulham    63
10     Bournemouth  1044
11       Newcastle    67
12        West Ham   563
13      Man United    66
14  Leicester City   338
15         Everton    62
16  Crystal Palace   354
17    Ipswich Town   349
18   Wolverhampton    76
19     Southampton   340


In [65]:
''' SHOW firsthalf 0-0 instances for a given club '''

club_id = 354
url = f"https://api.football-data.org/v4/teams/{club_id}/matches?season=2024&status=FINISHED"

response = requests.get(url, headers=headers)
print(response)

if response.status_code == 200:
    print('Data retrieved from football-data API')
    data = response.json()
    matches_data = data["matches"]
    ht_scores = pd.DataFrame([{
        'home_squad': m["homeTeam"]["tla"],
        'ht_score_home': m["score"]["halfTime"]["home"],
        'away_squad': m["awayTeam"]["tla"],
        'ht_score_away': m["score"]["halfTime"]["away"],
        'nil_nil_ht': (True if (
            (m["score"]["halfTime"]["home"] == 0 and m["score"]["halfTime"]["away"] == 0)
        )else (False))

    } for m in matches_data])
    # print(ht_scores["nil_nil_ht"].value_counts(normalize=True).get(True, 0) * 100)
    nil_nil_ht_pct = ht_scores["nil_nil_ht"].value_counts(normalize=True).get(True, 0) *100
    print(ht_scores)
else:
    print(f"failed to retrieve data: {response.status_code}")

<Response [200]>
Data retrieved from football-data API
  home_squad  ht_score_home away_squad  ht_score_away  nil_nil_ht
0        MUN              0        FUL              0        True
1        BHA              1        MUN              0       False
2        MUN              0        LIV              2       False
3        SOU              0        MUN              2       False
4        CRY              0        MUN              0        True
5        MUN              0        TOT              1       False
6        AVL              0        MUN              0        True
7        MUN              0        BRE              1       False
8        WHU              0        MUN              0        True


In [70]:
import time

def calculate_ht_nil_nil_percentage(club_id):
    url = f"https://api.football-data.org/v4/teams/{club_id}/matches?season=2024&status=FINISHED"
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        # print('input', club_id)
        data = response.json()
        club_matches_data = data["matches"]
        new_ht_scores = pd.DataFrame([{
            'nil_nil_ht': (True if (
                (m["score"]["halfTime"]["home"] == 0 and m["score"]["halfTime"]["away"] == 0)
            )else False)
        } for m in matches_data])
        nil_nil_ht_pct = new_ht_scores["nil_nil_ht"].value_counts(normalize=True).get(True, 0) *100
        return nil_nil_ht_pct
    else:
        print(f"failed to retrieve data: {response.status_code} club_id = {club_id}")

results = []
# we run into an issue: I am limited to 10 calls per minute on my free-tier. I can't run this for 19 clubs like I wrote it here.
# I abbreviate it with head(9)
for row in club_data.tail(9).itertuples():
    print(row)
    club_id = row.id
    print(club_id)
    name = row.name
    percent_nil_nil_ht = calculate_ht_nil_nil_percentage(club_id)
    print('TEST:', percent_nil_nil_ht)
    results.append({
        'name': name,
        'percent_nil_nil_ht': percent_nil_nil_ht
    })
print(results_df)


Pandas(Index=11, name='Newcastle', id=67)
67
TEST: 44.44444444444444
Pandas(Index=12, name='West Ham', id=563)
563
TEST: 44.44444444444444
Pandas(Index=13, name='Man United', id=66)
66
failed to retrieve data: 429 club_id = 66
TEST: None
Pandas(Index=14, name='Leicester City', id=338)
338
failed to retrieve data: 429 club_id = 338
TEST: None
Pandas(Index=15, name='Everton', id=62)
62
failed to retrieve data: 429 club_id = 62
TEST: None
Pandas(Index=16, name='Crystal Palace', id=354)
354
failed to retrieve data: 429 club_id = 354
TEST: None
Pandas(Index=17, name='Ipswich Town', id=349)
349
failed to retrieve data: 429 club_id = 349
TEST: None
Pandas(Index=18, name='Wolverhampton', id=76)
76
failed to retrieve data: 429 club_id = 76
TEST: None
Pandas(Index=19, name='Southampton', id=340)
340
failed to retrieve data: 429 club_id = 340
TEST: None
             name  percent_nil_nil_ht
0       Newcastle           22.222222
1        West Ham           22.222222
2      Man United           22.

In [60]:
print(calculate_ht_nil_nil_percentage(65))

22.22222222222222


In [68]:
print(calculate_ht_nil_nil_percentage(67))

44.44444444444444
