In [102]:
import ast
import asyncio
import json
import os
import sys
from collections import namedtuple
from types import SimpleNamespace

import aiohttp
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats
from understat import Understat


In [103]:
# Get each team from that league
async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        table = await understat.get_league_table("La_Liga", "2024")

leagueTable = pd.DataFrame(table[1:], columns=table[0])
leagueTable

Unnamed: 0,Team,M,W,D,L,G,GA,PTS,xG,NPxG,xGA,NPxGA,NPxGD,PPDA,OPPDA,DC,ODC,xPTS
0,Barcelona,32,23,4,5,88,32,73,84.04,78.84,35.12,32.89,45.95,6.86,15.23,386,105,71.34
1,Real Madrid,31,20,6,5,64,31,66,70.26,61.35,35.02,32.79,28.55,11.25,15.7,300,141,61.13
2,Atletico Madrid,32,18,9,5,53,27,63,57.48,52.28,34.28,30.56,21.72,13.9,14.12,237,160,57.16
3,Athletic Club,31,15,12,4,49,25,57,52.1,47.64,32.94,29.88,17.76,11.38,11.17,172,121,52.91
4,Villarreal,30,14,9,7,53,40,51,59.84,54.49,38.37,34.65,19.84,12.62,10.94,236,165,54.99
5,Real Betis,31,13,9,9,42,39,48,51.68,44.99,39.8,36.82,8.16,10.15,10.17,204,151,50.81
6,Mallorca,32,12,8,12,31,37,44,39.8,36.08,38.7,36.47,-0.39,12.3,9.27,125,192,46.24
7,Celta Vigo,32,12,7,13,47,49,43,47.13,41.93,39.45,37.08,4.85,11.41,14.3,221,155,49.71
8,Real Sociedad,31,12,5,14,30,34,41,37.57,36.09,38.71,34.25,1.83,8.09,12.12,187,151,42.56
9,Rayo Vallecano,32,10,11,11,35,39,41,42.29,41.46,47.93,43.47,-2.01,8.37,10.44,148,175,40.23


In [104]:
async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        team_stats = await understat.get_team_stats("Barcelona", 2024)

def flatten_stat_section(section_name, section_data):
    """Turn one nested stats section into a flat DataFrame.

    Parameters
    ----------
    section_name : str
        Name given to the column that holds the keys of ``section_data``.
    section_data : dict
        Mapping of category label -> dict of values. A nested ``'against'``
        dict, when present, is expanded into ``against_<key>`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per key of ``section_data``; the ``'against'`` column is
        replaced by its expanded columns and a ``'stat'`` column is dropped
        if it exists.
    """
    flat = (
        pd.DataFrame.from_dict(section_data, orient="index")
        .reset_index()
        .rename(columns={"index": section_name})
    )

    # Expand the nested 'against' dicts into their own prefixed columns
    if 'against' in flat.columns:
        conceded = pd.json_normalize(flat['against']).add_prefix("against_")
        flat = pd.concat([flat.drop(columns=['against']), conceded], axis=1)

    # 'stat' is removed when present; otherwise the frame is returned as is
    return flat.drop(columns=['stat']) if "stat" in flat.columns else flat

# Flatten the four stat sections in one pass; the names on the left are bound
# in the same order as the section keys on the right.
situation, timing, shot_zone, attack_speed = (
    flatten_stat_section(section_key, team_stats[section_key])
    for section_key in ("situation", "timing", "shotZone", "attackSpeed")
)

In [105]:
# List dataframes, keyed by a readable label
df_dict = {
    "situation": situation,
    "timing": timing,
    "shot_zone": shot_zone,
    "attack_speed": attack_speed,
}

# Display one under the other.
# Iterate over the frames themselves: the previous loop bound the name `df`
# to each dict *key* (a string) and left it behind in the notebook namespace,
# where `df` conventionally means a DataFrame.
for section_df in df_dict.values():
    display(section_df)

Unnamed: 0,situation,shots,goals,xG,against_shots,against_goals,against_xG
0,OpenPlay,419,69,69.554013,196,27,27.258089
1,FromCorner,76,7,7.062155,35,2,2.858903
2,DirectFreekick,23,1,1.617523,7,0,0.353514
3,SetPiece,19,5,4.575287,11,0,2.608172
4,Penalty,7,6,5.202943,3,3,2.229833


Unnamed: 0,timing,shots,goals,xG,against_shots,against_goals,against_xG
0,1-15,70,12,12.182448,35,4,4.964253
1,16-30,83,17,15.54575,33,3,3.490968
2,31-45,83,7,11.318266,42,7,8.072534
3,46-60,121,19,18.323094,48,5,5.918752
4,61-75,80,12,13.474933,39,6,6.099062
5,76+,107,21,17.167431,55,7,6.762941


Unnamed: 0,shotZone,shots,goals,xG,against_shots,against_goals,against_xG
0,ownGoals,3,3,3.0,0,0,0.0
1,shotOboxTotal,187,13,8.084643,67,2,2.339317
2,shotPenaltyArea,301,56,55.898192,167,23,26.521113
3,shotSixYardBox,53,16,21.029087,18,7,6.44808


Unnamed: 0,attackSpeed,shots,goals,xG,against_shots,against_goals,against_xG
0,Normal,330,52,50.959581,151,17,19.874912
1,Standard,125,19,18.457908,56,5,8.050421
2,Slow,52,9,8.739464,15,1,1.212252
3,Fast,37,8,9.854969,30,9,6.170925


In [106]:
async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        results = await understat.get_team_results(
            "Barcelona",
            2024
        )
        
df = pd.DataFrame(results)
# Extract necessary info
df_cleaned = pd.DataFrame({
    'MatchID': df['id'],
    'side': df['side'],
    'HomeTeam': df['h'].apply(lambda x: eval(x)['title'] if isinstance(x, str) else x['title']),
    'AwayTeam': df['a'].apply(lambda x: eval(x)['title'] if isinstance(x, str) else x['title']),
    'HomeGoals': df['goals'].apply(lambda x: int(eval(x)['h']) if isinstance(x, str) else int(x['h'])),
    'HomexG': df['xG'].apply(lambda x: float(eval(x)['h']) if isinstance(x, str) else float(x['h'])),
    'AwayGoals': df['goals'].apply(lambda x: int(eval(x)['a']) if isinstance(x, str) else int(x['a'])),
    'AwayxG': df['xG'].apply(lambda x: float(eval(x)['a']) if isinstance(x, str) else float(x['a'])),
    'Forecast': df['forecast'],
    'Result': df['result']
})

# If 'forecast' is a string dict, you can optionally parse it too:
df_cleaned['Forecast'] = df_cleaned['Forecast'].apply(lambda x: eval(x) if isinstance(x, str) else x)

# Split forecast dictionary into separate columns
df_cleaned['Odds Win'] = df_cleaned['Forecast'].apply(lambda x: x['w']) * 100
df_cleaned['Odds Draw'] = df_cleaned['Forecast'].apply(lambda x: x['d']) * 100
df_cleaned['Odss Loss'] = df_cleaned['Forecast'].apply(lambda x: x['l']) * 100
df_cleaned = df_cleaned.drop(columns="Forecast")

# Preview
df_cleaned


Unnamed: 0,MatchID,side,HomeTeam,AwayTeam,HomeGoals,HomexG,AwayGoals,AwayxG,Result,Odds Win,Odds Draw,Odss Loss
0,26987,a,Valencia,Barcelona,1,1.55206,2,3.48824,w,13.143847,12.955703,73.90004
1,26995,h,Barcelona,Athletic Club,2,2.41577,1,1.09985,w,66.387872,17.954589,15.657534
2,27004,a,Rayo Vallecano,Barcelona,1,0.392473,2,1.51872,w,9.28889,25.010307,65.700803
3,27012,h,Barcelona,Real Valladolid,7,4.94022,0,0.422493,w,97.305892,2.093156,0.580999
4,27028,a,Girona,Barcelona,1,1.09568,4,2.5936,w,13.897616,16.556577,69.545796
5,27037,a,Villarreal,Barcelona,1,2.39605,5,4.1937,w,18.118114,12.610141,69.268351
6,27043,h,Barcelona,Getafe,1,2.20799,0,0.929851,w,66.410359,19.002469,14.58717
7,27056,a,Osasuna,Barcelona,4,2.50067,2,0.751814,l,75.516296,15.361629,9.122068
8,27064,a,Alaves,Barcelona,0,0.794829,3,2.49367,w,9.83824,15.705535,74.456218
9,27074,h,Barcelona,Sevilla,5,4.08528,1,0.599652,w,92.994445,5.011434,1.991592


In [None]:
# split data into home and away
home = df_cleaned[df_cleaned['side'] == 'h'].drop(columns = ['HomeTeam','side']).rename(columns={"AwayTeam":"Team","HomeGoals": "g", "AwayGoals": "gA","HomexG": "xG", "AwayxG": "xGa"})
home = home[["MatchID","Team","g","gA","xG","xGa","Odds Win","Odds Draw","Odss Loss","Result"]]
home


In [None]:
# Away fixtures, re-expressed from the selected team's point of view: the
# opponent is the home side, so Home* columns become the conceded figures.
#
# The win/loss odds are swapped as well. In the displayed results the away
# matches with Result 'w' carry a low 'Odds Win' and a high 'Odss Loss', which
# indicates the forecast is expressed for the home side -- TODO confirm
# against the understat forecast semantics. Without the swap the goal/xG
# columns would be team-oriented while the odds stayed opponent-oriented.
away = (
    df_cleaned[df_cleaned['side'] == 'a']
    .drop(columns=['AwayTeam', 'side'])
    .rename(columns={
        "HomeTeam": "Team",
        "HomeGoals": "gA",
        "AwayGoals": "g",
        "HomexG": "xGa",
        "AwayxG": "xG",
        # rename maps every label independently, so this is a true swap
        "Odds Win": "Odss Loss",
        "Odss Loss": "Odds Win",
    })
)
away = away[["MatchID", "Team", "g", "gA", "xG", "xGa", "Odds Win", "Odds Draw", "Odss Loss", "Result"]]
away