## 1. Introduction

### This is a portfolio project in which I explore Euroleague data and analyse it to gather insights beyond what is possible with the currently easily accessible resources. I try to use the best data visualisation techniques for each type of data.

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import json
from datetime import datetime
from time import sleep

## 2. Data Collection

### Fetching data from the Euroleague API.

API (JSON) → Python (DataFrame) → SQL → Analysis

In [2]:
# Endpoints of the Euroleague live API that are downloaded, mapped to a short
# description of what each one returns. Only the keys are used by the download
# code; the descriptions are documentation and follow what the saved sample
# files further down in this notebook actually contain.
uri_dict = {
    'Header': 'Game metadata (teams, date, location, scores)',
    'BoxScore': 'Detailed player statistics per game',
    'Comparison': 'Team comparison stats (rebounds, assists, etc.)',
    'PlayByPlay': 'Timeline of in-game events (fouls, points, substitutions)',
    'Points': 'Per-shot scoring events (made and missed) with court coordinates',
    'ShootingGraphic': 'Team totals for fastbreak, turnover and second-chance points',
    'Evolution': 'Cumulative score per minute, score differences and largest leads'
}

In [3]:
# Settings
max_game_code = 400                           # highest game code requested per season
base_url = "https://live.euroleague.net/api"  # root of the Euroleague live API
output_dir = "data"                           # local folder the JSON files are written to

# Season codes look like "E2024". Take the four years preceding the current
# calendar year, newest first.
current_year = datetime.now().year
last_4_seasons = ["E" + str(current_year - years_back) for years_back in range(1, 5)]
last_4_seasons

['E2024', 'E2023', 'E2022', 'E2021']

In [4]:
# #test
# import requests
# print(requests.get("https://live.euroleague.net/api/Header?gamecode=1&seasoncode=E2024").json())

In [6]:
# Create folder structure: one <output_dir>/<season>/<uri>/ directory
# for every season code and every endpoint name.
for season in last_4_seasons:
    for uri in uri_dict:
        path = os.path.join(output_dir, season, uri)
        os.makedirs(path, exist_ok=True)  # exist_ok: re-running the cell is harmless

In [7]:
def fetch_and_save(uri, gamecode, seasoncode, timeout=10):
    """Download one endpoint's JSON for one game and write it to disk.

    Requests ``{base_url}/{uri}?gamecode=<gamecode>&seasoncode=<seasoncode>``.
    If the response is HTTP 200 and its body is neither empty nor the literal
    ``null``, the decoded JSON is written to
    ``{output_dir}/{seasoncode}/{uri}/game_{gamecode}.json``.

    Every outcome is reported with ``print``; errors are never raised, so a
    single failing game does not abort a long download loop.

    Parameters
    ----------
    uri : str
        Endpoint name appended to ``base_url`` (the notebook passes the keys
        of ``uri_dict``).
    gamecode : int
        Game code within the season.
    seasoncode : str
        Season code such as ``"E2024"``.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).
        ``requests`` waits indefinitely when no timeout is given, which
        would let one stalled connection hang the whole loop.

    Returns
    -------
    None
    """
    url = f"{base_url}/{uri}?gamecode={gamecode}&seasoncode={seasoncode}"
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200 and response.content.strip() not in [b"", b"null"]:
            data = response.json()
            target_dir = os.path.join(output_dir, seasoncode, uri)
            # Do not depend on the folder-creation cell having been run first.
            os.makedirs(target_dir, exist_ok=True)
            filename = os.path.join(target_dir, f"game_{gamecode}.json")
            with open(filename, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            print(f"‚úÖ Saved: {uri} for game {gamecode}, {seasoncode}")
        else:
            print(f"‚ùå Skipped (empty or error): {uri} for game {gamecode}, {seasoncode}")
    except Exception as e:
        # Deliberately broad: report network, decoding and disk errors alike
        # and carry on with the next request.
        print(f"‚ùå Exception: {uri} game {gamecode}, {seasoncode} ‚Äî {e}")


In [34]:
# Loop through all combinations (last ran 29-04-2025)
# Files that are already on disk are skipped, so re-running this cell only
# requests what is still missing instead of repeating every download.
# Delete a file to force it to be fetched again.
for season in last_4_seasons:
    for gamecode in range(1, max_game_code + 1):
        hit_api = False
        for uri in uri_dict:
            cached_file = os.path.join(output_dir, season, uri, f"game_{gamecode}.json")
            if os.path.exists(cached_file):
                continue
            fetch_and_save(uri, gamecode, season)
            hit_api = True
        if hit_api:
            # Pause between games only when requests were actually sent.
            sleep(0.3)

‚úÖ Saved: Header for game 1, E2024
‚úÖ Saved: BoxScore for game 1, E2024
‚úÖ Saved: Comparison for game 1, E2024
‚úÖ Saved: PlayByPlay for game 1, E2024
‚úÖ Saved: Points for game 1, E2024
‚úÖ Saved: ShootingGraphic for game 1, E2024
‚úÖ Saved: Evolution for game 1, E2024
‚úÖ Saved: Header for game 2, E2024
‚úÖ Saved: BoxScore for game 2, E2024
‚úÖ Saved: Comparison for game 2, E2024
‚úÖ Saved: PlayByPlay for game 2, E2024
‚úÖ Saved: Points for game 2, E2024
‚úÖ Saved: ShootingGraphic for game 2, E2024
‚úÖ Saved: Evolution for game 2, E2024
‚úÖ Saved: Header for game 3, E2024
‚úÖ Saved: BoxScore for game 3, E2024
‚úÖ Saved: Comparison for game 3, E2024
‚úÖ Saved: PlayByPlay for game 3, E2024
‚úÖ Saved: Points for game 3, E2024
‚úÖ Saved: ShootingGraphic for game 3, E2024
‚úÖ Saved: Evolution for game 3, E2024
‚úÖ Saved: Header for game 4, E2024
‚úÖ Saved: BoxScore for game 4, E2024
‚úÖ Saved: Comparison for game 4, E2024
‚úÖ Saved: PlayByPlay for game 4, E2024
‚úÖ Saved: Points for ga

## Data Exploration

### Inspecting data from the different URIs. Identifying structure and key fields.

In [6]:
# Load a 'header' sample file
header_sample_path = 'data/E2024/Header/game_1.json'
with open(header_sample_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Pretty-print the whole document to inspect its keys and structure
print(json.dumps(data, indent=2))


{
  "Live": false,
  "Round": "1",
  "Date": "03/10/2024",
  "Hour": "18:45 ",
  "Stadium": "UBER ARENA",
  "Capacity": "11856",
  "TeamA": "ALBA BERLIN",
  "TeamB": "PANATHINAIKOS AKTOR ATHENS",
  "CodeTeamA": "BER",
  "TVCodeA": "BER",
  "CodeTeamB": "PAN",
  "TVCodeB": "PAO",
  "imA": "BER       ",
  "imB": "PAN       ",
  "ScoreA": "77",
  "ScoreB": "87",
  "CoachA": "GONZALEZ, ISRAEL",
  "CoachB": "ATAMAN, ERGIN",
  "GameTime": "40:00",
  "RemainingPartialTime": "00:00",
  "wid": "80",
  "Quarter": "",
  "FoultsA": "16",
  "FoultsB": "11",
  "TimeoutsA": "3",
  "TimeoutsB": "2",
  "ScoreQuarter1A": 17,
  "ScoreQuarter2A": 36,
  "ScoreQuarter3A": 57,
  "ScoreQuarter4A": 77,
  "ScoreExtraTimeA": 0,
  "ScoreQuarter1B": 26,
  "ScoreQuarter2B": 45,
  "ScoreQuarter3B": 62,
  "ScoreQuarter4B": 87,
  "ScoreExtraTimeB": 0,
  "Phase": "REGULAR SEASON",
  "PhaseReducedName": "R S ",
  "Competition": "EUROLEAGUE 2024-25",
  "CompetitionReducedName": "E2024     ",
  "pcom": "E2024     ",
  "Re

Header URI – Game metadata (teams, date, location, scores, referees). The year in the season code is the first year of the season: E2023 means the 2023-24 season, E2024 means the 2024-25 season, etc.

In [7]:
# Load a 'boxscore' sample file
# The folder is created as "BoxScore" (endpoint name), so the path must use
# that exact case to work on case-sensitive filesystems.
with open('data/E2024/BoxScore/game_300.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Pretty-print the whole document to inspect its keys and structure
print(json.dumps(data, indent=2))


{
  "Live": false,
  "Referees": "MOGULKOC, EMIN, RYZHYK, BORYS, SUKYS, ARTURAS",
  "Attendance": "8548",
  "ByQuarter": [
    {
      "Team": "EA7 EMPORIO ARMANI MILAN",
      "Quarter1": 31,
      "Quarter2": 31,
      "Quarter3": 20,
      "Quarter4": 29
    },
    {
      "Team": "BASKONIA VITORIA-GASTEIZ",
      "Quarter1": 18,
      "Quarter2": 17,
      "Quarter3": 26,
      "Quarter4": 28
    }
  ],
  "EndOfQuarter": [
    {
      "Team": "EA7 EMPORIO ARMANI MILAN",
      "Quarter1": 31,
      "Quarter2": 62,
      "Quarter3": 82,
      "Quarter4": 111
    },
    {
      "Team": "BASKONIA VITORIA-GASTEIZ",
      "Quarter1": 18,
      "Quarter2": 35,
      "Quarter3": 61,
      "Quarter4": 89
    }
  ],
  "Stats": [
    {
      "Team": "EA7 EMPORIO ARMANI MILAN",
      "Coach": "MESSINA, ETTORE",
      "PlayersStats": [
        {
          "Player_ID": "P011064   ",
          "IsStarter": 1,
          "IsPlaying": 0,
          "Team": "MIL",
          "Dorsal": "2",
          "P

Boxscore data seems to be the most useful for this project. It shows all of the main stats of each player in that game. Boxscore statistics accumulated over all games of a season may reveal interesting information. All columns are readable as-is.

In [11]:
# Load a 'Comparison' sample file
# The folder is created as "Comparison" (endpoint name), so the path must use
# that exact case to work on case-sensitive filesystems.
with open('data/E2024/Comparison/game_300.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Pretty-print the whole document to inspect its keys and structure
print(json.dumps(data, indent=2))

{
  "DefensiveReboundsA": 21,
  "OffensiveReboundsB": 15,
  "OffensiveReboundsA": 15,
  "DefensiveReboundsB": 23,
  "TurnoversStartersA": 0,
  "TurnoversBenchA": 3,
  "TurnoversStartersB": 4,
  "TurnoversBenchB": 11,
  "StealsStartersA": 4,
  "StealsBenchA": 3,
  "StealsStartersB": 0,
  "StealsBenchB": 3,
  "AssistsStartersA": 20,
  "AssistsBenchA": 12,
  "AssistsStartersB": 8,
  "AssistsBenchB": 10,
  "PointsStartersA": 70,
  "PointsBenchA": 41,
  "PointsStartersB": 37,
  "PointsBenchB": 52,
  "maxA": 14,
  "minutePrevA": 12,
  "prevA": "36-23",
  "minuteStrA": 15,
  "strA": "50-23",
  "maxB": 7,
  "minutePrevB": 25,
  "prevB": "77-44",
  "minuteStrB": 26,
  "strB": "77-51",
  "maxLeadA": 33,
  "maxLeadB": 2,
  "minuteMaxLeadA": 25,
  "minuteMaxLeadB": 1,
  "puntosMaxLeadA": "77-44",
  "puntosMaxLeadB": "0-2",
  "minutoActual": 1,
  "isLive": false
}


There is no indication of team names, so only readable in tandem with data from another URI. Not all columns are easily readable.
Explanation of harder to read columns:

Field	Explanation
maxA, maxB	Largest scoring run by Team A and Team B, respectively (i.e. most points scored without interruption).
minutePrevA, minutePrevB	Minute when the last significant run ended for Team A and B.
prevA, prevB	Score at the end of the previous significant run for Team A and B.
minuteStrA, minuteStrB	Minute when the strongest run of the game started for Team A and B.
strA, strB	Score at the start of the strongest run for Team A and B.
maxLeadA, maxLeadB	Maximum lead (in points) achieved by Team A and B.
minuteMaxLeadA, minuteMaxLeadB	Minute at which Team A and B reached their maximum lead.
puntosMaxLeadA, puntosMaxLeadB	Scoreline at the time of maximum lead for Team A and B.
minutoActual	Current minute (useful during live games — static for past games).
isLive	Boolean flag for whether the game is ongoing (true) or finished (false).

In [12]:
# Load a 'Points' sample file
# The folder is created as "Points" (endpoint name), so the path must use
# that exact case to work on case-sensitive filesystems.
with open('data/E2024/Points/game_250.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Pretty-print the whole document to inspect its keys and structure
print(json.dumps(data, indent=2))

{
  "Rows": [
    {
      "NUM_ANOT": 6,
      "TEAM": "ASV       ",
      "ID_PLAYER": "P007027   ",
      "PLAYER": "MALEDON, THEO",
      "ID_ACTION": "3FGA",
      "ACTION": "Missed Three Pointer",
      "POINTS": 0,
      "COORD_X": 677,
      "COORD_Y": 62,
      "ZONE": "I",
      "FASTBREAK": "0",
      "SECOND_CHANCE": "0",
      "POINTS_OFF_TURNOVER": "0",
      "MINUTE": 1,
      "CONSOLE": "09:39",
      "POINTS_A": 0,
      "POINTS_B": 0,
      "UTC": "20250307190227"
    },
    {
      "NUM_ANOT": 8,
      "TEAM": "TEL       ",
      "ID_PLAYER": "P011219   ",
      "PLAYER": "SORKIN, ROMAN",
      "ID_ACTION": "2FGM",
      "ACTION": "Two Pointer",
      "POINTS": 2,
      "COORD_X": -43,
      "COORD_Y": 94,
      "ZONE": "B",
      "FASTBREAK": "1",
      "SECOND_CHANCE": "0",
      "POINTS_OFF_TURNOVER": "0",
      "MINUTE": 1,
      "CONSOLE": "09:29",
      "POINTS_A": 0,
      "POINTS_B": 2,
      "UTC": "20250307190236"
    },
    {
      "NUM_ANOT": 10,
      "TE

Points - individual scoring breakdown (including missed shots). Each scoring action documented.

Unknown column explanation:

Field	Explanation
NUM_ANOT	Sequential event number (e.g., a value of 17 would mark the 17th recorded event).

ID_PLAYER	Unique player ID (internal Euroleague code, not standardized globally).

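ID_ACTION	Type of shot event, including misses. Codes seen in the sample above: 2FGM (Two Pointer made) and 3FGA (Missed Three Pointer); other codes (e.g. for free throws and made three-pointers) presumably follow the same pattern — to be confirmed against the data.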

COORD_X, COORD_Y	Shot coordinates on the court. -1 means coordinates not recorded (e.g., for free throws). 

ZONE	Court zone. Often blank (" ") if not explicitly recorded. 🏀 Common "ZONE" values and likely meanings:

Zone Code	Likely Area on Court
"A"	Under the basket (paint / restricted area)
"B"	Mid-range, baseline
"C"	Corner 3-point area
"D"	Wing 3-point area
"E"	Top of the key / above the arc
"F"	Near free-throw line / elbow area
"G"	Long-range 3-pointers (deep shots)

FASTBREAK	1 if this was a fast-break basket, 0 otherwise.

SECOND_CHANCE	1 if points came from an offensive rebound (second chance), 0 otherwise.

POINTS_OFF_TURNOVER	1 if points were a direct result of a turnover, 0 otherwise.

MINUTE	Game minute during which the scoring event happened.

CONSOLE	Timestamp in game time format — MM:SS remaining in quarter.

POINTS_A, POINTS_B	Updated score after the basket: Team A’s and Team B’s score after this event.

UTC	UTC timestamp of when the event occurred (YYYYMMDDHHMMSS). Helpful for ordering.

In [13]:
# Load a 'PlayByPlay' sample file
# The folder is created as "PlayByPlay" (endpoint name), so the path must use
# that exact case to work on case-sensitive filesystems.
with open('data/E2024/PlayByPlay/game_250.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Pretty-print the whole document to inspect its keys and structure
print(json.dumps(data, indent=2))

{
  "Live": false,
  "TeamA": "LDLC ASVEL Villeurbanne",
  "TeamB": "Maccabi Playtika Tel Aviv",
  "CodeTeamA": "ASV       ",
  "CodeTeamB": "TEL       ",
  "ActualQuarter": 4,
  "FirstQuarter": [
    {
      "TYPE": 0,
      "NUMBEROFPLAY": 4,
      "CODETEAM": "          ",
      "PLAYER_ID": "          ",
      "PLAYTYPE": "BP",
      "PLAYER": null,
      "TEAM": null,
      "DORSAL": null,
      "MINUTE": 1,
      "MARKERTIME": "",
      "POINTS_A": null,
      "POINTS_B": null,
      "COMMENT": "",
      "PLAYINFO": "Begin Period"
    },
    {
      "TYPE": 0,
      "NUMBEROFPLAY": 5,
      "CODETEAM": "ASV       ",
      "PLAYER_ID": "          ",
      "PLAYTYPE": "JB",
      "PLAYER": null,
      "TEAM": "LDLC ASVEL Villeurbanne",
      "DORSAL": "",
      "MINUTE": 1,
      "MARKERTIME": "09:59",
      "POINTS_A": null,
      "POINTS_B": null,
      "COMMENT": "",
      "PLAYINFO": ""
    },
    {
      "TYPE": 0,
      "NUMBEROFPLAY": 6,
      "CODETEAM": "ASV       ",
     

In [15]:
# Load a 'ShootingGraphic' sample file
# The folder is created as "ShootingGraphic" (endpoint name), so the path must
# use that exact case to work on case-sensitive filesystems.
with open('data/E2024/ShootingGraphic/game_30.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Pretty-print the whole document to inspect its keys and structure
print(json.dumps(data, indent=2))

{
  "FastbreakPointsA": 9,
  "FastbreakPointsB": 13,
  "TurnoversPointsA": 18,
  "TurnoversPointsB": 15,
  "SecondChancePointsA": 8,
  "SecondChancePointsB": 21
}


Misleading URI name. This data indicates how well each team generates extra points (points off turnovers, second-chance points, fast-break points).

In [16]:
# Load an 'Evolution' sample file
# The folder is created as "Evolution" (endpoint name), so the path must use
# that exact case to work on case-sensitive filesystems.
with open('data/E2024/Evolution/game_30.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Pretty-print the whole document to inspect its keys and structure
print(json.dumps(data, indent=2))

{
  "PointsList": [
    [
      0,
      2,
      4,
      11,
      11,
      13,
      17,
      22,
      22,
      24,
      30,
      33,
      33,
      35,
      39,
      42,
      44,
      48,
      50,
      54,
      58,
      58,
      58,
      61,
      66,
      66,
      68,
      73,
      75,
      81,
      84,
      87,
      87,
      89,
      91,
      96,
      98,
      101,
      103,
      109,
      109
    ],
    [
      4,
      6,
      8,
      11,
      13,
      13,
      16,
      16,
      19,
      21,
      21,
      23,
      23,
      28,
      30,
      32,
      35,
      40,
      45,
      47,
      47,
      52,
      52,
      52,
      58,
      60,
      68,
      71,
      74,
      78,
      78,
      81,
      83,
      86,
      89,
      91,
      93,
      96,
      101,
      107,
      107
    ]
  ],
  "MinutesList": [
    0,
    1,
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    9,
    10,
    11,
    12,
    13,
    14,
  

PointsList
A list of two lists: PointsList[0] = team A’s cumulative score at each minute, PointsList[1] = team B’s cumulative score at each minute.

Each index corresponds to a minute in MinutesList.

MinutesList
An array of game minutes (0 to 40). This aligns with indices in PointsList, ScoreDiffPerMinute, etc.

ScoreDiffPerMinute
A list of two lists, just like PointsList.

ScoreDiffPerMinute[0] → how much team A led by at each minute (positive if leading, negative if trailing).

ScoreDiffPerMinute[1] → how much team B led by at each minute.

Values are null if no score change occurred or not applicable.

LargestDifference
Shows the largest point difference in favor of each team.

Format: [[teamA_largest_lead, null], [null, teamB_largest_lead]]

So:
"[[11, null], [null, 4]]"
Means:

Team A’s biggest lead: 11 points

Team B’s biggest lead: 4 points

MinuteMaxA, MinuteMaxB
The minute at which each team had their largest lead.

ScoreMaxA, ScoreMaxB
The score snapshot when each team had their largest lead.

"ScoreMaxA": "58 - 47" → Team A led by 11 at minute 21.

(Optional) difp, dA, dB
These are sometimes used for additional derived stats, but are often null or inconsistently populated — you can usually ignore these.

Use Case Ideas:
Plot score evolution graphs (line charts)

Identify turning points or key momentum shifts

Create game summaries (e.g., "Team A took control in Q2 with an 11-point lead at minute 21.")

In [2]:
# Validate that "Valuation" in this dataset follows the official Euroleague formula:
# (Points + Rebounds + Assists + Steals + Blocks + Fouls Drawn)
#   - (Missed Field Goals + Missed Free Throws + Turnovers + Shots Rejected + Fouls Committed)
# The numbers below are taken from the box score line quoted underneath.
leday_credits = 32 + 5 + 2 + 0 + 0 + 2                       # points, rebounds, assists, steals, blocks, fouls drawn
leday_debits = ((10 - 6) + (10 - 6)) + (3 - 2) + 0 + 0 + 1   # missed FG, missed FT, turnovers, shots rejected, fouls committed
Leday_efficiency_test = leday_credits - leday_debits
print(Leday_efficiency_test)
# "Player": "LEDAY, ZACH",

# "Points": 32,
# "FieldGoalsMade2": 6,
# "FieldGoalsAttempted2": 10,
# "FieldGoalsMade3": 6,
# "FieldGoalsAttempted3": 10,
# "FreeThrowsMade": 2,
# "FreeThrowsAttempted": 3,
# "OffensiveRebounds": 0,
# "DefensiveRebounds": 5,
# "TotalRebounds": 5,
# "Assistances": 2,
# "Steals": 0,
# "Turnovers": 0,
# "BlocksFavour": 0,
# "BlocksAgainst": 0,
# "FoulsCommited": 1,
# "FoulsReceived": 2,
# "Valuation": 31,
# "Plusminus": 14

31


In [3]:
# Second check of the valuation formula, using the box score line quoted underneath:
# (Points + Rebounds + Assists + Steals + Blocks + Fouls Drawn)
#   - (Missed Field Goals + Missed Free Throws + Turnovers + Shots Rejected + Fouls Committed)
hall_credits = 10 + 1 + 1 + 0 + 0 + 0                     # points, rebounds, assists, steals, blocks, fouls drawn
hall_debits = ((6 - 5) + (0 - 0)) + (0 - 0) + 1 + 0 + 1   # missed FG, missed FT, turnovers, shots rejected, fouls committed
hall_efficiency_test = hall_credits - hall_debits
print(hall_efficiency_test)
# "Player": "HALL, DONTA",
#           "Minutes": "18:16",
#           "Points": 10,
#           "FieldGoalsMade2": 5,
#           "FieldGoalsAttempted2": 6,
#           "FieldGoalsMade3": 0,
#           "FieldGoalsAttempted3": 0,
#           "FreeThrowsMade": 0,
#           "FreeThrowsAttempted": 0,
#           "OffensiveRebounds": 1,
#           "DefensiveRebounds": 0,
#           "TotalRebounds": 1,
#           "Assistances": 1,
#           "Steals": 0,
#           "Turnovers": 1,
#           "BlocksFavour": 0,
#           "BlocksAgainst": 0,
#           "FoulsCommited": 1,
#           "FoulsReceived": 0,
#           "Valuation": 9,

9


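It seems that the PIR (Performance Index Rating, stored in this dataset as `Valuation`) is calculated according to the official formula: both hand-computed values (31 and 9) match the `Valuation` field.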

## Data Preprocessing & Cleaning

In [5]:
def load_boxscore_player_stats(json_dir='data', uri='BoxScore'):
    """Collect every player's box-score line from the saved JSON files.

    Walks ``<json_dir>/<seasoncode>/<uri>/*.json``. In each file, ``"Stats"``
    is a list with one entry per team, and each team entry carries its own
    ``"PlayersStats"`` list. Every player entry becomes one row, extended
    with three columns:

    * ``seasoncode`` - name of the season folder,
    * ``gamecode``   - file name without ``game_`` and ``.json``,
    * ``team``       - the team entry's ``"Team"`` value (``'Unknown'`` if absent).

    Parameters
    ----------
    json_dir : str
        Root folder holding one sub-folder per season.
    uri : str
        Name of the endpoint sub-folder inside each season folder.

    Returns
    -------
    pandas.DataFrame
        One row per player per game; empty if nothing usable was found.
        Files that cannot be read or parsed are reported with ``print``
        and skipped.
    """
    all_data = []

    # sorted() makes the row order independent of the filesystem's listing order.
    for seasoncode in sorted(os.listdir(json_dir)):
        uri_path = os.path.join(json_dir, seasoncode, uri)
        if not os.path.isdir(uri_path):
            # Stray file in json_dir, or a season without this endpoint.
            continue

        for filename in sorted(os.listdir(uri_path)):
            if not filename.endswith('.json'):
                continue

            game_path = os.path.join(uri_path, filename)
            gamecode = filename.replace('game_', '').replace('.json', '')

            try:
                with open(game_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except (OSError, ValueError) as e:
                # ValueError covers malformed JSON and undecodable bytes.
                print(f"Error reading {game_path}: {e}")
                continue

            if not isinstance(data, dict):
                continue

            # "PlayersStats" lives inside each team entry of the "Stats"
            # list, not at the top level of the document.
            for team_entry in data.get('Stats') or []:
                if not isinstance(team_entry, dict):
                    continue
                team_name = team_entry.get('Team', 'Unknown')
                for player in team_entry.get('PlayersStats') or []:
                    row = dict(player)  # copy so the parsed JSON is not mutated
                    row['seasoncode'] = seasoncode
                    row['gamecode'] = gamecode
                    row['team'] = team_name
                    all_data.append(row)

    return pd.DataFrame(all_data)

In [8]:
import os, glob, json
import pandas as pd
from pathlib import Path

def convert_minutes_str_to_float(minutes_str):
    """Turn a ``"MM:SS"`` string into decimal minutes.

    Parameters
    ----------
    minutes_str : object
        Expected to be a string such as ``"18:16"``; surrounding whitespace
        is ignored.

    Returns
    -------
    float or None
        Minutes plus seconds/60, rounded to two decimals. ``None`` for any
        value that is not a string or is not two integers separated by a
        single colon.
    """
    if not isinstance(minutes_str, str):
        # Covers None, NaN and any other non-text value.
        return None

    pieces = minutes_str.strip().split(":")
    if len(pieces) != 2:
        return None

    try:
        whole_minutes = int(pieces[0])
        leftover_seconds = int(pieces[1])
        return round(whole_minutes + leftover_seconds / 60, 2)
    except (ValueError, OverflowError):
        return None

def load_euroleague_boxscores(root_dir="data"):
    """Flatten every saved BoxScore JSON under ``root_dir`` into one DataFrame.

    Reads files matching ``<root_dir>/<seasoncode>/BoxScore/game_<gamecode>.json``.
    Each entry of a file's ``"Stats"`` list is one team; each entry of that
    team's ``"PlayersStats"`` list becomes one row, extended with the columns
    ``team``, ``seasoncode`` and ``gamecode``.

    Cleaning applied to the result:

    * leading/trailing whitespace is removed from every string value,
    * ``Minutes`` is converted with ``convert_minutes_str_to_float``,
    * columns whose lower-cased name is in the numeric list below are
      converted with ``pd.to_numeric`` (unparsable values become NaN).

    Parameters
    ----------
    root_dir : str
        Folder holding one sub-folder per season.

    Returns
    -------
    pandas.DataFrame
        One row per player per game, or an empty DataFrame when no player
        records were found. Files that cannot be read or parsed are reported
        with ``print`` and skipped.
    """
    records = []
    pattern = os.path.join(root_dir, "*", "BoxScore", "game_*.json")
    # sorted() makes the row order independent of the filesystem's listing order.
    for filepath in sorted(glob.glob(pattern)):
        # Extract season and game code from the path
        p = Path(filepath)
        seasoncode = p.parents[1].name   # e.g. "E2024"
        gamecode = p.stem.replace("game_", "")
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers malformed JSON and undecodable bytes.
            print(f"Error reading {filepath}: {e}")
            continue

        if not isinstance(data, dict):
            continue

        # Flatten each team's player stats
        for team in data.get("Stats") or []:
            if not isinstance(team, dict):
                continue
            team_name = team.get("TeamName") or team.get("Team", "")
            for player in team.get("PlayersStats") or []:
                rec = dict(player)  # copy player stats dict
                rec["team"] = team_name
                rec["seasoncode"] = seasoncode
                rec["gamecode"] = gamecode
                # Trim padded strings (e.g. "P011064   ") here, value by
                # value, so non-string values are left untouched.
                records.append({
                    key: value.strip() if isinstance(value, str) else value
                    for key, value in rec.items()
                })

    if not records:
        # No valid data found; return empty DataFrame
        return pd.DataFrame()

    df = pd.DataFrame(records)

    if 'Minutes' in df.columns:
        df['Minutes'] = df['Minutes'].apply(convert_minutes_str_to_float)

    # Convert only relevant numeric columns (matched case-insensitively).
    # NOTE(review): several box-score columns shown elsewhere in this notebook
    # (e.g. "TotalRebounds", "Assistances", "Valuation") are not in this list;
    # extend it if they ever arrive as strings.
    numeric_names = {
        'points', 'rebounds', 'assists', 'blocks', 'steals', 'turnovers',
        'minutes', 'seconds', 'fieldgoalsmade', 'fieldgoalsattempted',
        'threepointsmade', 'freethrowsmade', 'performanceindexrating', 'plusminus'
    }
    for col in df.columns:
        if str(col).lower() in numeric_names:
            # Accept decimal commas, then coerce; every matching column is
            # processed before the frame is returned.
            as_text = df[col].astype(str).str.replace(',', '.', regex=False)
            df[col] = pd.to_numeric(as_text, errors='coerce')

    return df


In [9]:
# Smoke-test the loader: print the frame's shape, its first three rows and its column names.
testukas = load_euroleague_boxscores()
for overview in (testukas.shape, testukas.head(3), testukas.columns.tolist()):
    print(overview)


(30292, 29)
  Player_ID  IsStarter  IsPlaying Team Dorsal           Player  Minutes  \
0   P011225          0          0  MCO      5       LEE, PARIS    18.55   
1   P002543          1          0  MCO      9  WESTERMANN, LEO    21.98   
2   P005856          0          1  MCO     10     THOMAS, WILL    22.47   

   Points  FieldGoalsMade2  FieldGoalsAttempted2  ...  Turnovers  \
0      10                1                     3  ...          0   
1       8                1                     2  ...          2   
2       5                1                     4  ...          0   

   BlocksFavour  BlocksAgainst  FoulsCommited  FoulsReceived  Valuation  \
0             0              0              2              3         14   
1             1              1              2              1         11   
2             0              1              1              1          3   

   Plusminus       team  seasoncode  gamecode  
0         13  AS MONACO       E2021         1  
1          8  AS 

In [None]:
# 4. Exploratory Data Analysis (EDA)
# ----------------------------------
# Visualize trends, distributions, and key insights.

def plot_scores(df):
    """Draw a two-bar chart of the final score of the first game in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``ScoreA`` and ``ScoreB`` columns. Only the first row
        (by position) is plotted. Scores may be numbers or numeric strings
        such as ``"77"``, which is how the Header JSON stores them.

    Raises
    ------
    ValueError
        If ``df`` has no rows, or a score cannot be converted to a number.
    """
    if len(df) == 0:
        raise ValueError("plot_scores needs at least one row")

    # Positional access: works even when the index does not contain label 0
    # (for example after filtering the frame).
    first_game = df.iloc[0]
    # float() so string scores become bar heights rather than category labels.
    scores = [float(first_game['ScoreA']), float(first_game['ScoreB'])]

    plt.figure(figsize=(10,5))
    sns.barplot(x=['TeamA', 'TeamB'], y=scores)
    plt.title("Game Score Comparison")
    plt.show()

# 5. Feature Engineering
# ----------------------
# Create new features useful for analysis or modeling.

def add_features(df):
    """Add a ``Score_Difference`` column: the absolute final-score margin.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``ScoreA`` and ``ScoreB`` columns. Values may be numbers
        or numeric strings such as ``"77"``, which is how the Header JSON
        stores them; values that cannot be parsed give NaN.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; the column is added in place.
    """
    # Coerce first: subtracting two string columns raises TypeError.
    score_a = pd.to_numeric(df['ScoreA'], errors='coerce')
    score_b = pd.to_numeric(df['ScoreB'], errors='coerce')
    df['Score_Difference'] = (score_a - score_b).abs()
    return df

# 6. Modeling (Optional)
# ----------------------
# Build predictive models if needed.

# 7. Insights & Conclusions
# -------------------------
# Summarize key findings and potential applications.

# Save cleaned data to CSV
# df.to_csv("euroleague_cleaned.csv", index=False)
