In [1]:
import pandas as pd
import matplotlib.pyplot as plt


# Read the Premier League league-table CSV (expected in the working directory)
df = pd.read_csv(filepath_or_buffer='pl-tables-1993-2024.csv')

# A bare trailing expression makes the notebook render the frame
df

Unnamed: 0,season_end_year,team,position,played,won,drawn,lost,gf,ga,gd,points,notes
0,1993,Manchester Utd,1,42,24,12,6,67,31,36,84,→ Champions League via league finish
1,1993,Aston Villa,2,42,21,11,10,57,40,17,74,→ UEFA Cup via league finish
2,1993,Norwich City,3,42,21,9,12,61,65,-4,72,→ UEFA Cup via league finish 1
3,1993,Blackburn,4,42,20,11,11,68,46,22,71,
4,1993,QPR,5,42,17,12,13,63,55,8,63,
...,...,...,...,...,...,...,...,...,...,...,...,...
641,2024,Brentford,16,38,10,9,19,56,65,-9,39,
642,2024,Nottingham Forest,17,38,9,9,20,49,67,-18,32,4-point deduction
643,2024,Luton Town,18,38,6,8,24,52,85,-33,26,Relegated
644,2024,Burnley,19,38,5,9,24,41,78,-37,24,Relegated


In [2]:

# Basic info summary.
# DataFrame.info() prints its report to stdout and returns None, so there is
# nothing worth binding to a variable here; call it purely for the printout.
df.info()

# Descriptive statistics (count/mean/std/quartiles) for the numeric columns;
# kept in `desc` and displayed as the cell's output.
desc = df.describe()
desc

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 646 entries, 0 to 645
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   season_end_year  646 non-null    int64 
 1   team             646 non-null    object
 2   position         646 non-null    int64 
 3   played           646 non-null    int64 
 4   won              646 non-null    int64 
 5   drawn            646 non-null    int64 
 6   lost             646 non-null    int64 
 7   gf               646 non-null    int64 
 8   ga               646 non-null    int64 
 9   gd               646 non-null    int64 
 10  points           646 non-null    int64 
 11  notes            326 non-null    object
dtypes: int64(10), object(2)
memory usage: 60.7+ KB


Unnamed: 0,season_end_year,position,played,won,drawn,lost,gf,ga,gd,points
count,646.0,646.0,646.0,646.0,646.0,646.0,646.0,646.0,646.0,646.0
mean,2008.365325,10.602167,38.408669,14.283282,9.842105,14.283282,51.577399,51.577399,0.0,52.654799
std,9.302311,5.840351,1.212411,5.741345,2.956505,5.401573,15.195452,13.035291,24.952482,16.49919
min,1993.0,1.0,38.0,1.0,2.0,0.0,20.0,15.0,-69.0,11.0
25%,2000.0,6.0,38.0,10.0,8.0,10.25,41.0,43.0,-17.0,41.0
50%,2008.0,11.0,38.0,13.0,10.0,15.0,48.0,52.0,-5.0,50.0
75%,2016.0,16.0,38.0,18.0,12.0,18.0,60.75,59.75,15.0,63.0
max,2024.0,22.0,42.0,32.0,18.0,29.0,106.0,104.0,79.0,100.0


In [3]:


# How many unique clubs and seasons?
n_clubs = df['team'].nunique()
n_seasons = df['season_end_year'].nunique()


def season_label(end_year):
    """Format a season by its end year, e.g. 1993 -> '1992/93'.

    The table keys each season by the calendar year in which it *ends*, so
    the season itself starts in the previous year.
    """
    end_year = int(end_year)
    return f"{end_year - 1}/{end_year % 100:02d}"


# Derive the span from the data rather than hard-coding it, so the label
# always agrees with the count printed next to it.
first_season = season_label(df['season_end_year'].min())
last_season = season_label(df['season_end_year'].max())

print(f"Unique clubs: {n_clubs}")
print(f"Seasons ({first_season} to {last_season}): {n_seasons}")


Unique clubs: 51
Seasons (1993/94 to 2023/24): 32


In [4]:
import pandas as pd, json, os, statistics

# ---- Parameters ----
# Seasons are keyed by the year they end in; both bounds are inclusive.
DECADE_START, DECADE_END = 2015, 2024
OUT_PATH = "data/teams_points_2015-24.json"

# Integer columns copied into every per-season record, in output key order.
SEASON_INT_FIELDS = ["points", "position", "gd", "won", "drawn", "lost", "gf", "ga"]

# ---- Filter the decade window ----
decade = (df[df["season_end_year"].between(DECADE_START, DECADE_END)]
            .sort_values(["team", "season_end_year"]))


def _season_record(row):
    """Turn one table row (a column -> value mapping) into a JSON-ready dict.

    Values are cast to built-in ``int`` so ``json.dump`` never sees NumPy
    scalars; a missing note becomes an empty string.
    """
    record = {"year": int(row["season_end_year"])}
    record.update((field, int(row[field])) for field in SEASON_INT_FIELDS)
    record["notes"] = row["notes"] if pd.notna(row["notes"]) else ""
    return record


# ---- Build one record per club ----
records = []
for club, grp in decade.groupby("team"):
    # `decade` is pre-sorted, so each club's seasons come out chronologically.
    season_rows = [_season_record(row) for row in grp.to_dict("records")]

    pts = [s["points"] for s in season_rows]          # club-level quick stats
    records.append({
        "team"        : club,
        "values"      : season_rows,
        "total_pts"   : sum(pts),
        "avg_pts"     : round(statistics.mean(pts), 2),
        "best_pts"    : max(pts),
        "seasons_played": len(season_rows)
    })

# ---- Write JSON to disk ----
os.makedirs(os.path.dirname(OUT_PATH), exist_ok=True)
with open(OUT_PATH, "w", encoding="utf-8") as f:
    json.dump(records, f, indent=2)

# NOTE: both counts below come from the same grouping of `decade` by team,
# so they are equal by construction.
print(f"✓ JSON saved to {OUT_PATH} — {len(records)} clubs "
      f"({decade['team'].nunique()} with ≥1 season)")


✓ JSON saved to data/teams_points_2015-24.json — 34 clubs (34 with ≥1 season)


In [5]:
# Rows that finished in first place, listed in chronological order
is_first_place = df['position'].eq(1)
df.loc[is_first_place].sort_values(by='season_end_year')


Unnamed: 0,season_end_year,team,position,played,won,drawn,lost,gf,ga,gd,points,notes
0,1993,Manchester Utd,1,42,24,12,6,67,31,36,84,→ Champions League via league finish
22,1994,Manchester Utd,1,42,27,11,4,80,38,42,92,→ Champions League via league finish
44,1995,Blackburn,1,42,27,8,7,80,39,41,89,→ Champions League via league finish
66,1996,Manchester Utd,1,38,25,7,6,73,35,38,82,→ Champions League via league finish
86,1997,Manchester Utd,1,38,21,12,5,76,44,32,75,→ Champions League via league finish
106,1998,Arsenal,1,38,23,9,6,68,33,35,78,→ Champions League via league finish
126,1999,Manchester Utd,1,38,22,13,3,80,37,43,79,→ Champions League via league finish
146,2000,Manchester Utd,1,38,28,7,3,97,45,52,91,→ Champions League via league finish
166,2001,Manchester Utd,1,38,24,8,6,79,31,48,80,→ Champions League via league finish
186,2002,Arsenal,1,38,26,9,3,79,36,43,87,→ Champions League via league finish
