In [1]:
!pip install faker --quiet

import random
from itertools import combinations
from datetime import date, timedelta
import pandas as pd
from faker import Faker

# --- Setup ---
# Single seed shared by every random source used in this notebook.
SEED = 42
# Seeds the stdlib RNG that drives the extra pairs, winners and match dates.
random.seed(SEED)
# Faker draws from its own RNG instance, so random.seed() alone leaves the
# generated team names different on every run; seed it explicitly as well.
# NOTE(review): generated values may still change between Faker releases.
Faker.seed(SEED)
fake = Faker()

# --- Teams (20 unique readable names) ---
# Build the names first; `fake.unique` guarantees no city is handed out twice.
team_names = []
for _ in range(20):
    team_names.append("Team " + fake.unique.city())

# Team ids run 1..20 and line up positionally with the generated names.
teams_df = pd.DataFrame({"team_id": range(1, 21), "team_name": team_names})
print("Teams sample:")
display(teams_df.head())

# --- Matches (200 total, every pair ≥ once, season=2023) ---
team_ids = teams_df["team_id"].tolist()
# Every unordered pairing of two distinct teams: C(20, 2) = 190 fixtures.
pairs = list(combinations(team_ids, 2))
# Pick 10 rematches from the 190 unique fixtures *before* extending the list,
# so the extras are sampled only from the original pairings (200 in total).
rematches = [random.choice(pairs) for _ in range(10)]
pairs.extend(rematches)

def random_date_2023():
    """Return a random `datetime.date` in 2023, both year ends inclusive.

    The day offset is drawn with `random.randint` from the module-level
    `random` generator, so results follow whatever seed was set on it.
    """
    first_day = date(2023, 1, 1)
    last_day = date(2023, 12, 31)
    # Number of days between the first and last day of the year (364).
    span_days = (last_day - first_day).days
    offset = random.randint(0, span_days)
    return first_day + timedelta(days=offset)

# One record per fixture; match ids are 1-based and follow the order of `pairs`.
rows = []
for match_id, (team_a, team_b) in enumerate(pairs, start=1):
    # Fair coin flip decides the winner. It is drawn before the date so the
    # sequence of RNG calls per match stays: outcome first, then date.
    if random.random() < 0.5:
        winner, loser = team_a, team_b
    else:
        winner, loser = team_b, team_a
    # Dates are stored as ISO-8601 strings (YYYY-MM-DD).
    played_on = random_date_2023().isoformat()
    rows.append(dict(
        match_id=match_id,
        season=2023,
        match_date=played_on,
        winner_team_id=winner,
        loser_team_id=loser,
    ))

# Build the frame once from the accumulated records.
matches_df = pd.DataFrame(rows)
print("Matches sample:")
display(matches_df.head())

# --- Save both CSVs ---
# Write each frame to the working directory without the pandas index column.
for frame, csv_path in ((teams_df, "teams.csv"), (matches_df, "matches.csv")):
    frame.to_csv(csv_path, index=False)
print("Saved: teams.csv, matches.csv")

# --- Top 10 teams by wins (2023) ---
# Count victories per winning team id (a team with no wins has no row here).
wins_per_team = matches_df.groupby("winner_team_id").size().reset_index(name="wins")
# Attach the readable team names via the id columns.
wins_named = wins_per_team.merge(teams_df, left_on="winner_team_id", right_on="team_id")
# Most wins first; ties are broken alphabetically by team name.
wins_ranked = wins_named.sort_values(["wins", "team_name"], ascending=[False, True])
top10 = wins_ranked.head(10)[["team_id", "team_name", "wins"]]

print("\nTop 10 teams by wins (2023):")
display(top10)


Teams sample:


Unnamed: 0,team_id,team_name
0,1,Team Taylorland
1,2,Team New David
2,3,Team North Stephen
3,4,Team Port Angela
4,5,Team Jeremychester


Matches sample:


Unnamed: 0,match_id,season,match_date,winner_team_id,loser_team_id
0,1,2023,2023-10-07,2,1
1,2,2023,2023-08-05,1,3
2,3,2023,2023-02-17,1,4
3,4,2023,2023-09-16,1,5
4,5,2023,2023-10-15,6,1


Saved: teams.csv, matches.csv

Top 10 teams by wins (2023):


Unnamed: 0,team_id,team_name,wins
7,8,Team Lake Jacqueline,14
19,20,Team Andersonhaven,13
8,9,Team Andrewland,12
9,10,Team Lake Kyle,12
1,2,Team New David,12
16,17,Team South Maryside,11
0,1,Team Taylorland,11
12,13,Team Terrystad,11
5,6,Team West Brian,11
2,3,Team North Stephen,10
