# Combining All Teams' Logos into the Dataset

In [1]:
import pandas as pd
from difflib import get_close_matches

# Read the cleaned per-team table that the logo URLs will be attached to.
# NOTE: the last cell of this notebook writes its result back to this same path.
all_teams_df = pd.read_csv(
    filepath_or_buffer="../../data/teams/cleaned/all_league_teams.csv"
)

# Club name -> logo ID, grouped by league for readability.
# The IDs are interpolated into the cdn.ssref.net logo URL template further down.
_TEAM_IDS_BY_LEAGUE = {
    "Premier League": {
        "Arsenal": "18bb7c10",
        "Aston Villa": "8602292d",
        "Bournemouth": "4ba7cbea",
        "Brentford": "cd051869",
        "Brighton": "d07537b9",
        "Chelsea": "cff3d9bb",
        "Crystal Palace": "47c64c55",
        "Everton": "d3fd31cc",
        "Fulham": "fd962109",
        "Ipswich Town": "b74092de",
        "Leicester City": "a2d435b3",
        "Liverpool": "822bd0ba",
        "Manchester City": "b8fd03ef",
        "Manchester United": "19538871",
        "Newcastle Utd": "b2b47a98",
        "Nottingham Forest": "e4a775cb",
        "Southampton": "33c895d4",
        "Tottenham": "361ca564",
        "West Ham": "7c21e445",
        "Wolves": "8cec06e1",
    },
    "Serie A": {
        "Napoli": "d48ad4ff",
        "Inter": "d609edc0",
        "Atalanta": "922493f3",
        "Juventus": "e0652b02",
        "Lazio": "7213da33",
        "Roma": "cf74a709",
        "Bologna": "1d8099f8",
        "AC Milan": "dc56fe14",
        "Fiorentina": "421387cf",
        "Como": "28c9c3cd",
        "Torino": "105360fe",
        "Udinese": "04eea015",
        "Genoa": "658bf2de",
        "Cagliari": "c4260e09",
        "Hellas Verona": "0e72edf2",
        "Parma": "eab4234c",
        "Venezia": "af5d5982",
        "Lecce": "ffcbe334",
        "Empoli": "a3d88bd8",
        "Monza": "21680aa4",
    },
    "La Liga": {
        "Barcelona": "206d90db",
        "Real Madrid": "53a2f082",
        "Atletico Madrid": "db3b9613",
        "Athletic Bilbao": "2b390eca",
        "Villarreal": "2a8183b3",
        "Real Betis": "fc536746",
        "Celta Vigo": "f25da7fb",
        "Rayo Vallecano": "98e8af82",
        "Mallorca": "2aa12281",
        "Osasuna": "03c57e2b",
        "Valencia": "dcc91a7b",
        "Real Sociedad": "e31d1cd9",
        "Girona": "9024a00a",
        "Sevilla": "ad2be733",
        "Getafe": "7848bd64",
        "Espanyol": "a8661628",
        "Alaves": "8d6fd021",
        "Leganes": "7c6f2c78",
        "Las Palmas": "0049d422",
        "Valladolid": "17859612",
    },
    "Bundesliga": {
        "Bayern Munich": "054efa67",
        "Bayer Leverkusen": "c7a9f859",
        "Eintracht Frankfurt": "f0ac8ee6",
        "Freiburg": "a486e511",
        "Borussia Dortmund": "add600ae",
        "Mainz 05": "a224b06a",
        "RB Leipzig": "acbb6a5b",
        "Werder Bremen": "62add3bf",
        "Stuttgart": "598bc722",
        "Monchengladbach": "32f3ee20",
        "Augsburg": "0cdc4311",
        "Wolfsburg": "4eaa11d7",
        "Union Berlin": "7a41008f",
        "St Pauli": "54864664",
        "Hoffenheim": "033ea6b8",
        "Heidenheim": "18d9d2a7",
        "Holstein Kiel": "2ac661d9",
        "Bochum": "b42c6323",
    },
    "Ligue 1": {
        "Paris Saint-Germain": "e2d8892c",
        "Marseille": "5725cc7b",
        "Monaco": "fd6114db",
        "Nice": "132ebc33",
        "Lille": "cb188c0c",
        "Strasbourg": "c0d3eab4",
        "Lyon": "d53c0b06",
        "Brest": "fb08dbb3",
        "Lens": "fd4e0f7d",
        "Auxerre": "5ae09109",
        "Rennes": "b3072e00",
        "Toulouse": "3f8c4b5f",
        "Angers": "69236f98",
        "Reims": "7fdd64e0",
        "Nantes": "d7a486cd",
        "Le Havre": "5c2737db",
        "Saint-Étienne": "d298ef2c",
        "Montpellier": "281b0e73",
    },
}

# Master TEAM_IDS across top 5 European leagues: the per-league groups flattened
# into a single name -> ID mapping, keeping the league-by-league insertion order.
TEAM_IDS = {
    club: logo_id
    for league_clubs in _TEAM_IDS_BY_LEAGUE.values()
    for club, logo_id in league_clubs.items()
}

# Create a DataFrame from the TEAM_IDS dictionary: one row per club, in dict
# order, with the logo URL built from the club's ID.
logo_df = pd.DataFrame(
    {
        "Team": list(TEAM_IDS),
        "Logo URL": [
            f"https://cdn.ssref.net/nocdn/tlogo/fb/{team_id}.png"
            for team_id in TEAM_IDS.values()
        ],
    }
)


def _league_for(team):
    """Return the League of the first dataset row whose name contains `team`.

    The comparison is a case-insensitive *literal* substring test
    (``regex=False``), so characters such as ``.`` or ``(`` in a club name are
    never interpreted as regular-expression syntax. Returns ``None`` when no
    row of ``all_teams_df`` contains the name.
    """
    name_hits = all_teams_df["Team Name"].str.contains(
        team, case=False, na=False, regex=False
    )
    leagues = all_teams_df.loc[name_hits, "League"]
    # Filter once and reuse the result instead of evaluating the mask twice.
    return leagues.iloc[0] if not leagues.empty else None


# For reliable matching, add a 'League' column from the main dataset
logo_df["League"] = logo_df["Team"].map(_league_for)

# Hand-maintained overrides, keyed by the exact "Team Name" spelling used in
# all_teams_df, each mapped to the full logo URL to assign to that club.
manual_logo_urls = {
    "Brighton & Hove Albion": "https://cdn.ssref.net/nocdn/tlogo/fb/d07537b9.png",
    "Wolverhampton Wanderers": "https://cdn.ssref.net/nocdn/tlogo/fb/8cec06e1.png",
    "Parma Calcio 1913": "https://cdn.ssref.net/nocdn/tlogo/fb/eab4234c.png",
    "RCD Espanyol Barcelona": "https://cdn.ssref.net/nocdn/tlogo/fb/a8661628.png",
    "Deportivo Alavés": "https://cdn.ssref.net/nocdn/tlogo/fb/8d6fd021.png",
    "Olympique Lyon": "https://cdn.ssref.net/nocdn/tlogo/fb/d53c0b06.png",
    "Stade Rennais FC": "https://cdn.ssref.net/nocdn/tlogo/fb/b3072e00.png",
    "Stade Brestois 29": "https://cdn.ssref.net/nocdn/tlogo/fb/fb08dbb3.png",
}

# Apply manual fixes: write each override into the "Logo URL" column of the
# rows whose name matches exactly (the column is created on first assignment
# if the loaded frame does not have it yet).
for dataset_name in manual_logo_urls:
    is_target_row = all_teams_df["Team Name"].eq(dataset_name)
    all_teams_df.loc[is_target_row, "Logo URL"] = manual_logo_urls[dataset_name]

# Left-join the logo table onto the team table. Rows pair up only when both the
# name and the league are identical; every row of all_teams_df is kept.
merged_teams_df = all_teams_df.merge(
    logo_df,
    how="left",
    left_on=["Team Name", "League"],
    right_on=["Team", "League"],
)

# After the join, "Logo URL_x" holds the values already on all_teams_df (the
# manual patches) and "Logo URL_y" holds the URL looked up from logo_df.
patched_urls = merged_teams_df["Logo URL_x"]
lookup_urls = merged_teams_df["Logo URL_y"]

# Remove the join helper and the split columns, then append a single combined
# column that prefers the manually patched value and falls back to the lookup.
merged_teams_df = merged_teams_df.drop(columns=["Team", "Logo URL_x", "Logo URL_y"])
merged_teams_df["Logo URL"] = patched_urls.combine_first(lookup_urls)

# Fuzzy match missing logos
# Rows that still have no logo after the manual patches and the exact merge.
teams_missing = merged_teams_df[merged_teams_df["Logo URL"].isna()]

# Loop-invariant lookups, built once instead of on every iteration.
candidate_names = logo_df["Team"].tolist()
url_by_candidate = (
    logo_df.drop_duplicates("Team").set_index("Team")["Logo URL"].to_dict()
)

# NOTE(review): matching compares names only and ignores League, and with
# cutoff=0.6 the closest string is not always the right club (difflib scores a
# name like "Inter Milan" slightly closer to "AC Milan" than to "Inter").
# Spot-check the resulting pairs, or add an entry to manual_logo_urls.
manual_matches = {}
# dropna(): a missing team name cannot be compared and would make difflib raise.
for team in teams_missing["Team Name"].dropna().unique():
    match = get_close_matches(team, candidate_names, n=1, cutoff=0.6)
    if match:
        manual_matches[team] = url_by_candidate[match[0]]

# Fill in missing logos using fuzzy match; rows that already have a URL are
# left untouched, and an empty frame or an empty match dict is a no-op.
merged_teams_df["Logo URL"] = merged_teams_df["Logo URL"].fillna(
    merged_teams_df["Team Name"].map(manual_matches)
)

merged_teams_df

Unnamed: 0,League,Team Name,MP,GF,GA,GD,xG,xGA,xGD,Shots/90,...,Passes into Penalty Area/90,Crosses into Penalty Area/90,Progressive Passes/90,Possession %,Tackles,Tackles Won,Challenges Attempted,Aerial Duel Win %,Most Common Formation,Logo URL
0,Premier League,Manchester City,36,67,43,24,63.6,45.3,18.3,15.89,...,4.66,0.68,20.52,61.7,475,287,430,48.4,4-2-3-1,https://cdn.ssref.net/nocdn/tlogo/fb/b8fd03ef.png
1,Premier League,Arsenal FC,36,66,33,33,57.0,32.3,24.7,14.14,...,4.60,0.76,19.31,56.9,563,335,483,50.5,4-3-3,https://cdn.ssref.net/nocdn/tlogo/fb/18bb7c10.png
2,Premier League,Liverpool FC,36,83,37,46,77.8,34.6,43.2,16.86,...,4.57,0.88,20.03,57.6,614,380,626,52.8,4-2-3-1,https://cdn.ssref.net/nocdn/tlogo/fb/822bd0ba.png
3,Premier League,Chelsea FC,36,62,43,19,65.9,45.8,20.1,15.92,...,3.78,0.81,17.08,57.6,555,346,516,51.1,4-2-3-1,https://cdn.ssref.net/nocdn/tlogo/fb/cff3d9bb.png
4,Premier League,Tottenham Hotspur,36,63,59,4,56.2,59.7,-3.5,13.53,...,4.06,0.92,17.39,55.8,639,403,569,49.8,4-3-3,https://cdn.ssref.net/nocdn/tlogo/fb/361ca564.png
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,Ligue 1,AJ Auxerre,33,47,48,-1,40.4,55.5,-15.1,11.15,...,2.32,0.73,11.86,42.8,642,394,583,49.3,5-4-1,https://cdn.ssref.net/nocdn/tlogo/fb/5ae09109.png
92,Ligue 1,Le Havre AC,33,37,69,-32,38.9,61.3,-22.4,10.03,...,2.16,0.89,11.43,42.9,610,379,546,50.6,4-2-3-1,https://cdn.ssref.net/nocdn/tlogo/fb/5c2737db.png
93,Ligue 1,AS Saint-Étienne,32,35,74,-39,35.0,71.1,-36.1,10.21,...,2.51,0.34,13.22,46.8,562,356,571,46.7,4-3-3,https://cdn.ssref.net/nocdn/tlogo/fb/d298ef2c.png
94,Ligue 1,Montpellier HSC,32,23,74,-51,32.7,60.8,-28.1,10.58,...,1.80,0.80,11.14,44.9,615,369,594,46.4,4-2-3-1,https://cdn.ssref.net/nocdn/tlogo/fb/281b0e73.png


In [2]:
# Persist the logo-enriched table without the positional index.
# NOTE: this is the same file the first cell reads, so the input is overwritten.
output_path = "../../data/teams/cleaned/all_league_teams.csv"
merged_teams_df.to_csv(path_or_buf=output_path, index=False)