# Project

# Package import and project setup

In [None]:
import pandas as pd
import numpy as np
import os
import glob
import networkx as nx
import matplotlib.pyplot as plt
import re

In [None]:
# Let pandas show up to 100 rows and 25 columns before truncating output.
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 25)

Set working directory to home project directory. This will make the reading of files easier.

In [None]:
#WORK_DIR = os.path.abspath(os.path.join(".."))
#os.chdir(WORK_DIR)

Read all data files and concatenate them to one single DataFrame

In [None]:
# Read every CSV found under ../data/<subdirectory>/ and stack them into one
# DataFrame. The paths are sorted so the resulting row order does not depend
# on the order in which the filesystem lists the files, and the frames are
# concatenated once instead of re-copying the growing frame per file.
csv_paths = sorted(glob.glob("../data/*/*.csv"))
frames = [pd.read_csv(csv_path) for csv_path in csv_paths]
# pd.concat raises on an empty list; keep the previous "empty frame" result
# when no file matches the pattern.
df = pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Quick look at the first ten rows of the concatenated data.
df.head(10)

In [None]:
# Replace the row labels left over from concatenation with one continuous
# 0..n-1 index.
df = df.reset_index(drop=True)

# Keep an untouched snapshot of the raw data; later cells read the original
# club_name / league_name columns from it.
df_copy = df.copy()

# Inspect data

In [None]:
# First ten rows again, now with the reset index.
df.head(10)

In [None]:
# Column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Summary statistics, rounded to two decimals for readability.
df.describe().round(2)

In [None]:
# Number of missing values per column.
df.isna().sum()

# Transform data
First, we will apply some basic modification to our DataFrame by defining types for some columns.

In [None]:
# "Int64" (capital I) is pandas' nullable integer dtype, so missing ages stay
# missing instead of forcing the column to float.
df["age"] = df["age"].astype("Int64")

## Rearrange incoming and outgoing club names
The current DataFrame does not state directly which club a player left and which club he joined; this has to be inferred from the columns club_name, club_involved_name and transfer_movement. To create the basis for our network, we normalize and transform the data so that each row holds only the club the player came from and the club he went to.

In [None]:
def transform_transfers(row):
    """Turn one club-centric transfer record into a directed (from -> to) record.

    Parameters
    ----------
    row : mapping (for example a DataFrame row)
        Must provide the keys "club_name", "club_involved_name",
        "league_name" and "transfer_movement".

    Returns
    -------
    tuple
        ``(club_from, club_to, league_from, league_to)``. The row only carries
        the league of ``club_name``, so the league on the other side of the
        transfer is NaN. For a movement other than "in" or "out" a message is
        printed and all four values are NaN.
    """
    club_name = row["club_name"]
    club_involved_name = row["club_involved_name"]
    league = row["league_name"]
    movement = row["transfer_movement"]

    # Give every output a default so an unexpected movement value produces an
    # all-NaN result instead of failing on unassigned club_from / club_to.
    club_from = club_to = league_from = league_to = np.nan

    if movement == "in":
        # The player joined club_name, coming from the involved club.
        club_from = club_involved_name
        club_to = club_name
        league_to = league
    elif movement == "out":
        # The player left club_name for the involved club.
        club_from = club_name
        club_to = club_involved_name
        league_from = league
    else:
        print("Movement '{}' not defined".format(movement))
    return club_from, club_to, league_from, league_to

In [None]:
# Derive the directed transfer columns row by row, then discard the raw
# columns they were built from.
transfer_cols = ["club_from", "club_to", "league_from", "league_to"]
df[transfer_cols] = df.apply(transform_transfers, axis=1, result_type="expand")
df = df.drop(columns=["club_name", "club_involved_name", "transfer_movement", "league_name"])

We took Cristiano Ronaldo as an example to check whether the data needs more processing. As the data below shows, some data points are duplicated. Unfortunately, these rows cannot simply be dropped; the data needs to be cleaned first. Once the data is cleaned, we can drop duplicates by subset.

In [None]:
# All rows recorded for the example player, ordered by year.
df.loc[df["player_name"] == "Cristiano Ronaldo"].sort_values(by="year")

## Uniform club names

The problem mentioned above also means that missing league names cannot be enriched.

In [None]:
# Distinct (club, league) pairs taken from the untouched copy of the raw data.
df_temp = df_copy.loc[:, ["club_name", "league_name"]].drop_duplicates()
# Club -> league lookup; if a club occurs with several leagues, the pair that
# comes last in df_temp wins.
club_to_league = dict(zip(df_temp["club_name"], df_temp["league_name"]))

As a last cleaning step we need to drop all remaining duplicates from the data set. A transfer is unique over player name, fee and season.

In [None]:
# Count raw rows that repeat an earlier (player_name, fee_cleaned, season) combination.
df_copy.duplicated(subset=["player_name", "fee_cleaned", "season"]).sum()

### Youth club names
Standardize the names of all youth teams and delete the suffixes (e.g. "liq.", "diss.") attached to former clubs.

In [None]:
# Markers that identify a youth/reserve side; each occurrence is rewritten to
# the single token 'youth'. The list is applied in order, and later entries
# (e.g. 'youthrves') clean up what earlier ones leave behind.
# NOTE(review): the patterns are applied as regular expressions, so the '.' in
# 'Res.', 'Yth.' and 'Acad.' matches any character. The alias tables further
# down appear to be written against the current output (e.g. 'Willem IIyouth'),
# so escaping the dots would require re-checking those tables.
replacements_youth = ['U16','U17','U18','U19','U20','U21','U22','U23','Res.','Reserves','II','Primaver','Yth.', 'Prom', 'Jgd', 'Youth', 'youthrves', '/RKC-19', 'Acad.']

# Suffixes that are simply removed (same regex caveat about '.').
replacements_delete = ['liq.', 'diss.']

# Repairs for Willem II, whose name contains the 'II' marker and is therefore
# mangled by the youth replacement above. Applied in this order.
willem_fixes = [
    ('Willem youth youth', 'Willem II youth'),
    ('Willem youth Tilburg', 'Willem II Tilburg'),
    ('Willem youth/RKC-19', 'Willem II/RKC-19'),
    ('Willem youth', 'Willem II'),
]

# The two columns are independent, so run the identical sequence of steps on
# each of them instead of repeating every statement twice.
for column in ('club_to', 'club_from'):
    cleaned = df[column]
    for pattern in replacements_youth:
        cleaned = cleaned.str.replace(pattern, 'youth', regex=True)
    for pattern in replacements_delete:
        cleaned = cleaned.str.replace(pattern, '', regex=True)
    # A trailing " B" / " C" also denotes a second or third team. Raw string
    # so that "\s" is not treated as an (invalid) Python string escape.
    cleaned = cleaned.replace(r"\s+[BC]$", " youth", regex=True)
    for wrong, right in willem_fixes:
        cleaned = cleaned.str.replace(wrong, right, regex=True)
    df[column] = cleaned


In [None]:
# Distinct values of league_from, i.e. the leagues whose club names are cleaned below.
df['league_from'].unique()

In [None]:
# Gather every distinct club name from both direction columns so the names
# can be browsed.
club_names = set(df['club_from'].unique().tolist()) | set(df['club_to'].unique().tolist())
# Case-insensitive substring search for any club, shown in sorted order.
[name for name in sorted(club_names) if "salamanca" in name.lower()]

### Eredivisie

In [None]:
# Canonical Eredivisie club name -> spellings found in the data that should
# collapse onto it. Clubs without any alias so far: AZ Alkmaar, NAC Breda,
# RKC Waalwijk, NEC Nijmegen, VVV-Venlo, Sparta Rotterdam.
nl_aliases_by_club = {
    "Ajax Amsterdam": ["Ajax youth", "Ajax Amateurs", "Ajax"],
    "PSV Eindhoven": ["PSV youth"],
    "Feyenoord Rotterdam": ["SC Feyenoord", "Feyenoord", "Feyenoord youth"],
    "Vitesse Arnhem": ["Vitesse", "Vitesse youth"],
    "FC Utrecht": ["Utrecht youth"],
    "Twente Enschede FC": ["FC Twente", "FC Twente youth", "Twente FC"],
    "FC Groningen": ["Groningen youth"],
    "SC Heerenveen": ["Heerenveen", "Heerenveen youth"],
    "ADO Den Haag": ["FC Den Haag"],
    "PEC Zwolle": ["FC Zwolle"],
    "Heracles Almelo": ["Heracles", "Heracles youth"],
    "Roda JC Kerkrade": ["Roda JC", "Roda JC youth"],
    "SC Cambuur-Leeuwarden": ["SC Cambuur", "SC Cambuur youth", "Cambuur L.", "Cambuur-Leeuwarden bvo"],
    "Go Ahead Eagles Deventer": ["Go Ahead Eagles"],
    "Willem II": ["Willem IIyouth", "Willem II Tilburg"],
    "FC Dordrecht": ["Dordrecht youth", "Dordrecht'90"],
    "SBV Excelsior Rotterdam": ["Excelsior youth", "Excelsior"],
    "De Graafschap Doetinchem": ["De Graafschap", "Graafschap youth"],
    "FC Volendam": ["Volendam youth"],
    "MVV Maastricht": ["Maastricht youth"],
    "Fortuna Sittard": ["Sittard youth"],
    "FC Den Bosch": ["Den Bosch youth"],
    "RBC Roosendaal": ["Roosendaal youth"],
    "FC Emmen": ["Emmen", "Emmen youth", "H'veen/Emmen youth"],
}

# Flatten to the alias -> canonical form expected by Series.replace.
nl_map = {
    alias: canonical
    for canonical, aliases in nl_aliases_by_club.items()
    for alias in aliases
}

In [None]:
# Rewrite the Dutch aliases to their canonical names on both sides of the transfer.
for column in ("club_from", "club_to"):
    df[column] = df[column].replace(nl_map)

In [None]:
# Eredivisie clubs = clubs listed under that league in the raw data, plus
# every canonical name the alias map produces.
eredivisie_raw = df_copy.loc[df_copy["league_name"] == "Eredivisie", "club_name"].unique()
names = set(eredivisie_raw) | set(nl_map.values())
league_nl = dict.fromkeys(names, "Eredivisie")

### Ligue 1

In [None]:
# Ligue 1 alias table. Each entry is [canonical name, [aliases to rewrite]].
# Repeated aliases inside an entry are kept exactly as they were recorded.
lehavre = ["AC Le Havre", ["Le Havre youth", "Le Havre youth"]]
bordeaux = ["G. Bordeaux", ["Bordeaux youth", "FC Girondins Bordeaux", "G. Bordeaux youth"]]
sochaux = ["FC Sochaux", ["FC Sochaux youth", "FC Sochaux-Montbéliard", "FC Sochaux youth"]]
monaco = ["AS Monaco", ["Monaco", "Monaco youth", "Monaco youth"]]
lens = ["Lens", ["RC Lens", "RC Lens youth", "RC Lensyouth"]]
marseille = ["Marseille", ["Marseille Yth", "Marseille youth", "Marseille youth", "Olympique Marseille"]]
montpellier = ["Montpellier HSC", ["HSC Montpellier youth", "HSC Montpellier youth", "Montpellier", "Montpellier youth"]]
auxerre = ["AJ Auxerre", ["AJ Auxerre youth", "AJ Auxerreyouth"]]
nantes = ["FC Nantes", ["FC Nantes youth", "FC Nantes youth"]]
metz = ["FC Metz", ["FC Metz youth", "FC Metz youth", "Metz youth"]]
lyon = ["Olympique Lyon", ["Olymp. Lyon youth", "Olymp. Lyon youth"]]
toulouse = ["FC Toulouse", ["FC Toulouse youth", "Toulouse", "Toulouse youth"]]
lille = ["LOSC Lille", ["LOSC Lille youth", "LOSC Lille youth"]]
toulon = ["SC Toulon", ["SC Toulon 83", "SC Toulon Var", "SC Toulon youth", "Sp. Toulon Var", "Sporting Club de Toulon et du Var"]]
nimes = ["Nîmes Olympique", ["Nîmes Ol. youth", "Nîmesyouth"]]
psg = ["Paris SG", ["Paris SG youth", "Paris SG youth", "Paris Saint-Germain"]]
caen = ["SM Caen", ["SM Caen youth", "SM Caen youth"]]
stetienne = ["AS Saint-Étienne", ["Saint-Étienne", "St-Étienne youth", "Saint-Étienne youth"]]
valenciennes = ["US Valenciennes", ["Union Sportive Valenciennes-Anzin Arronment", "Valenciennes FC", "Valenciennes youth", "Valencienne youth"]]
strasbourg = ["R. Strasbourg", ["RC Strasbourg Alsace", "R. Strasbourg youth", "Strasbourg youth"]]
martigues = ["FC Martigues", ["Martigues youth"]]
angers = ["SCO Angers", ["SCO Angers youth", "SCO Angers youth"]]
cannes = ["AS Cannes", ["AS Cannes youth", "Cannes", "Cannes youth"]]
nice = ["OGC Nice", ["OGC Niceyouth", "Nizza youth"]]
rennes = ["Stade Rennais", ["Stade Rennais FC", "Rennes youth", "Rennes youth"]]
bastia = ["SC Bastia", ["Bastia", "SC Bastia youth", "SC Bastia youth"]]
guingamp = ["EA Guingamp", ["EA Guingamp youth", "EA Guingamp youth", "Guingamp"]]
nancy = ["AS Nancy", ["AS Nancy youth", "AS Nancy-Lorraine", "AS Nancy youth"]]
chateauroux = ["LB Châteauroux", ["LB Châteauroux youth"]]
lorient = ["FC Lorient", ["FC Lorient youth", "Lorient youth"]]
sedan = ["CS Sedan", ["CS Sedan-Ardennes", "CS Sedan youth"]]
troyes = ["Troyes", ["Association Troyes Aube Champagne", "ES Troyes AC", "ES Troyes AC youth", "ES Troyes ACyouth", "ESTAC Troyes"]]
ajaccio = ["AC Ajaccio", ["AC Ajaccioyouth"]]
lemans = ["FC Le Mans", ["FC Le Mans youth", "Le Mans FC"]]
istres = ["FC Istres", ["FC Istres Ouest Provence", "Istres FC", "Istres FCyouth"]]
grenoble = ["Grenoble", ["Grenoble Foot 38", "Grenobleyouth"]]
boulogne = ["US Boulogne", ["Boulogne youth"]]
brest = ["Brest", ["Brestyouth", "Stade Brest 29", "Stade Brestois 29"]]
arles = ["AC Arles", ["AC Arles-Avignon", "AC Arles youth"]]
evian = ["Évian", ["FC Évian Thonon Gaillard", "Thonon Évian", "Thonon Évianyouth"]]
dijon = ["Dijon", ["Dijon FCO", "Dijon youth", "FCO Dijon youth"]]
reims = ["Stade Reims", ["Stade Reims youth", "Stade Reims youth"]]
g_ajaccio = ["G. Ajaccio", ["GFC Ajaccio", "GFCO Ajaccio"]]
amiens = ["Amiens SC", ["SC Amiens", "SC Amiens youth", "SC Amiens youth"]]

# Entries in the order in which they are applied.
replacementligue1 = [
    lehavre, bordeaux, sochaux, monaco, lens, marseille, montpellier, auxerre,
    nantes, metz, lyon, toulouse, lille, toulon, nimes, psg, caen, stetienne,
    valenciennes, strasbourg, martigues, angers, cannes, nice, rennes, bastia,
    guingamp, nancy, chateauroux, lorient, sedan, troyes, ajaccio, lemans,
    istres, grenoble, boulogne, brest, arles, evian, dijon, reims, g_ajaccio,
    amiens,
]

In [None]:
# Collapse every Ligue 1 alias onto its canonical club name in both
# direction columns (exact, whole-value matches only).
for canonical, aliases in replacementligue1:
    for alias in aliases:
        for column in ('club_to', 'club_from'):
            df[column] = df[column].replace(str(alias), str(canonical))

In [None]:
# Canonical Ligue 1 club name -> league label.
league_fr = dict.fromkeys([entry[0] for entry in replacementligue1], "Ligue 1")

### La Liga

In [None]:
# La Liga alias -> canonical club name, in the form expected by
# Series.replace. Youth/reserve spellings are folded into the senior club.
es_map = {
    "Albacete": "Albacete Balompié",
    "Albacete youth": "Albacete Balompié",
    "Bilbao Athletic": "Athletic Bilbao",
    "Atlético Madrid": "Atlético de Madrid",
    "Burgos CF": "Real Burgos CF",
    "Osasuna youth": "CA Osasuna",
    "Leganés youth": "CD Leganés",
    "CD Logroñés ()": "UD Logroñés",
    "CD Logroñés": "UD Logroñés",
    "Logroñés CF": "UD Logroñés",
    "Logroñés youth": "UD Logroñés",
    "Tenerife youth": "CD Tenerife",
    "CF Extremadura ()": "CF Extremadura",
    "Extremadura": "CF Extremadura",
    "Extremadura youth": "CF Extremadura",
    "CP Mérida ()": "Mérida UD",
    "CP Mérida": "Mérida UD",
    "Celta youth": "Celta de Vigo",
    "Celta Vigo youth": "Celta de Vigo",
    # Was mapped to "Cádiz CF youth", the only entry that did not resolve to
    # the senior club and therefore kept a separate youth node.
    "Cádiz youth": "Cádiz CF",
    "Depor. Alavés youth": "Deportivo Alavés",
    "Alavés youth": "Deportivo Alavés",
    "Alavés": "Deportivo Alavés",
    "Dep. La Coruña": "Deportivo de La Coruña",
    "Barcelona": "FC Barcelona",
    "Barcelona youth": "FC Barcelona",
    "Getafe youth": "Getafe CF",
    "Getafe": "Getafe CF",
    "Girona": "Girona FC",
    "Levante": "Levante UD",
    "Levante youth": "Levante UD",
    "Málaga youth": "Málaga CF",
    "Mallorca youth": "RCD Mallorca",
    "Betis youth": "Real Betis Balompié",
    "Real Betis": "Real Betis Balompié",
    "Real Murcia": "Real Murcia CF",
    "Oviedo youth": "Real Oviedo",
    "R. Sociedad youth": "Real Sociedad",
    "R. Valladolid youth": "Real Valladolid CF",
    "Real Valladolid": "Real Valladolid CF",
    "Valladolid youth": "Real Valladolid CF",
    "Recr. Huelva": "Recreativo Huelva",
    "Recr. Huelva youth": "Recreativo Huelva",
    "Compostela youth": "SD Compostela",
    "Sevilla youth": "Sevilla FC",
    "Las Palmas youth": "UD Las Palmas",
    "Las Palmas Atl.": "UD Las Palmas",
    "UD Salamanca ()": "UD Salamanca",
    "Salamanca CF": "UD Salamanca",
    "Salamanca youth": "UD Salamanca",
    "UE Lleida ()": "UE Lleida",
    "Lleida Esportiu": "UE Lleida",
    "Valencia": "Valencia CF",
    "Valencia youth": "Valencia CF",
    "Villarreal": "Villarreal CF",
    "Villarreal youth": "Villarreal CF",
    "FC Villarreal youth": "Villarreal CF"
}

In [None]:
# Rewrite the Spanish aliases to their canonical names on both sides of the transfer.
for column in ("club_from", "club_to"):
    df[column] = df[column].replace(es_map)

In [None]:
# Primera Division clubs = clubs listed under that league in the raw data,
# plus every canonical name the alias map produces.
primera_raw = df_copy.loc[df_copy["league_name"] == "Primera Division", "club_name"].unique()
names = set(primera_raw) | set(es_map.values())
league_es = dict.fromkeys(names, "Primera Division")

### Premier League

In [None]:
# Premier League alias table. Each entry is [canonical name, [aliases to rewrite]].
oldham_athletic = ["Oldham Athletic", ["Oldham youth"]]
chelsea = ["Chelsea", ["Chelsea FC", "Chelsea youth"]]
arsenal = ["Arsenal", ["Arsenal FC youth", "Arsenal FC", "Arsenal youth"]]
everton = ["Everton", ["Everton youth", "Everton FC"]]
liverpool = ["Liverpool", ["Liverpool FC", "Liverpool youth"]]
tottenham = ["Tottenham Hotspurs", ["Spurs", "Tottenham youth", "Tottenham Hotspur"]]
manutd = ["Man Utd", ["Manchester United", "Man Utd youth"]]
mancity = ["Man City", ["Man City youth", "Manchester City"]]
wimbledon = ["Wimbledon", ["AFC Wimbledon", "Wimbledon FC"]]
sheffield_utd = ["Sheff Utd", ["Sheff Utd youth", "Sheffield United", "Sheffield Utd."]]
sheffield_wed = ["Sheff Wed", ["Sheff Wed youth", "Sheffield Wednesday"]]
swindon = ["Swindon Town", ["Swindon youth"]]
bradford = ["Bradford", ["Bradford City", "Bradford youth"]]

# Entries in the order in which they are applied.
replacementpremierleague = [
    oldham_athletic, chelsea, arsenal, everton, liverpool, tottenham, manutd,
    mancity, wimbledon, sheffield_utd, sheffield_wed, swindon, bradford,
]

In [None]:
# Collapse every Premier League alias onto its canonical club name in both
# direction columns (exact, whole-value matches only).
for canonical, aliases in replacementpremierleague:
    for alias in aliases:
        for column in ('club_to', 'club_from'):
            df[column] = df[column].replace(str(alias), str(canonical))

In [None]:
# Canonical Premier League club name -> league label.
league_en = dict.fromkeys([entry[0] for entry in replacementpremierleague], "Premier League")

### Championship (English second tier)

In [None]:
# Championship alias table. Each entry is [canonical name, [aliases to rewrite]].
# Changes from the earlier version of this table:
#   * 'Ipswich Tow' -> 'Ipswich Town' and 'blackburn Rovers' -> 'Blackburn Rovers'
#     (misspelled canonical names, which produce a node under the wrong name);
#   * the placeholder alias 'Test' was removed from the Wycombe and Crewe entries.
wigan = ['Wigan Athletic', ['Wigan', 'Wigan youth']]
reading = ['Reading FC', ['Reading', 'Reading youth']]
qpr = ['Queens Park Rangers', ['QPR', 'QPR youth']]
watford = ['Watford FC', ['Watford', 'Watford youth']]
brighton = ['Brighton & Hove Albion', ['Brighton', 'Brighton youth']]
leicester = ['Leicester City', ['Leicester', 'Leicester youth']]
bolton = ['Bolton Wanderers', ['Bolton', 'Bolton youth']]
nottingham = ['Nottingham Forest', ['Nottingham youth', 'Nottm Forest']]
charlton = ['Charlton Athletic', ['Charlton youth', 'Charlton']]
derby = ['Derby County', ['Derby', 'Derby youth']]
burnley = ['Burnley FC', ['Burnley', 'Burnley youth']]
birmingham = ['Birmingham City', ['Birmingham', 'Birmingham youth']]
leeds = ['Leeds United', ['Leeds', 'Leeds youth']]
ipswich = ['Ipswich Town', ['Ipswich youth', 'Ipswich']]
blackpool = ['Blackpool FC', ['Blackpool', 'Blackpool youth']]
mbrough = ['Middlesbrough FC', ['Middlesbrough']]
blackburn = ['Blackburn Rovers', ['Blackburn', 'Blackburn youth']]
huddersfield = ['Huddersfield Town', ['Huddersfieldyouth', 'Huddersf. youth', 'Huddersfield']]
millwall = ['Millwall FC', ['Millwall', 'Millwall youth']]
barnsley = ['Barnsley FC', ['Barnsley youth']]
doncaster = ['Doncaster Rovers', ['Doncaster youth', 'Doncaster']]
bmouth = ['AFC Bournemouth', ['Bournemouth youth', 'Bournemouth']]
yeovil = ['Yeovil Town', ['Yeovil youth']]
brentford = ['Brentford FC', ['Brentfordyouth', 'Brentford youth', 'Brentford']]
cardiff = ['Cardiff City', ['Cardiff youth', 'Cardiff']]
fulham = ['Fulham FC', ['Fulham youth', 'Fulham']]
norwich = ['Norwich City', ['Norwich youth', 'Norwich']]
rotherham = ['Rotherham United', ['Rotherham youth', 'Rotherham']]
wolves = ['Wolverhampton Wanderers', ['Wolves youth', 'Wolves', 'Wolves FC']]
hull = ['Hull City', ['Hull youth']]
bristol = ['Bristol City', ['Bristol C youth', 'Bristol City youth', 'Bristol youth']]
mkdons = ['Milton Keynes Dons', ['MK Dons youth', 'MK Dons']]
preston = ['Preston North End', ['Preston NE', 'Preston youth', 'Preston']]
crystal = ['Crystal Palace', ['Palace youth']]
pborough = ['Peterborough United', ['Peterborough']]
westbrom = ['West Bromwich Albion', ['West Brom youth', 'West Brom']]
swansea = ['Swansea City', ['Swansea youth', 'Swansea']]
stoke = ['Stoke City', ['Stoke youth']]
luton = ['Luton Town', ['Luton youth', 'Luton']]
# NOTE(review): the Premier League table already rewrites 'Sheffield United'
# and 'Sheffield Utd.' to 'Sheff Utd', so this entry probably matches nothing
# by the time it runs - confirm which canonical name is intended.
sheffield = ['Sheffield United', ['Sheffield Utd.']]
plymouth = ['Plymouth Argyle', ['Plymouth youth', 'Plymouth']]
southampton = ['Southampton FC', ['Southampton youth', 'Southampton', 'Southamptonyouth']]
coventry = ['Coventry City', ['Coventry youth', 'Coventry']]
sunderland = ['Sunderland AFC', ['Sunderland youth', 'Sunderland']]
southend = ['Southend United', ['Southend youth']]
colchester = ['Colchester United', ['Colchester Utd.', 'Colchester youth']]
scunthorpe = ['Scunthorpe United', ['Scunthorpe Utd.', 'Scunthorpe youth']]
newcastle = ['Newcastle United', ['Newcastle youth', 'Newcastle']]
aston = ['Aston Villa', ['Aston Villa youth']]
burton = ['Burton Albion', ['Burton youth']]
portsmouth = ['Portsmouth FC', ['Portsmouth youth', 'Portsmouth']]
wycombe = ['Wycombe Wanderers', ['Wycombe']]
westham = ['West Ham United', ['West Ham youth', 'West Ham']]
crewe = ['Crewe Alexandra', ['Crewe youth']]
gillingham = ['Gillingham FC', ['Gillingham youth']]

# Entries in the order in which they are applied.
replacementschampionship = [
    wigan, reading, qpr, watford, brighton, leicester, bolton, nottingham,
    charlton, derby, burnley, birmingham, leeds, ipswich, blackpool, mbrough,
    blackburn, huddersfield, millwall, barnsley, doncaster, bmouth, yeovil,
    brentford, cardiff, fulham, norwich, rotherham, wolves, hull, bristol,
    mkdons, preston, crystal, pborough, westbrom, swansea, stoke, luton,
    sheffield, plymouth, southampton, coventry, sunderland, southend,
    colchester, scunthorpe, newcastle, aston, burton, portsmouth, wycombe,
    westham, crewe, gillingham,
]

In [None]:
# Collapse every Championship alias onto its canonical club name in both
# direction columns (exact, whole-value matches only).
for canonical, aliases in replacementschampionship:
    for alias in aliases:
        for column in ('club_to', 'club_from'):
            df[column] = df[column].replace(str(alias), str(canonical))

In [None]:
# Canonical Championship club name -> league label.
league_en_2 = dict.fromkeys([entry[0] for entry in replacementschampionship], "Championship")

### Serie A

In [None]:
# Serie A alias table. Each entry is [canonical name, [aliases to rewrite]].
# Changes from the earlier version of this table:
#   * 'Genoa' / 'Genoa youth' were listed under UC Sampdoria, which merged
#     two different clubs; they now belong to the Genoa CFC entry.
#   * 'Verona' / 'Verona youth' were listed under Chievo Verona; they now
#     belong to the Hellas Verona entry.
#     NOTE(review): this assumes the bare name 'Verona' in the data denotes
#     Hellas Verona - confirm against a few raw rows.
acmilan = ['AC Milan', ['AC Milan Youth', 'AC Milan youth', 'Milan', 'Milan youth', 'Milan AC']]
atalanta = ['Atalanta BC', ['Atalanta', 'Atalanta youth']]
chievo = ['Chievo Verona', ['Chievo youth', 'Chievo Youth']]
bologna = ['Bologna FC 1909', ['Bologna youth', 'Bologna']]
sampdoria = ['UC Sampdoria', ['Sampdoria', 'Sampdoria youth']]
livorno = ['AS Livorno', ['Livorno youth']]
cagliari = ['Cagliari Calcio', ['Cagliari youth', 'Cagliari Youth']]
catania = ['Calcio Catania', ['Catania', 'Catania youth']]
napoli = ['SSC Napoli', ['Napoli', 'Napoli youth']]
juventus = ['Juventus FC', ['Juventus', 'Juventus youth']]
fiorentina = ['ACF Fiorentina', ['AC Fiorentina', 'Fiorentina', 'Fiorentina youth', 'Florentia']]
asroma = ['AS Roma', ['Roma youth', 'AS Rom Youth', 'AS Rom youth']]
intermailand = ['FC Internazionale', ['FC Inter', 'Inter Milan', 'Inter youth', 'Inter-2005 M.']]
parma = ['Parma FC', ['AC Parma', 'Parma', 'Parma youth', 'Parma Calcio 1913', 'Parma FC Youth']]
genoa = ['Genoa CFC', ['Genoa 1893', 'Genoa CFC Youth', 'Genoa', 'Genoa youth']]
hellas = ['Hellas Verona', ['Hellas Sport', 'Verona', 'Verona youth']]
lazio = ['SS Lazio', ['Lazio', 'Lazio youth', 'Lazio Youth']]
udinese = ['Udinese Calcio', ['Udinese', 'Udinese youth']]
torino = ['Torino FC', ['Torino', 'Torino youth', 'Torino Calcio', 'Torino FC Youth']]
sassuolo = ['US Sassuolo', ['Sassuolo', 'Sassuolo youth']]
cesena = ['AC Cesena', ['Cesena', 'Cesena youth', 'RC Cesena']]
empoli = ['FC Empoli', ['Empoli youth', 'Empoli FC Youth', 'Empoli FC youth']]
palermo = ['US Palermo', ['Palermo', 'Palermo youth', 'US Palermo Yout']]
carpi = ['Carpi FC 1909', ['Carpi', 'Carpi youth']]
frosinone = ['Frosinone Calcio', ['Frosinone']]
siena = ['AC Siena', ['Siena', 'Siena youth']]
pescara = ['Delfino Pescara 1936', ['Pescara', 'Pescara youth', 'Pescara Youth']]
reggiana = ['AC Reggiana', ['Reggiana', 'Reggiana youth', 'Reggiana Giovani']]
cremonese = ['US Cremonese', ['Cremonese', 'Cremonese youth', 'Cremonese Giov.']]
piacenza = ['Piacenza FC', ['Lupa Piacenza', 'Piacenza youth', 'Piacenza']]
foggia = ['Foggia Calcio', ['Foggia', 'Foggia youth']]
lecce = ['US Lecce', ['Lecce youth', 'Lecce']]
padova = ['Calcio Padova', ['Padova', 'Padova youth']]
bari = ['AS Bari', ['Bari', 'Bari youth']]
brescia = ['Brescia Calcio', ['Brescia youth', 'Brescia']]
vicenza = ['Vicenza Calcio', ['LR Vicenza', 'Vicenza', 'Vicenza youth']]
ancona = ['Ancona Calcio', ['Ancona', 'Ancona Matelica', 'Ancona Youth']]
reggina = ['Reggina Calcio', ['Reggina', 'Reggina youth']]
# Maps the name onto itself; kept so the club still gets a Serie A label.
acvenezia = ['AC Venezia 1907', ['AC Venezia 1907']]
perugia = ['AC Perugia', ['Perugia youth', 'Perugia']]
messina = ['FC Messina Peloro', ['ACR Messina', 'ACR Messina You', 'FC Messina', 'Messina', 'Messina youth', 'Messina Peloro']]
ascoli = ['Ascoli Calcio 1898', ['Ascoli youth', 'Ascoli']]
benevento = ['Benevento Calcio', ['Benevento youth', 'Benevento']]
crotone = ['FC Crotone', ['Crotone youth', 'Crotone']]
spal = ['SPAL 2013', ['SPAL youth', 'SPAL']]
spezia = ['Spezia Calcio', ['Spezia youth', 'Spezia']]
salernitana = ['US Salernitana 1919', ['US Salernitana', 'Salernitana youth', 'Salernitana']]
veneziafc = ['Venezia FC', ['SSC Venezia', 'Unione Venezia', 'Venezia', 'Venezia youth']]
novara = ['Novara Calcio 1908', ['Novara']]
treviso = ['ACD Treviso', ['Treviso', 'Treviso youth']]
como = ['Como Calcio', ['Como youth', 'Como']]
modena = ['Modena FC', ['Modena youth', 'Modena']]

# Entries in the order in which they are applied.
replacementsseriea = [
    acmilan, atalanta, chievo, bologna, sampdoria, livorno, cagliari, catania,
    napoli, juventus, fiorentina, asroma, intermailand, parma, genoa, hellas,
    lazio, udinese, torino, sassuolo, cesena, empoli, palermo, carpi,
    frosinone, siena, pescara, reggiana, cremonese, piacenza, foggia, lecce,
    padova, bari, brescia, vicenza, ancona, reggina, acvenezia, perugia,
    messina, ascoli, benevento, crotone, spal, spezia, salernitana, veneziafc,
    novara, treviso, como, modena,
]

In [None]:
# Collapse every Serie A alias onto its canonical club name in both
# direction columns (exact, whole-value matches only).
for canonical, aliases in replacementsseriea:
    for alias in aliases:
        for column in ('club_to', 'club_from'):
            df[column] = df[column].replace(str(alias), str(canonical))

In [None]:
# Canonical Serie A club name -> league label.
league_it = dict.fromkeys([entry[0] for entry in replacementsseriea], "Serie A")

### 1. Bundesliga

In [None]:
# Bundesliga alias table. Each entry is [canonical name, [aliases to rewrite]].
# The placeholder alias 'change' was removed from the VfB Leipzig and
# VfL Bochum entries; it was leftover scaffolding, not a club spelling.
bayern = ['Bayern Munich', ['FC Bayern', 'Bayern Munich youth']]
bvb = ['Borussia Dortmund', ['B. Dortmund youth', 'Bor. Dortmund']]
leverkusen = ['Bayer 04 Leverkusen', ['Bay. Leverkusen', 'Leverkusen youth']]
schalke = ['FC Schalke 04', ['Schalke 04 youth']]
freiburg = ['SC Freiburg', ['SC Freiburg youth']]
frankfurt = ['Eintracht Frankfurt', ['E. Frankfurt', 'E. Frankfurt youth']]
hsv = ['Hamburger SV', ['Hamburger SV youth', 'Hamburg youth']]
gladbach = ['Borussia Mönchengladbach', ["M'gladbach youth", "Bor. M'gladbach"]]
hannover = ['Hannover 96', ['Hannover 96 youth']]
nuernberg = ['1.FC Nuremberg', ['Nuremberg youth', 'Nürnberg youth']]
wolfsburg = ['VfL Wolfsburg', ['Wolfsburg youth']]
stuttgart = ['VfB Stuttgart', ['Stuttgart youth']]
mainz = ['1.FSV Mainz 05', ['FSV Mainz 05 youth', 'Mainz 05 youth']]
bremen = ['SV Werder Bremen', ['Werder Bremen', 'W. Bremen youth']]
augsburg = ['FC Augsburg', ['FC Augsburg youth']]
hoffenheim = ['TSG 1899 Hoffenheim', ['Hoffenheim youth', 'TSG Hoffenheim']]
hertha = ['Hertha BSC', ['Hertha BSC youth']]
braunschweig = ['Eintracht Braunschweig', ['E. Braunschweig', 'Braunschweig youth']]
koln = ['1. FC Köln', ['1.FC Köln youth']]
paderborn = ['SC Paderborn 07', ['Paderborn', 'Paderborn youth', 'SC Paderborn', 'SC Paderborn youth']]
ingolstadt = ['FC Ingolstadt 04', ['FC Ingolstadt', 'Ingolstadt youth']]
darmstadt = ['SV Darmstadt 98', ['Darmstadt youth']]
furth = ['SpVgg Greuther Fürth', ['Gr. Fürth youth', 'Greuther Fürth', 'SpVgg Fürth']]
fortuna = ['Fortuna Düsseldorf', ['Fortuna youth', 'Düsseldorf youth', 'F. Düsseldorf', 'F. Düsseldorf youth']]
lautern = ['1.FC Kaiserslautern', ["1.FC K'lautern", "K'lautern youth"]]
ddresden = ['SG Dynamo Dresden', ['Dynamo Dresden', 'D. Dresden youth']]
watten = ['SG Wattenscheid 09', ['Wattenscheid youth', 'Wattenscheid 09']]
duisburg = ['MSV Duisburg', ['Duisburg youth', 'MSV Duisburg youth']]
karlsruhe = ['Karlsruher SC', ['Karlsruhe youth']]
vfbleipzig = ['VfB Leipzig', ['VfB Leipzig youth']]
uerdingen = ['KFC Uerdingen 05', ['Bay. Uerdingen', 'Bayer 05 Uerdingen', 'KFC Uerdingen', 'Uerdingen youth']]
hansa = ['FC Hansa Rostock', ['H. Rostock youth', 'Hansa Rostock']]
saarbrucken = ['1.FC Saarbrücken', ['Saarbrücken', 'Saarbrücken youth']]
cottbus = ['FC Energie Cottbus', ['Energie Cottbus', 'E. Cottbus youth']]
bielefeld = ['Arminia Bielefeld', ['A. Bielefeld youth', 'Arm. Bielefeld', 'Bielefeld youth']]
aachen = ['Alemannia Aachen', ['A. Aachen youth', 'Aachen youth', 'Alem. Aachen']]
unterhaching = ['SpVgg Unterhaching', ['Unterhaching', 'Unterhaching youth']]
rbleipzig = ['RB Leipzig', ['RB Leipzig youth']]
unionberlin = ['1.FC Union Berlin', ['Union Berlin', 'U. Berlin youth']]
ulm = ['SSV Ulm 1846', ['SSV Ulm 1846 youth']]
bochum = ['VfL Bochum', ['VfL Bochum youth']]
sechziger = ['TSV 1860 Munich', ['1860 Munich', '1860 München youth', 'TSV 1860 youth']]
pauli = ['FC St. Pauli', ['FC St. Pauli youth', 'St. Pauli youth']]

# Entries in the order in which they are applied.
replacementsbundesliga = [
    bayern, bvb, leverkusen, schalke, freiburg, frankfurt, hsv, gladbach,
    hannover, nuernberg, wolfsburg, stuttgart, mainz, bremen, augsburg,
    hoffenheim, hertha, braunschweig, koln, paderborn, ingolstadt, darmstadt,
    furth, fortuna, lautern, ddresden, watten, duisburg, karlsruhe,
    vfbleipzig, uerdingen, hansa, saarbrucken, cottbus, bielefeld, aachen,
    unterhaching, rbleipzig, unionberlin, ulm, bochum, sechziger, pauli,
]

In [None]:
# Rename every alias spelling to its canonical Bundesliga club name, in list order
for entry in replacementsbundesliga:
    canonical, aliases = entry[0], entry[1]
    for alias in aliases:
        for column in ('club_to', 'club_from'):
            df[column] = df[column].replace(str(alias), str(canonical))

In [None]:
# Every canonical club name of the list above belongs to the "1 Bundesliga"
league_de = dict.fromkeys((entry[0] for entry in replacementsbundesliga), "1 Bundesliga")

### Liga NOS (Portugal)

In [None]:
#List all the club names from the Liga Nos to replace
# Each entry is [canonical club name, [alias spellings that are renamed to it]].
# NOTE(review): 'Futebol Benfica' and 'youthmica Coimbra' are kept as found; both look questionable
# (possibly a different club / a garbled name) -- verify against the raw data.
replacementsliganos = [
    ['FC Porto', ['FC Porto youth', 'FC Portoyouth']],
    # The original listed 'Benfica youth' twice; one occurrence is enough
    ['SL Benfica', ['Futebol Benfica', 'Benfica youth']],
    ['Sporting CP', ['Sporting CP youth']],
    ['Boavista FC', ['Boavista youth', 'Boavistayouth']],
    ['CS Marítimo', ['Marítimo', 'Marítimo youth', 'Marítimoyouth']],
    ['SC Farense', ['Farense', 'Farense youth']],
    ['CF Belenenses', ['Belenenses SAD', 'Belenenses youth']],
    ['Gil Vicente FC', ['Gil Vicente', 'Gil Vicente youth']],
    ['CSC Beira-Mar', ['Beira-Mar', 'Beira-Mar youth']],
    ['FC Paços de Ferreira', ['Paços Ferreira', 'Paços F. youth', 'Paços F.youth', 'Paços de F. youth']],
    ['Vitória Guimarães SC', ['Guimarães youth', 'Guimarãesyouth', 'Vit. Guimarães']],
    ['SC Salgueiros', ['Salgueiros', 'Salgueiros 08', 'Salgueiros youth']],
    ['FC Tirsense', ['Tirsense']],
    ['SC Espinho', ['Espinho']],
    ['GD Chaves', ['Chaves', 'Chaves youth', 'Chavesyouth']],
    ['Vitória Setúbal FC', ['Vitória Setúbal', 'Setúbal youth', 'Setúbalyouth']],
    ['CF Estrela Amadora SAD', ['E. Amadora youth', 'Estrela Amadora']],
    ['União de Leiria', ['Leiria', 'Leiria Sub-17', 'Leiria youth']],
    ['CF União Madeira', ['U. Madeira youth', 'União Madeira']],
    ['Leça FC', ['Leça']],
    ['FC Felgueiras 1932', ['Felgueiras']],
    ['SC Campomaiorense', ['Campomaiorense']],
    ['Rio Ave FC', ['Rio Ave FC youth', 'Rio Ave youth', 'Rio Aveyouth']],
    ['União Coimbra', ['Coimbra', 'Coimbrayouth', 'youthmica Coimbra']],
    # Canonical name used to be the lower-case 'varzim', unlike every other entry in this list
    ['Varzim SC', ['Varzim', 'Varzim SCyouth', 'Varzim youth']],
    ['FC Alverca', ['Alverca', 'Alverca youth']],
    ['CD Santa Clara', ['Santa Clara', 'Santa Clara youth']],
    ['Desportivo Aves', ['Aves youth', 'Avesyouth']],
    ['CD Nacional', ['Nacional', 'Nacional youth']],
    ['Moreirense FC', ['Moreirense', 'Moreirense youth']],
    ['FC Penafiel', ['Penafiel', 'Penafiel youth']],
    ['Leixões SC', ['Leixões', 'Leixões youth']],
    ['CD Trofense', ['Trofense']],
    ['SC Olhanense', ['Olhanense', 'Olhanense youth']],
    ['Portimonense SC', ['Portimonense', 'Portimonense 23', 'Portimonense Futebol SAD']],
    ['CD Feirense', ['Feirense', 'Feirense Sub-23', 'Feirense youth']],
    ['FC Arouca', ['Arouca', 'Arouca youth']],
    ['CD Tondela', ['Tondela', 'Tondela Sub-23', 'Tondela youth']],
    ['FC Vizela', ['FC Vizela youth', 'Vizela', 'Vizela Y19']],
]

In [None]:
# Rename every alias spelling to its canonical Liga Nos club name, in list order
for entry in replacementsliganos:
    canonical, aliases = entry[0], entry[1]
    for alias in aliases:
        for column in ('club_to', 'club_from'):
            df[column] = df[column].replace(str(alias), str(canonical))

In [None]:
# Every canonical club name of the list above belongs to the "Liga Nos"
league_pt = dict.fromkeys((entry[0] for entry in replacementsliganos), "Liga Nos")

### Clean League names

In [None]:
# Merge the per-league club dictionaries; for a club listed in several of them the later one wins
league_map = {}
for league_clubs in (league_fr, league_nl, league_es, league_en,
                     league_en_2, league_it, league_de, league_pt):
    league_map.update(league_clubs)

In [None]:
# Look up the league of both clubs of each transfer; clubs missing from league_map become "Other"
for club_col, league_col in (("club_from", "league_from"), ("club_to", "league_to")):
    df[league_col] = df[club_col].apply(lambda club: league_map.get(club, "Other"))

In [None]:
# Drop rows repeating the same (player, age, origin club) and renumber the index in the same in-place call
df.drop_duplicates(subset=["player_name", "age", "club_from"], inplace=True, ignore_index=True)

In [None]:
# Write the cleaned transfers to disk (without the index column); later sections reload this file
df.to_csv("leagues_cleaned.csv", index = False)

# Network Analysis

In [None]:
# Optionally start from the saved DataFrame instead of the one in memory
#df = pd.read_csv("leagues_cleaned.csv")

# Relabel the Championship as Premier League so all English clubs share one league
for league_col in ("league_from", "league_to"):
    df.loc[df[league_col] == "Championship", league_col] = "Premier League"

# Exclude every transfer touching a league that was not cleaned ("Other")
has_cleaned_leagues = (df["league_from"] != "Other") & (df["league_to"] != "Other")
df = df[has_cleaned_leagues]

# One DataFrame per decade
transfer_years = df["year"]
df_92_02 = df[transfer_years <= 2002]
df_03_12 = df[(transfer_years > 2002) & (transfer_years <= 2012)]
df_13_today = df[(transfer_years > 2012) & (transfer_years <= 2022)]

In [None]:
# Define functions
def group_league_combinations(df_):
    """Count the rows of ``df_`` for every (league_from, league_to) combination.

    Returns a DataFrame with the columns ``league_from``, ``league_to`` and ``count``,
    one row per combination that occurs in ``df_``.
    """
    rows_per_pair = df_.groupby(["league_from", "league_to"]).size()
    return rows_per_pair.reset_index(name="count")

def group_league_combinations_fees(df_):
    """Sum the numeric columns of ``df_`` for every (league_from, league_to) combination.

    Returns a DataFrame with the columns ``league_from``, ``league_to``, ``age``, ``count``
    and ``year``. The middle column is named ``count`` so the result can be used with the
    same plotting code as ``group_league_combinations``.

    NOTE(review): the names are assigned by position, i.e. this assumes that the numeric
    columns of ``df_`` are exactly age, the transfer fee and year, in that order -- confirm
    against the cleaned DataFrame.

    Raises:
        ValueError: if the number of summed numeric columns is not three.
    """
    # numeric_only=True keeps text columns (player names, clubs, ...) out of the sum. Newer pandas
    # versions would otherwise aggregate them as well and the positional renaming below would fail.
    df_grouped = df_.groupby(["league_from", "league_to"]).sum(numeric_only=True).reset_index()
    new_names = ['league_from', 'league_to', 'age', 'count', 'year']
    if len(df_grouped.columns) != len(new_names):
        raise ValueError(
            "Expected exactly three numeric columns (age, fee, year) next to the league columns, got: {}".format(
                list(df_grouped.columns)))
    df_grouped.columns = new_names
    return df_grouped

def node_sizes_and_colors(nodes, leagues_within, size_multiplier):
    """Return one colour and one size per league node, in the order of ``nodes``.

    Args:
        nodes: iterable of league names (the nodes of the league graph).
        leagues_within: DataFrame with the columns ``league_from`` and ``count`` holding the
            value for transfers inside each league (at most one row per league).
        size_multiplier: factor applied to the within-league ``count`` to get the node size.

    Returns:
        ``(color_map, node_sizes)``: two lists with exactly one entry per node. A league
        without a row in ``leagues_within`` gets size 0, and a league without a predefined
        colour is drawn in "lightgrey". Previously the former raised and the latter was
        skipped, which left the lists shorter than ``nodes``.
    """
    league_colors = {
        "Premier League": "lightgreen",
        "Serie A": "lightblue",
        "1 Bundesliga": "red",
        "Ligue 1": "green",
        "Primera Division": "yellow",
        "Liga Nos": "turquoise",
        "Eredivisie": "grey",
    }
    color_map = []
    node_sizes = []

    for league in nodes:
        within = leagues_within.loc[leagues_within['league_from'] == league, 'count']
        # .item() needs exactly one value; a league without internal transfers has no row at all
        within_count = within.item() if len(within) else 0
        node_sizes.append(within_count * size_multiplier)
        color_map.append(league_colors.get(league, "lightgrey"))
    return color_map, node_sizes

### Research Question 1: Interactions Between Leagues

#### 1.1.1 Total

In [None]:
# Create dictionary containing the overall DataFrame and one DataFrame per decade
dfs = {"Transfers Overall": df,
       "Transfers 1992 - 2002": df_92_02,
       "Transfers 2003 - 2012": df_03_12,
       "Transfers 2013 - 2021": df_13_today}

# plt.savefig does not create missing folders
os.makedirs('output/leagues', exist_ok = True)

# Loop through dictionary and create one graph per entry
for name, df_ in dfs.items():
    # Reduce df to leagues
    df_grouped = group_league_combinations(df_)
    leagues_within = df_grouped[(df_grouped["league_from"] == df_grouped["league_to"])]
    leagues_between = df_grouped[(df_grouped["league_from"] != df_grouped["league_to"])]

    # Create graph. An undirected edge holds a single "count", so the rows A -> B and B -> A are
    # added up here. Building the graph straight from the edge list keeps only whichever of the
    # two directions comes last.
    G = nx.Graph()
    between_rows = leagues_between[["league_from", "league_to", "count"]].itertuples(index = False, name = None)
    for league_from, league_to, count in between_rows:
        if G.has_edge(league_from, league_to):
            G[league_from][league_to]["count"] += count
        else:
            G.add_edge(league_from, league_to, count = count)
    coords = nx.kamada_kawai_layout(G)
    edges = G.edges()
    nodes = G.nodes()

    # Edge width and colour follow the number of transfers between two leagues,
    # node size follows the number of transfers inside a league
    weights = [G[u][v]['count'] / 15 for u, v in edges]
    color_map, node_sizes = node_sizes_and_colors(nodes, leagues_within, 5)

    # Draw the graph and save it
    plt.figure(figsize = (25, 15))
    plt.title(name, fontdict = {'fontsize': 50})
    nx.draw(G, node_color = color_map, width = weights, with_labels = True, node_size = node_sizes,
            pos = coords, arrows = False, font_size = 15, edge_color = weights,
            edge_cmap = plt.cm.Blues)
    plt.savefig(f'output/leagues/{name}.png')

#### 1.1.2 In- and Outgoing

In [None]:
# Create dictionary containing the overall DataFrame and one DataFrame per decade
dfs = {"Transfers Bidirectional Overall": df,
       "Transfers Bidirectional 1992 - 2002": df_92_02,
       "Transfers Bidirectional 2003 - 2012": df_03_12,
       "Transfers Bidirectional 2013 - 2021": df_13_today}

# plt.savefig does not create missing folders
os.makedirs('output/leagues', exist_ok = True)

# Loop through dictionary and create one graph per entry.
# The loop variable must not be called df: that would rebind the global df to the last decade and
# every later cell that plots "Overall" from df would silently use the wrong data.
for name, df_ in dfs.items():
    # Reduce df to leagues
    df_grouped = group_league_combinations(df_)
    leagues_within = df_grouped[(df_grouped["league_from"] == df_grouped["league_to"])]
    leagues_between = df_grouped[(df_grouped["league_from"] != df_grouped["league_to"])]

    # Create directed graph: one edge per direction between two leagues
    G = nx.convert_matrix.from_pandas_edgelist(leagues_between, source = "league_from", target = "league_to", edge_attr = "count", create_using = nx.DiGraph())
    coords = nx.kamada_kawai_layout(G)

    edges = G.edges()
    nodes = G.nodes()

    # Edge width and colour follow the number of transfers in that direction,
    # node size follows the number of transfers inside a league
    weights = [G[u][v]['count'] / 25 for u, v in edges]
    color_map, node_sizes = node_sizes_and_colors(nodes, leagues_within, 4)

    # Draw the graph and save it
    plt.figure(figsize = (25, 15))
    plt.title(name, fontdict = {'fontsize': 50})
    nx.draw(G, node_color = color_map, width = weights, with_labels = True, node_size = node_sizes,
            pos = coords, arrows = True, font_size = 15, edge_color = weights, edge_cmap = plt.cm.Blues,
            connectionstyle = "arc3, rad = 0.3")
    plt.savefig(f'output/leagues/{name}.png')

### 1.2. Transfer Fees

#### 1.2.1. Total

In [None]:
# Create dictionary containing the overall DataFrame and one DataFrame per decade
dfs = {"Transfer Fees Overall": df,
       "Transfer Fees 1992 - 2002": df_92_02,
       "Transfer Fees 2003 - 2012": df_03_12,
       "Transfer Fees 2013 - 2021": df_13_today}

# plt.savefig does not create missing folders
os.makedirs('output/leagues', exist_ok = True)

# Loop through dictionary and create one graph per entry
for name, df_ in dfs.items():
    # Reduce df to leagues; "count" holds the summed fee column here
    df_grouped = group_league_combinations_fees(df_)
    leagues_within = df_grouped[(df_grouped["league_from"] == df_grouped["league_to"])]
    leagues_between = df_grouped[(df_grouped["league_from"] != df_grouped["league_to"])]

    # Create graph. An undirected edge holds a single "count", so the rows A -> B and B -> A are
    # added up here. Building the graph straight from the edge list keeps only whichever of the
    # two directions comes last.
    G = nx.Graph()
    between_rows = leagues_between[["league_from", "league_to", "count"]].itertuples(index = False, name = None)
    for league_from, league_to, count in between_rows:
        if G.has_edge(league_from, league_to):
            G[league_from][league_to]["count"] += count
        else:
            G.add_edge(league_from, league_to, count = count)
    coords = nx.kamada_kawai_layout(G)
    edges = G.edges()
    nodes = G.nodes()

    # Edge width and colour follow the summed fees between two leagues,
    # node size follows the summed fees inside a league
    weights = [G[u][v]['count'] / 15 for u, v in edges]
    color_map, node_sizes = node_sizes_and_colors(nodes, leagues_within, 15)

    # Draw the graph and save it
    plt.figure(figsize = (25, 15))
    plt.title(name, fontdict = {'fontsize': 50})
    nx.draw(G, node_color = color_map, width = weights, with_labels = True, node_size = node_sizes,
            pos = coords, arrows = False, font_size = 15, edge_color = weights,
            edge_cmap = plt.cm.Blues)
    plt.savefig(f'output/leagues/{name}.png')

#### 1.2.2. Transfer Fees In- and Outgoing

In [None]:
# Create dictionary containing the overall DataFrame and one DataFrame per decade
dfs = {"Transfer Fees Bidirectional Overall": df,
       "Transfer Fees Bidirectional 1992 - 2002": df_92_02,
       "Transfer Fees Bidirectional 2003 - 2012": df_03_12,
       "Transfer Fees Bidirectional 2013 - 2021": df_13_today}

# plt.savefig does not create missing folders
os.makedirs('output/leagues', exist_ok = True)

# Loop through dictionary and create one graph per entry
for name, df_ in dfs.items():
    # Reduce df to leagues; "count" holds the summed fee column here
    df_grouped = group_league_combinations_fees(df_)
    leagues_within = df_grouped[(df_grouped["league_from"] == df_grouped["league_to"])]
    leagues_between = df_grouped[(df_grouped["league_from"] != df_grouped["league_to"])]

    # Create directed graph: one edge per direction between two leagues
    G = nx.convert_matrix.from_pandas_edgelist(leagues_between, source = "league_from", target = "league_to", edge_attr = "count", create_using = nx.DiGraph())
    coords = nx.kamada_kawai_layout(G)

    edges = G.edges()
    nodes = G.nodes()

    # Edge width and colour follow the summed fees in that direction,
    # node size follows the summed fees inside a league
    weights = [G[u][v]['count'] / 50 for u, v in edges]
    color_map, node_sizes = node_sizes_and_colors(nodes, leagues_within, 10)

    # Draw the graph and save it
    plt.figure(figsize = (25, 15))
    plt.title(name, fontdict = {'fontsize': 50})
    nx.draw(G, node_color = color_map, width = weights, with_labels = True, node_size = node_sizes,
            pos = coords, arrows = True, font_size = 15, edge_color = weights, edge_cmap = plt.cm.Blues,
            connectionstyle = "arc3, rad = 0.3")
    plt.savefig(f'output/leagues/{name}.png')

### Research Question 2: Club clusters

In [None]:
# Reload the cleaned transfers from disk. This replaces the df that the league analysis filtered,
# so rows with league "Other" and the separate "Championship" label are present again.
df = pd.read_csv("leagues_cleaned.csv")

In [None]:
# Independent copy of the reloaded transfers for the club-level network over all transfers
dfclubsnational = df.copy()

In [None]:
# Split the club transfers into the three periods, keyed by the label used in plot titles and file names
national_years = dfclubsnational["year"]
dfclubsperyearnational = {
    "1992 - 2002": dfclubsnational[national_years <= 2002],
    "2003 - 2012": dfclubsnational[(national_years > 2002) & (national_years <= 2012)],
    "2013 - 2021": dfclubsnational[(national_years > 2012) & (national_years <= 2022)],
}

In [None]:
# plt.savefig does not create missing folders
os.makedirs('output/clubs', exist_ok=True)

# One network per period. Two clubs are linked when at least 15 transfers went from one to the other.
# The loop variable has its own name so the full dfclubsnational frame is not overwritten by a slice.
for name, df_period in dfclubsperyearnational.items():
    # Number of transfers per ordered (club_from, club_to) pair
    dfclubnational = df_period.groupby(["club_from", "club_to"])[["club_from"]].count().dropna()
    dfclubnational = dfclubnational.rename(columns={"club_from": "num_transfers"})
    dfclubnational = dfclubnational.reset_index()
    # Ignore rows where origin and destination are the same club, then keep frequent connections only
    dfclubnational = dfclubnational[dfclubnational["club_from"] != dfclubnational["club_to"]]
    dfclubnational = dfclubnational[dfclubnational["num_transfers"] >= 15]
    G=nx.from_pandas_edgelist(dfclubnational, 'club_from', 'club_to', edge_attr='num_transfers')
    labels = {club: club for club in G.nodes}
    fig, ax = plt.subplots(figsize=(20,10))
    # Fixed seed so the force-directed layout, and with it the saved figure, is the same on every run
    pos = nx.spring_layout(G, k = 0.4, seed = 42)
    nx.draw_networkx_nodes(G, pos, ax = ax, node_size = 100, node_color = 'red')
    nx.draw_networkx_edges(G, pos, ax=ax, edge_color= 'red')
    plt.title('Club interactions ' + str(name), fontdict = {'fontsize': 50})
    _ = nx.draw_networkx_labels(G, pos, labels, ax=ax, font_size = 15, font_color = 'black')
    plt.savefig(f'output/clubs/Clubs interactions {name}.png')

    

In [None]:
# Independent copy of the reloaded transfers for the club-level network across different leagues
dfclubsinternational = df.copy()

In [None]:
# Split the club transfers into the three periods, keyed by the label used in plot titles and file names
international_years = dfclubsinternational["year"]
dfclubsperyearinternational = {
    "1992 - 2002": dfclubsinternational[international_years <= 2002],
    "2003 - 2012": dfclubsinternational[(international_years > 2002) & (international_years <= 2012)],
    "2013 - 2021": dfclubsinternational[(international_years > 2012) & (international_years <= 2022)],
}

In [None]:
# plt.savefig does not create missing folders
os.makedirs('output/clubs', exist_ok=True)

# One network per period, restricted to transfers whose two leagues differ. Two clubs are linked
# when at least 7 such transfers went from one to the other.
# The loop variable has its own name so the full dfclubsinternational frame is not overwritten by a slice.
for name, df_period in dfclubsperyearinternational.items():
    dfclub = df_period[df_period["league_from"] != df_period["league_to"]]
    # Number of transfers per ordered (club_from, club_to) pair
    dfclub = dfclub.groupby(["club_from", "club_to"])[["club_from"]].count().dropna()
    dfclub = dfclub.rename(columns={"club_from": "num_transfers"})
    dfclub = dfclub.reset_index()
    # Ignore rows where origin and destination are the same club, then keep frequent connections only
    dfclub = dfclub[dfclub["club_from"] != dfclub["club_to"]]
    dfclub = dfclub[dfclub["num_transfers"] >= 7]
    G=nx.from_pandas_edgelist(dfclub, 'club_from', 'club_to', edge_attr='num_transfers')
    labels = {club: club for club in G.nodes}
    fig, ax = plt.subplots(figsize=(20,10))
    # Fixed seed so the force-directed layout, and with it the saved figure, is the same on every run
    pos = nx.spring_layout(G, k = 0.5, seed = 42)
    nx.draw_networkx_nodes(G, pos, ax = ax, node_size = 100, node_color = 'red')
    nx.draw_networkx_edges(G, pos, ax=ax, edge_color= 'red')
    plt.title('Club interactions non-national ' + str(name), fontdict = {'fontsize': 50})
    _ = nx.draw_networkx_labels(G, pos, labels, ax=ax, font_size = 20, font_color="black")
    plt.savefig(f'output/clubs/Clubs interactions non-national {name}.png')


## Analysis of player development contribution on club level in specific leagues 

### Filter dataframe

In [None]:
# Reload the cleaned transfers and keep, per league, only the moves that start and end in that league
df = pd.read_csv("leagues_cleaned.csv")
df_bundesliga = df[(df["league_to"] == "1 Bundesliga") & (df["league_from"] == "1 Bundesliga")]
df_serieA = df[(df["league_to"] == "Serie A") & (df["league_from"] == "Serie A")]
df_prem = df[(df["league_to"] == "Premier League") & (df["league_from"] == "Premier League")]

In [None]:
# Split each league into the three periods (both bounds inclusive)
df_bund_1 = df_bundesliga[df_bundesliga['year'].between(1992, 2002)]
df_bund_2 = df_bundesliga[df_bundesliga['year'].between(2003, 2012)]
df_bund_3 = df_bundesliga[df_bundesliga['year'].between(2013, 2021)]

df_serie_1 = df_serieA[df_serieA['year'].between(1992, 2002)]
df_serie_2 = df_serieA[df_serieA['year'].between(2003, 2012)]
df_serie_3 = df_serieA[df_serieA['year'].between(2013, 2021)]

df_prem_1 = df_prem[df_prem['year'].between(1992, 2002)]
df_prem_2 = df_prem[df_prem['year'].between(2003, 2012)]
df_prem_3 = df_prem[df_prem['year'].between(2013, 2021)]

In [None]:
# Players aged 19 or younger, per league and period
df_bund_age_1 = df_bund_1[df_bund_1['age'].le(19)]
df_bund_age_2 = df_bund_2[df_bund_2['age'].le(19)]
df_bund_age_3 = df_bund_3[df_bund_3['age'].le(19)]

df_serie_age_1 = df_serie_1[df_serie_1['age'].le(19)]
df_serie_age_2 = df_serie_2[df_serie_2['age'].le(19)]
df_serie_age_3 = df_serie_3[df_serie_3['age'].le(19)]

df_prem_age_1 = df_prem_1[df_prem_1['age'].le(19)]
df_prem_age_2 = df_prem_2[df_prem_2['age'].le(19)]
df_prem_age_3 = df_prem_3[df_prem_3['age'].le(19)]

In [None]:
# Players aged 20 to 24 (both bounds inclusive), per league and period
df_bund_age_4 = df_bund_1[df_bund_1['age'].between(20, 24)]
df_bund_age_5 = df_bund_2[df_bund_2['age'].between(20, 24)]
df_bund_age_6 = df_bund_3[df_bund_3['age'].between(20, 24)]

df_serie_age_4 = df_serie_1[df_serie_1['age'].between(20, 24)]
df_serie_age_5 = df_serie_2[df_serie_2['age'].between(20, 24)]
df_serie_age_6 = df_serie_3[df_serie_3['age'].between(20, 24)]

df_prem_age_4 = df_prem_1[df_prem_1['age'].between(20, 24)]
df_prem_age_5 = df_prem_2[df_prem_2['age'].between(20, 24)]
df_prem_age_6 = df_prem_3[df_prem_3['age'].between(20, 24)]

### Create Network for the Bundesliga with the out_degree of the nodes

In [None]:
# Out-degree networks of transfers inside the Bundesliga for players aged 20 to 24, one figure per period
df_list = [df_bund_age_4,df_bund_age_5,df_bund_age_6]

plot_titles = ["Traded players in the Bundesliga (out_degree) \n aged between 20 and 24 years for the Season 1992 - 2002",
               "Traded players in the Bundesliga (out_degree) \n aged between 20 and 24 years for the Season 2003 - 2012",
               "Traded players in the Bundesliga (out_degree) \n aged between 20 and 24 years for the Season 2013 - 2021"]

file_names = ["bund_outdegree_2002","bund_outdegree_2012","bund_outdegree_2021"]

# plt.savefig does not create missing folders
os.makedirs("output/youth", exist_ok=True)

for d, plot_title, file_name in zip(df_list, plot_titles, file_names):
    # One directed edge per distinct (club_from, club_to) pair; repeated pairs collapse into one edge
    G = nx.DiGraph()
    G.add_edges_from(zip(d.club_from, d.club_to))
    if G.number_of_nodes() == 0:
        # max() below would raise on an empty degree list, so skip periods without matching transfers
        print(f"No transfers to plot for {file_name}")
        continue

    # Positions are computed while self-loops are still present; the loops are removed afterwards
    # so that they do not count towards the degrees
    coords = nx.kamada_kawai_layout(G)
    G.remove_edges_from(nx.selfloop_edges(G))

    out_degrees = [degree for _, degree in G.out_degree()]
    # Upper end of the colour scale, shared by the nodes and the colorbar
    max_color = max(out_degrees) + 3

    figure = plt.figure(figsize=(18, 23))
    plt.title(plot_title,fontsize = 20,fontweight=800)
    nx.draw(
        G,
        pos = coords,
        with_labels = True,
        node_size = [degree * 100 for degree in out_degrees],
        verticalalignment='center',
        horizontalalignment = "center",
        arrowsize=10,
        cmap="RdYlBu_r",
        node_color = out_degrees,
        # Without vmin/vmax the nodes are colour-scaled to their own min/max and do not match the colorbar
        vmin = 0,
        vmax = max_color,
        edge_color = "lightgray"
    )

    sm = plt.cm.ScalarMappable(cmap="RdYlBu_r",norm=plt.Normalize(vmin=0, vmax=max_color))
    sm.set_array([])
    # The mappable is not attached to any Axes, so colorbar must be told where to take its space from
    cbar = plt.colorbar(sm,ax=plt.gca(),shrink=0.4,orientation="horizontal",pad=0.001)
    cbar.set_label('Out_Degree of Nodes')

    plt.savefig(f"output/youth/{file_name}.png")

### Create Network for the Bundesliga with the in_degree of the nodes

In [None]:
# In-degree networks of transfers inside the Bundesliga for players aged up to 19, one figure per period
df_list = [df_bund_age_1,df_bund_age_2,df_bund_age_3]

plot_titles = ["Traded players in the Bundesliga (in_degree) \n aged up to 19 years for the Season 1992 - 2002",
               "Traded players in the Bundesliga (in_degree) \n aged up to 19 years for the Season 2003 - 2012",
               "Traded players in the Bundesliga (in_degree) \n aged up to 19 years for the Season 2013 - 2021"]

file_names = ["bund_indegree_2002","bund_indegree_2012","bund_indegree_2021"]

# plt.savefig does not create missing folders
os.makedirs("output/youth", exist_ok=True)

for d, plot_title, file_name in zip(df_list, plot_titles, file_names):
    # One directed edge per distinct (club_from, club_to) pair; repeated pairs collapse into one edge
    G = nx.DiGraph()
    G.add_edges_from(zip(d.club_from, d.club_to))
    if G.number_of_nodes() == 0:
        # max() below would raise on an empty degree list, so skip periods without matching transfers
        print(f"No transfers to plot for {file_name}")
        continue

    # Positions are computed while self-loops are still present; the loops are removed afterwards
    # so that they do not count towards the degrees
    coords = nx.kamada_kawai_layout(G)
    G.remove_edges_from(nx.selfloop_edges(G))

    in_degrees = [degree for _, degree in G.in_degree()]
    # Upper end of the colour scale, shared by the nodes and the colorbar
    max_color = max(in_degrees) + 4

    figure = plt.figure(figsize=(20, 23))
    plt.title(plot_title,fontsize = 20,fontweight=800)
    nx.draw(
        G,
        pos = coords,
        with_labels = True,
        node_size = [degree * 300 for degree in in_degrees],
        verticalalignment='center',
        horizontalalignment = "center",
        arrowsize=10,
        cmap="RdYlBu_r",
        node_color = in_degrees,
        # Without vmin/vmax the nodes are colour-scaled to their own min/max and do not match the colorbar
        vmin = 0,
        vmax = max_color,
        edge_color = "lightgray"
    )

    sm = plt.cm.ScalarMappable(cmap="RdYlBu_r",norm=plt.Normalize(vmin=0, vmax=max_color))
    sm.set_array([])
    # The mappable is not attached to any Axes, so colorbar must be told where to take its space from
    cbar = plt.colorbar(sm,ax=plt.gca(),shrink=0.4,orientation="horizontal",pad=0.001)
    cbar.set_label('In_Degree of Nodes')

    plt.savefig(f"output/youth/{file_name}.png")

### Create Network for the Serie A with the out_degree of the nodes

In [None]:
# Out-degree networks of transfers inside the Serie A for players aged 20 to 24, one figure per period
df_list = [df_serie_age_4,df_serie_age_5,df_serie_age_6]

plot_titles = ["Traded players in the Serie A (out_degree) \n aged between 20 and 24 years for the Season 1992 - 2002",
               "Traded players in the Serie A (out_degree) \n aged between 20 and 24 years for the Season 2003 - 2012",
               "Traded players in the Serie A (out_degree) \n aged between 20 and 24 years for the Season 2013 - 2021"]

file_names = ["serie_outdegree_2002","serie_outdegree_2012","serie_outdegree_2021"]

# plt.savefig does not create missing folders
os.makedirs("output/youth", exist_ok=True)

for d, plot_title, file_name in zip(df_list, plot_titles, file_names):
    # One directed edge per distinct (club_from, club_to) pair; repeated pairs collapse into one edge
    G = nx.DiGraph()
    G.add_edges_from(zip(d.club_from, d.club_to))
    if G.number_of_nodes() == 0:
        # max() below would raise on an empty degree list, so skip periods without matching transfers
        print(f"No transfers to plot for {file_name}")
        continue

    # Positions are computed while self-loops are still present; the loops are removed afterwards
    # so that they do not count towards the degrees
    coords = nx.kamada_kawai_layout(G)
    G.remove_edges_from(nx.selfloop_edges(G))

    out_degrees = [degree for _, degree in G.out_degree()]
    # Upper end of the colour scale, shared by the nodes and the colorbar
    max_color = max(out_degrees) + 3

    figure = plt.figure(figsize=(18, 23))
    plt.title(plot_title,fontsize = 20,fontweight=800)
    nx.draw(
        G,
        pos = coords,
        with_labels = True,
        node_size = [degree * 100 for degree in out_degrees],
        verticalalignment='center',
        horizontalalignment = "center",
        arrowsize=10,
        cmap="RdYlBu_r",
        node_color = out_degrees,
        # Without vmin/vmax the nodes are colour-scaled to their own min/max and do not match the colorbar
        vmin = 0,
        vmax = max_color,
        edge_color = "lightgray"
    )

    sm = plt.cm.ScalarMappable(cmap="RdYlBu_r",norm=plt.Normalize(vmin=0, vmax=max_color))
    sm.set_array([])
    # The mappable is not attached to any Axes, so colorbar must be told where to take its space from
    cbar = plt.colorbar(sm,ax=plt.gca(),shrink=0.4,orientation="horizontal",pad=0.001)
    cbar.set_label('Out_Degree of Nodes')

    plt.savefig(f"output/youth/{file_name}.png")

### Create Network for the Serie A with the in_degree of the nodes

In [None]:
# In-degree networks of transfers inside the Serie A for players aged up to 19, one figure per period
df_list = [df_serie_age_1,df_serie_age_2,df_serie_age_3]

plot_titles = ["Traded players in the Serie A (in_degree) \n aged up to 19 years for the Season 1992 - 2002",
               "Traded players in the Serie A (in_degree) \n aged up to 19 years for the Season 2003 - 2012",
               "Traded players in the Serie A (in_degree) \n aged up to 19 years for the Season 2013 - 2021"]

file_names = ["serie_indegree_2002","serie_indegree_2012","serie_indegree_2021"]

# plt.savefig does not create missing folders
os.makedirs("output/youth", exist_ok=True)

for d, plot_title, file_name in zip(df_list, plot_titles, file_names):
    # One directed edge per distinct (club_from, club_to) pair; repeated pairs collapse into one edge
    G = nx.DiGraph()
    G.add_edges_from(zip(d.club_from, d.club_to))
    if G.number_of_nodes() == 0:
        # max() below would raise on an empty degree list, so skip periods without matching transfers
        print(f"No transfers to plot for {file_name}")
        continue

    # Positions are computed while self-loops are still present; the loops are removed afterwards
    # so that they do not count towards the degrees
    coords = nx.kamada_kawai_layout(G)
    G.remove_edges_from(nx.selfloop_edges(G))

    in_degrees = [degree for _, degree in G.in_degree()]
    # Upper end of the colour scale, shared by the nodes and the colorbar
    max_color = max(in_degrees) + 4

    figure = plt.figure(figsize=(18, 23))
    plt.title(plot_title,fontsize = 20,fontweight=800)
    nx.draw(
        G,
        pos = coords,
        with_labels = True,
        node_size = [degree * 300 for degree in in_degrees],
        verticalalignment='center',
        horizontalalignment = "center",
        arrowsize=10,
        cmap="RdYlBu_r",
        node_color = in_degrees,
        # Without vmin/vmax the nodes are colour-scaled to their own min/max and do not match the colorbar
        vmin = 0,
        vmax = max_color,
        edge_color = "lightgray"
    )

    sm = plt.cm.ScalarMappable(cmap="RdYlBu_r",norm=plt.Normalize(vmin=0, vmax=max_color))
    sm.set_array([])
    # The mappable is not attached to any Axes, so colorbar must be told where to take its space from
    cbar = plt.colorbar(sm,ax=plt.gca(),shrink=0.4,orientation="horizontal",pad=0.001)
    cbar.set_label('In_Degree of Nodes')

    plt.savefig(f"output/youth/{file_name}.png")

### Create Network for the Premier League with the out_degree of the nodes

In [None]:
# Out-degree networks of transfers inside the Premier League for players aged 20 to 24, one figure per period
df_list = [df_prem_age_4,df_prem_age_5,df_prem_age_6]

plot_titles = ["Traded players in the Premier League (out_degree) \n aged between 20 and 24 years for the Season 1992 - 2002",
               "Traded players in the Premier League (out_degree) \n aged between 20 and 24 years for the Season 2003 - 2012",
               "Traded players in the Premier League (out_degree) \n aged between 20 and 24 years for the Season 2013 - 2021"]

file_names = ["prem_outdegree_2002","prem_outdegree_2012","prem_outdegree_2021"]

# plt.savefig does not create missing folders
os.makedirs("output/youth", exist_ok=True)

for d, plot_title, file_name in zip(df_list, plot_titles, file_names):
    # One directed edge per distinct (club_from, club_to) pair; repeated pairs collapse into one edge
    G = nx.DiGraph()
    G.add_edges_from(zip(d.club_from, d.club_to))
    if G.number_of_nodes() == 0:
        # max() below would raise on an empty degree list, so skip periods without matching transfers
        print(f"No transfers to plot for {file_name}")
        continue

    # Positions are computed while self-loops are still present; the loops are removed afterwards
    # so that they do not count towards the degrees
    coords = nx.kamada_kawai_layout(G)
    G.remove_edges_from(nx.selfloop_edges(G))

    out_degrees = [degree for _, degree in G.out_degree()]
    # Upper end of the colour scale, shared by the nodes and the colorbar
    max_color = max(out_degrees) + 3

    figure = plt.figure(figsize=(18, 23))
    plt.title(plot_title,fontsize = 20,fontweight=800)
    nx.draw(
        G,
        pos = coords,
        with_labels = True,
        node_size = [degree * 100 for degree in out_degrees],
        verticalalignment='center',
        horizontalalignment = "center",
        arrowsize=10,
        cmap="RdYlBu_r",
        node_color = out_degrees,
        # Without vmin/vmax the nodes are colour-scaled to their own min/max and do not match the colorbar
        vmin = 0,
        vmax = max_color,
        edge_color = "lightgray"
    )

    sm = plt.cm.ScalarMappable(cmap="RdYlBu_r",norm=plt.Normalize(vmin=0, vmax=max_color))
    sm.set_array([])
    # The mappable is not attached to any Axes, so colorbar must be told where to take its space from
    cbar = plt.colorbar(sm,ax=plt.gca(),shrink=0.4,orientation="horizontal",pad=0.001)
    cbar.set_label('Out_Degree of Nodes')

    plt.savefig(f"output/youth/{file_name}.png")

### Create Network for the Premier League with the in_degree of the nodes

In [None]:
# In-degree networks of transfers inside the Premier League for players aged up to 19, one figure per period
df_list = [df_prem_age_1,df_prem_age_2,df_prem_age_3]

plot_titles = ["Traded players in the Premier League (in_degree) \n aged up to 19 years for the Season 1992 - 2002",
               "Traded players in the Premier League (in_degree) \n aged up to 19 years for the Season 2003 - 2012",
               "Traded players in the Premier League (in_degree) \n aged up to 19 years for the Season 2013 - 2021"]

file_names = ["prem_indegree_2002","prem_indegree_2012","prem_indegree_2021"]

# plt.savefig does not create missing folders
os.makedirs("output/youth", exist_ok=True)

for d, plot_title, file_name in zip(df_list, plot_titles, file_names):
    # One directed edge per distinct (club_from, club_to) pair; repeated pairs collapse into one edge
    G = nx.DiGraph()
    G.add_edges_from(zip(d.club_from, d.club_to))
    if G.number_of_nodes() == 0:
        # max() below would raise on an empty degree list, so skip periods without matching transfers
        print(f"No transfers to plot for {file_name}")
        continue

    # Positions are computed while self-loops are still present; the loops are removed afterwards
    # so that they do not count towards the degrees
    coords = nx.kamada_kawai_layout(G)
    G.remove_edges_from(nx.selfloop_edges(G))

    in_degrees = [degree for _, degree in G.in_degree()]
    # Upper end of the colour scale, shared by the nodes and the colorbar
    max_color = max(in_degrees) + 4

    figure = plt.figure(figsize=(18, 23))
    plt.title(plot_title,fontsize = 20,fontweight=800)
    nx.draw(
        G,
        pos = coords,
        with_labels = True,
        node_size = [degree * 300 for degree in in_degrees],
        verticalalignment='center',
        horizontalalignment = "center",
        arrowsize=10,
        cmap="RdYlBu_r",
        node_color = in_degrees,
        # Without vmin/vmax the nodes are colour-scaled to their own min/max and do not match the colorbar
        vmin = 0,
        vmax = max_color,
        edge_color = "lightgray"
    )

    sm = plt.cm.ScalarMappable(cmap="RdYlBu_r",norm=plt.Normalize(vmin=0, vmax=max_color))
    sm.set_array([])
    # The mappable is not attached to any Axes, so colorbar must be told where to take its space from
    cbar = plt.colorbar(sm,ax=plt.gca(),shrink=0.4,orientation="horizontal",pad=0.001)
    cbar.set_label('In_Degree of Nodes')

    plt.savefig(f"output/youth/{file_name}.png")