In [41]:
import pandas as pd
import json
import numpy as np
import requests
import os
import time
import glob

In [42]:
# Path containing teams
directory_path = "updated_teams"

# Read every CSV file found in the directory into its own DataFrame.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the row order of players_df (and everything derived from it, such as
# the order of player IDs) reproducible from one run/machine to the next.
dataframes = [
    pd.read_csv(os.path.join(directory_path, filename))
    for filename in sorted(os.listdir(directory_path))
    if filename.endswith(".csv")
]

# pd.concat() on an empty list fails with a generic "No objects to
# concatenate" message; fail with the actual cause instead.
if not dataframes:
    raise ValueError(f"No CSV files found in {directory_path!r}")

# Concat all DFs in a unique DF
players_df = pd.concat(dataframes, ignore_index=True)
players_df

Unnamed: 0,team_id,player_id,name,first_name,last_name,birth_date,country,nationality,height,weight,position
0,489,105,F. Ballo-Touré,Fodé,Ballo-Touré,1997-01-03,France,Senegal,182 cm,70 kg,Defender
1,489,305,D. Origi,Divock,Okoth Origi,1995-04-18,Belgium,Belgium,185 cm,75 kg,Attacker
2,489,765,A. Mirante,Antonio,Mirante,1983-07-08,Italy,Italy,193 cm,79 kg,Goalkeeper
3,489,2045,S. Kjær,Simon,Thorup Kjær,1989-03-26,Denmark,Denmark,191 cm,84 kg,Defender
4,489,31054,R. Krunić,Rade,Krunić,1993-10-07,Bosnia and Herzegovina,Bosnia and Herzegovina,184 cm,74 kg,Midfielder
...,...,...,...,...,...,...,...,...,...,...,...
932,504,30912,A. Berardi,Alessandro,Berardi,1991-01-16,Italy,Italy,185 cm,74 kg,Goalkeeper
933,504,93001,E. Tavşan,Elayis,Tavşan,2001-04-30,Netherlands,Netherlands,183 cm,74 kg,Attacker
934,504,109806,D. Flakus Bosilj,David,Flakus Bosilj,2002-02-01,Slovenia,Slovenia,185 cm,,Attacker
935,504,180128,S. Mitrović,Stefan,Mitrović,2002-08-15,Serbia,Serbia,181 cm,,Attacker


In [43]:
# Distinct player IDs across all loaded team files, as a plain Python list
player_ids = players_df['player_id'].unique().tolist()

In [44]:
# Define a function to acquire statistics data from API

# (output column, section of a statistics entry, key inside that section).
# The order here is the column order of the returned DataFrame.
# The spellings 'appereances' (output column) and 'appearences' / 'commited'
# (payload keys) are intentional: the column name is used by later cells and
# the payload keys are exactly the ones this notebook has always read.
_STAT_FIELDS = (
    ('team_id', 'team', 'id'),
    ('team_name', 'team', 'name'),
    ('league_id', 'league', 'id'),
    ('league_name', 'league', 'name'),
    ('appereances', 'games', 'appearences'),
    ('lineups', 'games', 'lineups'),
    ('minutes', 'games', 'minutes'),
    ('sub_in', 'substitutes', 'in'),
    ('sub_out', 'substitutes', 'out'),
    ('bench', 'substitutes', 'bench'),
    ('total_shots', 'shots', 'total'),
    ('on_shots', 'shots', 'on'),
    ('scored_goals', 'goals', 'total'),
    ('conceded_goals', 'goals', 'conceded'),
    ('saved_goals', 'goals', 'saves'),
    ('assists', 'goals', 'assists'),
    ('total_passes', 'passes', 'total'),
    ('key_passes', 'passes', 'key'),
    ('passes_accuracy', 'passes', 'accuracy'),
    ('tackles', 'tackles', 'total'),
    ('blocks', 'tackles', 'blocks'),
    ('interceptions', 'tackles', 'interceptions'),
    ('total_duels', 'duels', 'total'),
    ('won_duels', 'duels', 'won'),
    ('attempted_dribbles', 'dribbles', 'attempts'),
    ('won_dribbles', 'dribbles', 'success'),
    ('drawn_fouls', 'fouls', 'drawn'),
    ('committed_fouls', 'fouls', 'committed'),
    ('yellow_cards', 'cards', 'yellow'),
    ('yellowred_cards', 'cards', 'yellowred'),
    ('red_cards', 'cards', 'red'),
    ('won_penalties', 'penalty', 'won'),
    ('committed_penalties', 'penalty', 'commited'),
    ('scored_penalties', 'penalty', 'scored'),
    ('missed_penalties', 'penalty', 'missed'),
    ('saved_penalties', 'penalty', 'saved'),
)


def _empty_stat_row(player_id):
    """Build the placeholder row (all NaN except the ID) for a player with no data."""
    row = {'player_id': player_id, 'name': np.nan}
    row.update({column: np.nan for column, _, _ in _STAT_FIELDS})
    return row


def _extract_stat_row(player, stat):
    """Flatten one statistics entry (one team/competition) into a row dict."""
    row = {'player_id': player['id'], 'name': player['name']}
    for column, section, key in _STAT_FIELDS:
        # Tolerate a missing or null section/key (value becomes None) instead
        # of aborting the whole download with a KeyError/TypeError.
        row[column] = (stat.get(section) or {}).get(key)
    return row


def _fetch_player_rows(url, headers, player_id, season_string, timeout):
    """Request one player and return its rows; an empty list means 'skip this player'."""
    params = {
        'id': player_id,
        'season': season_string
    }

    # Make the request for this player
    try:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # A single network failure should not discard the rows already collected
        print(f"Error for player with ID {player_id}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error for player with ID {player_id}: {response.status_code}")
        return []

    # Decode only after the status check: error responses need not carry JSON
    try:
        player_info = response.json()
    except ValueError:
        print(f"Error for player with ID {player_id}: response body is not valid JSON")
        return []

    # NOTE(review): the API appears to report quota/authentication problems in
    # an 'errors' field while still answering HTTP 200 - confirm against the
    # API documentation. Such a reply must not be recorded as "no data".
    errors = player_info.get('errors')
    if errors:
        print(f"Error for player with ID {player_id}: {errors}")
        return []

    entries = player_info.get('response')
    if not entries:
        # Case where there is no data
        return [_empty_stat_row(player_id)]

    # Case where there are player data: one row per statistics entry
    player = entries[0]['player']
    return [_extract_stat_row(player, stat) for stat in entries[0]['statistics']]


def get_players_by_ids(player_ids, season_string, api_key=None, pause=0.25, timeout=30):
    """Download per-competition statistics for the given players.

    One GET request is sent per player ID to the ``/players`` endpoint with
    the parameters ``id`` and ``season``.

    Parameters
    ----------
    player_ids : iterable
        Player IDs to request.
    season_string : str
        Value sent as the ``season`` query parameter (e.g. ``'2023'``).
    api_key : str, optional
        API key sent in the ``x-rapidapi-key`` header. When omitted it is read
        from the ``API_FOOTBALL_KEY`` environment variable, so that the secret
        is never stored in the notebook.
    pause : float, optional
        Seconds to sleep after every request, whatever its outcome.
    timeout : float, optional
        Timeout in seconds passed to ``requests.get``.

    Returns
    -------
    pandas.DataFrame
        One row per statistics entry of each player. A player for which the
        API returns no data gets a single row that is NaN everywhere except
        ``player_id``. Players whose request fails are reported with ``print``
        and contribute no row. The columns are always ``player_id``, ``name``
        and the statistics columns, even when no row was collected.

    Raises
    ------
    RuntimeError
        If no API key is passed and ``API_FOOTBALL_KEY`` is not set.
    """
    if api_key is None:
        api_key = os.environ.get('API_FOOTBALL_KEY')
    if not api_key:
        raise RuntimeError(
            "No API key available: pass api_key=... or set the "
            "API_FOOTBALL_KEY environment variable"
        )

    # Endpoint
    url = "https://v3.football.api-sports.io/players"

    headers = {
        'x-rapidapi-key': api_key,
        'x-rapidapi-host': 'v3.football.api-sports.io'
    }

    players_data = []

    # Iterate on player ID
    for player_id in player_ids:
        players_data.extend(
            _fetch_player_rows(url, headers, player_id, season_string, timeout)
        )
        # Pause to avoid server overload - also after failed or empty replies,
        # which is precisely when hammering the server hurts the most
        time.sleep(pause)

    # Creation of DF to store all data (explicit columns keep the schema
    # stable even if no row was collected)
    columns = ['player_id', 'name'] + [column for column, _, _ in _STAT_FIELDS]
    df_players = pd.DataFrame(players_data, columns=columns)
    return df_players


In [45]:
# Fetch the statistics of every collected player ID, passing '2023' as the season value
players_stats = get_players_by_ids(player_ids, '2023')

In [46]:
# Display the raw statistics table returned by get_players_by_ids
players_stats

Unnamed: 0,player_id,name,team_id,team_name,league_id,league_name,appereances,lineups,minutes,sub_in,...,drawn_fouls,committed_fouls,yellow_cards,yellowred_cards,red_cards,won_penalties,committed_penalties,scored_penalties,missed_penalties,saved_penalties
0,105,F. Ballo-Touré,36.0,Fulham,39.0,Premier League,6.0,0.0,66.0,6.0,...,2.0,4.0,1.0,0.0,0.0,,,0.0,0.0,
1,105,F. Ballo-Touré,489.0,AC Milan,135.0,Serie A,0.0,0.0,0.0,0.0,...,,,0.0,0.0,0.0,,,,,
2,105,F. Ballo-Touré,36.0,Fulham,48.0,League Cup,2.0,2.0,159.0,0.0,...,1.0,1.0,0.0,0.0,0.0,,,0.0,0.0,
3,105,F. Ballo-Touré,36.0,Fulham,45.0,FA Cup,0.0,0.0,0.0,0.0,...,,,0.0,0.0,0.0,,,,,
4,105,F. Ballo-Touré,489.0,AC Milan,137.0,Coppa Italia,0.0,0.0,0.0,0.0,...,,,0.0,0.0,0.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3890,180128,S. Mitrović,14.0,Serbia,10.0,Friendlies,0.0,0.0,0.0,0.0,...,,,0.0,0.0,0.0,,,,,
3891,315026,Y. Kallon,495.0,Genoa,135.0,Serie A,,,,,...,,,,,,,,,,
3892,315026,Y. Kallon,508.0,Bari,136.0,Serie B,14.0,8.0,645.0,6.0,...,19.0,4.0,2.0,0.0,0.0,,,0.0,0.0,
3893,315026,Y. Kallon,504.0,Verona,135.0,Serie A,1.0,0.0,3.0,1.0,...,,,0.0,0.0,0.0,,,0.0,0.0,


In [47]:
# Text columns are left untouched; every other column is converted to a number
string_columns = ['name', 'team_name', 'league_name']
all_columns = players_stats.columns.tolist()
# A list comprehension (rather than a set difference) keeps the columns in
# their original, deterministic order.
numeric_columns = [col for col in all_columns if col not in string_columns]

# Convert right column from string to numeric.
# errors='coerce' turns unparseable values into NaN, and the nullable 'Int64'
# dtype stores those as <NA>, so no explicit fillna step is needed.
# NOTE(review): astype('Int64') is expected to raise if a column ever holds
# non-integer values (e.g. a fractional accuracy) - confirm for future data.
for col in numeric_columns:
    players_stats[col] = pd.to_numeric(players_stats[col], errors='coerce').astype('Int64')

players_stats.head()

Unnamed: 0,player_id,name,team_id,team_name,league_id,league_name,appereances,lineups,minutes,sub_in,...,drawn_fouls,committed_fouls,yellow_cards,yellowred_cards,red_cards,won_penalties,committed_penalties,scored_penalties,missed_penalties,saved_penalties
0,105,F. Ballo-Touré,36,Fulham,39,Premier League,6,0,66,6,...,2.0,4.0,1,0,0,,,0.0,0.0,
1,105,F. Ballo-Touré,489,AC Milan,135,Serie A,0,0,0,0,...,,,0,0,0,,,,,
2,105,F. Ballo-Touré,36,Fulham,48,League Cup,2,2,159,0,...,1.0,1.0,0,0,0,,,0.0,0.0,
3,105,F. Ballo-Touré,36,Fulham,45,FA Cup,0,0,0,0,...,,,0,0,0,,,,,
4,105,F. Ballo-Touré,489,AC Milan,137,Coppa Italia,0,0,0,0,...,,,0,0,0,,,,,


In [49]:
# Save the IDs of the rows that carry no player name, then the complete table
players_stats.loc[players_stats['name'].isna(), ['player_id']].to_csv('raw_data/missing_stats.csv')
players_stats.to_csv('raw_data/full_stats_23_24.csv')

# Keep only the rows whose 'name' column is populated
players_stats_cleaned = players_stats.loc[players_stats['name'].notna()]
players_stats_cleaned

Unnamed: 0,player_id,name,team_id,team_name,league_id,league_name,appereances,lineups,minutes,sub_in,...,drawn_fouls,committed_fouls,yellow_cards,yellowred_cards,red_cards,won_penalties,committed_penalties,scored_penalties,missed_penalties,saved_penalties
0,105,F. Ballo-Touré,36,Fulham,39,Premier League,6,0,66,6,...,2,4,1,0,0,,,0,0,
1,105,F. Ballo-Touré,489,AC Milan,135,Serie A,0,0,0,0,...,,,0,0,0,,,,,
2,105,F. Ballo-Touré,36,Fulham,48,League Cup,2,2,159,0,...,1,1,0,0,0,,,0,0,
3,105,F. Ballo-Touré,36,Fulham,45,FA Cup,0,0,0,0,...,,,0,0,0,,,,,
4,105,F. Ballo-Touré,489,AC Milan,137,Coppa Italia,0,0,0,0,...,,,0,0,0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3890,180128,S. Mitrović,14,Serbia,10,Friendlies,0,0,0,0,...,,,0,0,0,,,,,
3891,315026,Y. Kallon,495,Genoa,135,Serie A,,,,,...,,,,,,,,,,
3892,315026,Y. Kallon,508,Bari,136,Serie B,14,8,645,6,...,19,4,2,0,0,,,0,0,
3893,315026,Y. Kallon,504,Verona,135,Serie A,1,0,3,1,...,,,0,0,0,,,0,0,


In [50]:
# Rows where the player never appeared, never played a minute and was never on
# the bench. The mask is built from players_stats_cleaned itself: building it
# from the larger players_stats frame makes pandas reindex the mask and emit a
# "Boolean Series key will be reindexed" warning.
condition = (
    (players_stats_cleaned['appereances'] == 0)
    & (players_stats_cleaned['minutes'] == 0)
    & (players_stats_cleaned['bench'] == 0)
)
# NOTE(review): where the comparison evaluates to <NA> (missing counters) the
# row is presumably dropped as well, because a missing value in a nullable
# boolean mask selects nothing - confirm this is the intended behaviour.
players_stats_cleaned = players_stats_cleaned[~condition]

players_stats_cleaned

  players_stats_cleaned = players_stats_cleaned[~condition]


Unnamed: 0,player_id,name,team_id,team_name,league_id,league_name,appereances,lineups,minutes,sub_in,...,drawn_fouls,committed_fouls,yellow_cards,yellowred_cards,red_cards,won_penalties,committed_penalties,scored_penalties,missed_penalties,saved_penalties
0,105,F. Ballo-Touré,36,Fulham,39,Premier League,6,0,66,6,...,2,4,1,0,0,,,0,0,
2,105,F. Ballo-Touré,36,Fulham,48,League Cup,2,2,159,0,...,1,1,0,0,0,,,0,0,
7,105,F. Ballo-Touré,13,Senegal,29,World Cup - Qualification Africa,0,0,0,0,...,,,0,0,0,,,,,
8,105,F. Ballo-Touré,13,Senegal,,Africa Cup of Nations Qualification,2,1,70,1,...,,,0,0,0,,,,,
9,105,F. Ballo-Touré,13,Senegal,6,Africa Cup of Nations,1,0,2,1,...,,,0,0,0,,,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3888,180128,S. Mitrović,14,Serbia,960,Euro Championship - Qualification,1,0,18,1,...,1,1,1,0,0,,,0,0,
3889,180128,S. Mitrović,8222,Serbia U21,850,UEFA U21 Championship - Qualification,5,2,241,3,...,,,0,0,0,,,,,
3892,315026,Y. Kallon,508,Bari,136,Serie B,14,8,645,6,...,19,4,2,0,0,,,0,0,
3893,315026,Y. Kallon,504,Verona,135,Serie A,1,0,3,1,...,,,0,0,0,,,0,0,


In [51]:
# Discard every row recorded for the 'Friendlies Clubs' competition
players_stats_cleaned = players_stats_cleaned.loc[
    players_stats_cleaned['league_name'].ne('Friendlies Clubs')
]
players_stats_cleaned

Unnamed: 0,player_id,name,team_id,team_name,league_id,league_name,appereances,lineups,minutes,sub_in,...,drawn_fouls,committed_fouls,yellow_cards,yellowred_cards,red_cards,won_penalties,committed_penalties,scored_penalties,missed_penalties,saved_penalties
0,105,F. Ballo-Touré,36,Fulham,39,Premier League,6,0,66,6,...,2,4,1,0,0,,,0,0,
2,105,F. Ballo-Touré,36,Fulham,48,League Cup,2,2,159,0,...,1,1,0,0,0,,,0,0,
7,105,F. Ballo-Touré,13,Senegal,29,World Cup - Qualification Africa,0,0,0,0,...,,,0,0,0,,,,,
8,105,F. Ballo-Touré,13,Senegal,,Africa Cup of Nations Qualification,2,1,70,1,...,,,0,0,0,,,,,
9,105,F. Ballo-Touré,13,Senegal,6,Africa Cup of Nations,1,0,2,1,...,,,0,0,0,,,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3888,180128,S. Mitrović,14,Serbia,960,Euro Championship - Qualification,1,0,18,1,...,1,1,1,0,0,,,0,0,
3889,180128,S. Mitrović,8222,Serbia U21,850,UEFA U21 Championship - Qualification,5,2,241,3,...,,,0,0,0,,,,,
3892,315026,Y. Kallon,508,Bari,136,Serie B,14,8,645,6,...,19,4,2,0,0,,,0,0,
3893,315026,Y. Kallon,504,Verona,135,Serie A,1,0,3,1,...,,,0,0,0,,,0,0,


In [52]:
# Require list of league -> use to clean other data
url = "https://v3.football.api-sports.io/leagues"

# The API key is read from the environment so that the secret is never stored
# in the notebook (or in its version history).
api_key = os.environ.get('API_FOOTBALL_KEY')
if not api_key:
    raise RuntimeError("Set the API_FOOTBALL_KEY environment variable before running this cell")

headers = {
    'x-rapidapi-key': api_key,
    'x-rapidapi-host': 'v3.football.api-sports.io'
}

# A timeout prevents the cell from hanging forever on a stalled connection;
# raise_for_status() stops on HTTP errors instead of parsing an error body.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
leagues = response.json()

# Flatten the nested 'response' records into columns such as 'league.id'
df_leagues = pd.json_normalize(leagues['response'])

df_leagues

Unnamed: 0,seasons,league.id,league.name,league.type,league.logo,country.name,country.code,country.flag
0,"[{'year': 2008, 'start': '2008-06-07', 'end': ...",4,Euro Championship,Cup,https://media.api-sports.io/football/leagues/4...,World,,
1,"[{'year': 2009, 'start': '2009-06-14', 'end': ...",21,Confederations Cup,Cup,https://media.api-sports.io/football/leagues/2...,World,,
2,"[{'year': 2010, 'start': '2010-08-07', 'end': ...",61,Ligue 1,League,https://media.api-sports.io/football/leagues/6...,France,FR,https://media.api-sports.io/flags/fr.svg
3,"[{'year': 2010, 'start': '2010-07-30', 'end': ...",144,Jupiler Pro League,League,https://media.api-sports.io/football/leagues/1...,Belgium,BE,https://media.api-sports.io/flags/be.svg
4,"[{'year': 2010, 'start': '2010-05-08', 'end': ...",71,Serie A,League,https://media.api-sports.io/football/leagues/7...,Brazil,BR,https://media.api-sports.io/flags/br.svg
...,...,...,...,...,...,...,...,...
1155,"[{'year': 2024, 'start': '2024-09-07', 'end': ...",1145,Paraense B1,League,https://media.api-sports.io/football/leagues/1...,Brazil,BR,https://media.api-sports.io/flags/br.svg
1156,"[{'year': 2024, 'start': '2024-09-07', 'end': ...",1146,Alagoano - 2,Cup,https://media.api-sports.io/football/leagues/1...,Brazil,BR,https://media.api-sports.io/flags/br.svg
1157,"[{'year': 2024, 'start': '2024-09-14', 'end': ...",1148,Maranhense - 2,League,https://media.api-sports.io/football/leagues/1...,Brazil,BR,https://media.api-sports.io/flags/br.svg
1158,"[{'year': 2025, 'start': '2024-04-05', 'end': ...",1083,UEFA Championship - Women - Qualification,Cup,https://media.api-sports.io/football/leagues/1...,World,,


In [53]:
# IDs of the competitions listed under the country name 'World'
international_competitions = df_leagues.loc[
    df_leagues['country.name'].eq('World'), 'league.id'
].tolist()

# IDs of the competitions of type 'Cup' that belong to any other country
national_cup = df_leagues.loc[
    df_leagues['country.name'].ne('World') & df_leagues['league.type'].eq('Cup'),
    'league.id',
].tolist()

In [54]:
# Remove the rows whose league_id is one of the international competitions
players_stats_cleaned = players_stats_cleaned.loc[
    lambda frame: ~frame['league_id'].isin(international_competitions)
]
players_stats_cleaned

Unnamed: 0,player_id,name,team_id,team_name,league_id,league_name,appereances,lineups,minutes,sub_in,...,drawn_fouls,committed_fouls,yellow_cards,yellowred_cards,red_cards,won_penalties,committed_penalties,scored_penalties,missed_penalties,saved_penalties
0,105,F. Ballo-Touré,36,Fulham,39,Premier League,6,0,66,6,...,2,4,1,0,0,,,0,0,
2,105,F. Ballo-Touré,36,Fulham,48,League Cup,2,2,159,0,...,1,1,0,0,0,,,0,0,
8,105,F. Ballo-Touré,13,Senegal,,Africa Cup of Nations Qualification,2,1,70,1,...,,,0,0,0,,,,,
11,305,D. Origi,65,Nottingham Forest,39,Premier League,20,6,598,14,...,8,14,1,0,0,,,0,0,
13,305,D. Origi,65,Nottingham Forest,45,FA Cup,2,2,154,0,...,,,0,0,0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3884,180128,S. Mitrović,598,FK Crvena Zvezda,286,Super Liga,9,9,579,0,...,1,1,0,0,0,,,0,0,
3885,180128,S. Mitrović,598,FK Crvena Zvezda,732,Cup,1,0,44,1,...,,,0,0,0,,,,,
3892,315026,Y. Kallon,508,Bari,136,Serie B,14,8,645,6,...,19,4,2,0,0,,,0,0,
3893,315026,Y. Kallon,504,Verona,135,Serie A,1,0,3,1,...,,,0,0,0,,,0,0,


In [55]:
# Remove the rows whose league_id is one of the national cup competitions
players_stats_cleaned = players_stats_cleaned.loc[
    lambda frame: ~frame['league_id'].isin(national_cup)
]
players_stats_cleaned

Unnamed: 0,player_id,name,team_id,team_name,league_id,league_name,appereances,lineups,minutes,sub_in,...,drawn_fouls,committed_fouls,yellow_cards,yellowred_cards,red_cards,won_penalties,committed_penalties,scored_penalties,missed_penalties,saved_penalties
0,105,F. Ballo-Touré,36,Fulham,39,Premier League,6,0,66,6,...,2,4,1,0,0,,,0,0,
8,105,F. Ballo-Touré,13,Senegal,,Africa Cup of Nations Qualification,2,1,70,1,...,,,0,0,0,,,,,
11,305,D. Origi,65,Nottingham Forest,39,Premier League,20,6,598,14,...,8,14,1,0,0,,,0,0,
17,765,A. Mirante,489,AC Milan,135,Serie A,2,2,178,0,...,,,0,0,0,,,0,0,0
22,2045,S. Kjær,489,AC Milan,135,Serie A,20,14,1172,6,...,2,9,1,0,0,,,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3880,109806,D. Flakus Bosilj,199,De Graafschap,89,Eerste Divisie,37,19,2063,18,...,25,18,2,0,0,,,0,0,
3883,180128,S. Mitrović,504,Verona,135,Serie A,10,4,389,6,...,6,10,0,0,0,,,0,0,
3884,180128,S. Mitrović,598,FK Crvena Zvezda,286,Super Liga,9,9,579,0,...,1,1,0,0,0,,,0,0,
3892,315026,Y. Kallon,508,Bari,136,Serie B,14,8,645,6,...,19,4,2,0,0,,,0,0,


In [56]:
# Keep only the rows that have a league_id
players_stats_cleaned = players_stats_cleaned.loc[
    players_stats_cleaned['league_id'].notna()
]
players_stats_cleaned

Unnamed: 0,player_id,name,team_id,team_name,league_id,league_name,appereances,lineups,minutes,sub_in,...,drawn_fouls,committed_fouls,yellow_cards,yellowred_cards,red_cards,won_penalties,committed_penalties,scored_penalties,missed_penalties,saved_penalties
0,105,F. Ballo-Touré,36,Fulham,39,Premier League,6,0,66,6,...,2,4,1,0,0,,,0,0,
11,305,D. Origi,65,Nottingham Forest,39,Premier League,20,6,598,14,...,8,14,1,0,0,,,0,0,
17,765,A. Mirante,489,AC Milan,135,Serie A,2,2,178,0,...,,,0,0,0,,,0,0,0
22,2045,S. Kjær,489,AC Milan,135,Serie A,20,14,1172,6,...,2,9,1,0,0,,,0,0,
28,31054,R. Krunić,611,Fenerbahce,203,Süper Lig,12,9,774,3,...,12,11,0,0,0,,,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3880,109806,D. Flakus Bosilj,199,De Graafschap,89,Eerste Divisie,37,19,2063,18,...,25,18,2,0,0,,,0,0,
3883,180128,S. Mitrović,504,Verona,135,Serie A,10,4,389,6,...,6,10,0,0,0,,,0,0,
3884,180128,S. Mitrović,598,FK Crvena Zvezda,286,Super Liga,9,9,579,0,...,1,1,0,0,0,,,0,0,
3892,315026,Y. Kallon,508,Bari,136,Serie B,14,8,645,6,...,19,4,2,0,0,,,0,0,


In [57]:
# Count the missing values left in each column of the cleaned table
players_stats_cleaned.isna().sum()

player_id                 0
name                      0
team_id                   0
team_name                 0
league_id                 0
league_name               0
appereances               0
lineups                   0
minutes                   0
sub_in                    0
sub_out                   0
bench                     0
total_shots             503
on_shots                568
scored_goals              0
conceded_goals          246
saved_goals            1119
assists                 580
total_passes            369
key_passes              504
passes_accuracy        1081
tackles                 460
blocks                  637
interceptions           522
total_duels             389
won_duels               403
attempted_dribbles      479
won_dribbles            517
drawn_fouls             452
committed_fouls         461
yellow_cards              0
yellowred_cards           0
red_cards                 0
won_penalties          1179
committed_penalties    1179
scored_penalties    

In [58]:
# Drop the two penalty columns that the missing-value count shows to be
# largely empty
players_stats_cleaned = players_stats_cleaned.drop(
    ['won_penalties', 'committed_penalties'], axis=1
)

In [60]:
# Persist the cleaned statistics table (the DataFrame index is written as the first column)
players_stats_cleaned.to_csv('raw_data/cleaned_stats.csv')

In [62]:
# Number of distinct players that still have at least one row after cleaning
players_stats_cleaned['player_id'].nunique()

902

In [64]:
# Directory where the team CSV files are saved
# NOTE(review): an earlier cell reads team files from "updated_teams" (without
# the "raw_data/" prefix) - confirm which location is the intended one.
directory = 'raw_data/updated_teams'

# Use glob to find all the CSV files in the directory. glob returns paths in
# arbitrary order, so they are sorted to keep the row order (and therefore the
# index written by later to_csv calls) reproducible.
csv_files = sorted(glob.glob(os.path.join(directory, "*.csv")))

# pd.concat() on an empty list fails with a generic "No objects to
# concatenate" message; fail with the actual cause instead.
if not csv_files:
    raise ValueError(f"No CSV files found in {directory!r}")

# Read every CSV file into its own DataFrame
df_list = [pd.read_csv(file) for file in csv_files]

# Concatenate all the DataFrames into a single DataFrame
df_final = pd.concat(df_list, ignore_index=True)

df_final

Unnamed: 0,team_id,player_id,name,first_name,last_name,birth_date,country,nationality,height,weight,position
0,489,105,F. Ballo-Touré,Fodé,Ballo-Touré,1997-01-03,France,Senegal,182 cm,70 kg,Defender
1,489,305,D. Origi,Divock,Okoth Origi,1995-04-18,Belgium,Belgium,185 cm,75 kg,Attacker
2,489,765,A. Mirante,Antonio,Mirante,1983-07-08,Italy,Italy,193 cm,79 kg,Goalkeeper
3,489,2045,S. Kjær,Simon,Thorup Kjær,1989-03-26,Denmark,Denmark,191 cm,84 kg,Defender
4,489,31054,R. Krunić,Rade,Krunić,1993-10-07,Bosnia and Herzegovina,Bosnia and Herzegovina,184 cm,74 kg,Midfielder
...,...,...,...,...,...,...,...,...,...,...,...
930,504,30912,A. Berardi,Alessandro,Berardi,1991-01-16,Italy,Italy,185 cm,74 kg,Goalkeeper
931,504,30922,D. Faraoni,Marco Davide,Faraoni,1991-10-25,Italy,Italy,180 cm,71 kg,Defender
932,504,31383,S. Perilli,Simone,Perilli,1995-01-07,Italy,Italy,195 cm,88 kg,Goalkeeper
933,504,125715,F. Daniliuc,Flavius David,Daniliuc,2001-04-27,Austria,Austria,188 cm,77 kg,Defender


In [66]:
# IDs of the players that still have statistics after the cleaning steps
included_ids = players_stats_cleaned['player_id'].unique().tolist()

# Players from the team files whose ID is not among them
df_filtered = df_final.loc[~df_final['player_id'].isin(included_ids)]
df_filtered

Unnamed: 0,team_id,player_id,name,first_name,last_name,birth_date,country,nationality,height,weight,position
9,489,336687,A. Coubiș,Andrei,Coubiș,2003-09-29,Italy,Romania,189 cm,,Defender
16,489,462227,A. Longoni,Alessandro,Longoni,2008-01-31,Italy,Italy,,,Goalkeeper
66,497,446092,C. Cama,Cristian,Cama,2007-06-05,Italy,Italy,,,Defender
69,497,472079,W. Feola,William,Feola,2006-01-04,Italy,Italy,,,Defender
70,497,472082,M. Almaviva,Mattia,Almaviva,2006-02-03,Italy,Italy,172 cm,75 kg,Attacker
71,497,472083,G. Tumminelli,Gianmarco,Tumminelli,2006-02-01,Italy,Italy,182 cm,,Midfielder
72,497,476792,A. Sugamele,Alessandro,Sugamele,2007-02-22,Italy,Italy,,,Attacker
73,497,476796,T. Marchetti,Tommaso,Marchetti,2007-03-13,Italy,Italy,,,Defender
91,497,446126,A. Kehayov,Atanas,Eliyanov Kehayov,2007-05-16,Bulgaria,Bulgaria,196 cm,,Goalkeeper
92,497,472080,M. Della Rocca,Mattia,Della Rocca,2006-01-13,Italy,Italy,,,Midfielder


In [67]:
# Persist the players without remaining statistics (the DataFrame index is written as the first column)
df_filtered.to_csv('raw_data/players_no_stats.csv')