In [None]:
import pandas as pd

# Read and concatenate CSV datasets hosted under a common base URL
def load_and_concat_from_github(base_url, base_folder, base_name, start_year=None, end_year=None, specific_files=None):
    """
    Read several CSV files and concatenate them into a single DataFrame.

    Every file is read with ``pd.read_csv`` from the location
    ``{base_url}/{base_folder}/{file_name}``.

    Parameters
    ----------
    base_url : str
        Root location (URL or path) that contains ``base_folder``.
    base_folder : str
        Folder, relative to ``base_url``, that holds the CSV files.
    base_name : str or None
        File-name prefix used to build ``{base_name}_{year}.csv``. Only used
        when ``specific_files`` is not provided.
    start_year, end_year : int, optional
        Inclusive range of years to load. Required when ``specific_files``
        is not provided; ignored otherwise.
    specific_files : iterable of str, optional
        Explicit file names to load. When provided (and non-empty), it takes
        precedence over ``start_year`` / ``end_year``.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of every file that was read successfully,
        with a fresh 0..n-1 index.

    Raises
    ------
    TypeError
        If neither ``specific_files`` nor both ``start_year`` and
        ``end_year`` are given.
    ValueError
        If none of the requested files could be read.

    Notes
    -----
    Loading is best-effort: a file that cannot be read is reported on stdout
    and skipped, so the result may silently lack some of the requested files.
    """
    if specific_files:
        # An explicit list wins over the year range.
        file_names = list(specific_files)
    else:
        # Fail early with a clear message instead of an arithmetic error on None.
        if start_year is None or end_year is None:
            raise TypeError(
                "start_year e end_year sono obbligatori quando specific_files non è fornito."
            )
        file_names = [f"{base_name}_{year}.csv" for year in range(start_year, end_year + 1)]

    dataframes = []
    for file_name in file_names:
        file_url = f"{base_url}/{base_folder}/{file_name}"
        try:
            df = pd.read_csv(file_url)
        except Exception as e:
            # Deliberate best-effort: report the failure and move on to the next file.
            print(f"Errore nella lettura del file {file_url}: {e}")
            continue
        dataframes.append(df)
        print(f"File {file_url} letto con successo.")

    # pd.concat([]) would raise an opaque "No objects to concatenate";
    # keep the exception type but say what actually went wrong.
    if not dataframes:
        raise ValueError(
            f"Nessun file letto da {base_url}/{base_folder}: impossibile concatenare i dataset."
        )
    return pd.concat(dataframes, ignore_index=True)

# Base URL of the raw GitHub content
base_url = "https://raw.githubusercontent.com/Lupo433/HugeTennis/main"

# Load and concatenate the yearly matches datasets (2015 through 2024 inclusive)
df_matches = load_and_concat_from_github(base_url, "atp_matches", "atp_matches", 2015, 2024)

# Load and concatenate the rankings datasets (explicit file list via specific_files)
ranking_files = [
    "atp_rankings_00s.csv", "atp_rankings_10s.csv", "atp_rankings_20s.csv",
    "atp_rankings_70s.csv", "atp_rankings_80s.csv", "atp_rankings_90s.csv",
    "atp_rankings_current.csv"
]
df_ranking = load_and_concat_from_github(base_url, "atp_rankings", None, specific_files=ranking_files)

# Load the players dataset (single file)
try:
    df_players = pd.read_csv(f"{base_url}/atp_players.csv")
except Exception as e:
    print(f"Errore nella lettura del file atp_players.csv: {e}")
    # Re-raise: without df_players the summary below would fail with an
    # unrelated NameError on a fresh kernel, or silently reuse a stale
    # df_players left over from a previous run.
    raise
print(f"File {base_url}/atp_players.csv letto con successo.")

# Print a summary to confirm the results
print(f"Dataset matches contiene {len(df_matches)} righe e {len(df_matches.columns)} colonne.")
print(f"Dataset players contiene {len(df_players)} righe e {len(df_players.columns)} colonne.")
print(f"Dataset ranking contiene {len(df_ranking)} righe e {len(df_ranking.columns)} colonne.")

(2943, 49)
File https://raw.githubusercontent.com/Lupo433/HugeTennis/main/atp_matches/atp_matches_2015.csv letto con successo.
(2941, 49)
File https://raw.githubusercontent.com/Lupo433/HugeTennis/main/atp_matches/atp_matches_2016.csv letto con successo.
(2911, 49)
File https://raw.githubusercontent.com/Lupo433/HugeTennis/main/atp_matches/atp_matches_2017.csv letto con successo.
(2897, 49)
File https://raw.githubusercontent.com/Lupo433/HugeTennis/main/atp_matches/atp_matches_2018.csv letto con successo.
(2806, 49)
File https://raw.githubusercontent.com/Lupo433/HugeTennis/main/atp_matches/atp_matches_2019.csv letto con successo.
(1462, 49)
File https://raw.githubusercontent.com/Lupo433/HugeTennis/main/atp_matches/atp_matches_2020.csv letto con successo.
(2733, 49)
File https://raw.githubusercontent.com/Lupo433/HugeTennis/main/atp_matches/atp_matches_2021.csv letto con successo.
(2917, 49)
File https://raw.githubusercontent.com/Lupo433/HugeTennis/main/atp_matches/atp_matches_2022.csv lett

In [13]:
# Show the concatenated matches DataFrame as this cell's output
df_matches

Unnamed: 0,tourney_id,tourney_name,surface,draw_size,tourney_level,tourney_date,match_num,winner_id,winner_seed,winner_entry,...,l_1stIn,l_1stWon,l_2ndWon,l_SvGms,l_bpSaved,l_bpFaced,winner_rank,winner_rank_points,loser_rank,loser_rank_points
0,2015-339,Brisbane,Hard,28,A,20150104,1,105357,,WC,...,31.0,20.0,5.0,8.0,1.0,5.0,153.0,328.0,220.0,221.0
1,2015-339,Brisbane,Hard,28,A,20150104,2,103813,,,...,50.0,26.0,19.0,13.0,3.0,8.0,73.0,689.0,123.0,440.0
2,2015-339,Brisbane,Hard,28,A,20150104,3,105902,,WC,...,37.0,22.0,5.0,8.0,10.0,15.0,125.0,430.0,21.0,1730.0
3,2015-339,Brisbane,Hard,28,A,20150104,4,104871,,,...,38.0,30.0,8.0,10.0,1.0,3.0,31.0,1195.0,72.0,691.0
4,2015-339,Brisbane,Hard,28,A,20150104,5,105373,,,...,62.0,40.0,19.0,15.0,4.0,8.0,34.0,1094.0,110.0,505.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26008,2024-M-DC-2024-WG2-PO-URU-MDA-01,Davis Cup WG2 PO: URU vs MDA,Clay,4,D,20240203,5,212051,,,...,30.0,17.0,7.0,6.0,8.0,14.0,1109.0,8.0,740.0,34.0
26009,2024-M-DC-2024-WG2-PO-VIE-RSA-01,Davis Cup WG2 PO: VIE vs RSA,Hard,4,D,20240202,1,122533,,,...,41.0,25.0,6.0,9.0,1.0,4.0,554.0,67.0,748.0,32.0
26010,2024-M-DC-2024-WG2-PO-VIE-RSA-01,Davis Cup WG2 PO: VIE vs RSA,Hard,4,D,20240202,2,144748,,,...,51.0,25.0,7.0,11.0,5.0,12.0,416.0,109.0,,
26011,2024-M-DC-2024-WG2-PO-VIE-RSA-01,Davis Cup WG2 PO: VIE vs RSA,Hard,4,D,20240202,4,122533,,,...,51.0,32.0,17.0,14.0,5.0,9.0,554.0,67.0,416.0,109.0


In [15]:
# Show the players DataFrame as this cell's output
df_players

Unnamed: 0,player_id,name_first,name_last,hand,dob,ioc,height,wikidata_id
0,100001,Gardnar,Mulloy,R,19131122.0,USA,185.0,Q54544
1,100002,Pancho,Segura,R,19210620.0,ECU,168.0,Q54581
2,100003,Frank,Sedgman,R,19271002.0,AUS,180.0,Q962049
3,100004,Giuseppe,Merlo,R,19271011.0,ITA,,Q1258752
4,100005,Richard,Gonzalez,R,19280509.0,USA,188.0,Q53554
...,...,...,...,...,...,...,...,...
65014,212913,Pietro,Ricci,U,,ITA,,
65015,212914,Corey,Craig,U,,USA,,
65016,212915,Aleksandar,Ljubojevic,U,,SRB,,
65017,212916,Marko,Milosavljevic,U,,SRB,,


In [16]:
# Show the concatenated rankings DataFrame as this cell's output
df_ranking

Unnamed: 0,ranking_date,rank,player,points
0,20000110,1,101736,4135.0
1,20000110,2,102338,2915.0
2,20000110,3,101948,2419.0
3,20000110,4,103017,2184.0
4,20000110,5,102856,2169.0
...,...,...,...,...
3235634,20240527,2070,211388,1.0
3235635,20240527,2071,210649,1.0
3235636,20240527,2071,208890,1.0
3235637,20240527,2073,202079,1.0
