In [1]:
import os
import pandas as pd
from datetime import datetime

def process_files(folder_path):
    """Load every CSV file in ``folder_path`` into one typed DataFrame.

    Each file whose name ends in ``.csv`` is read with all columns as strings,
    the frames are concatenated with a fresh 0..n-1 index, and then:

    * ``game_date`` is converted with ``pd.to_datetime`` (unparseable -> NaT),
    * ``mp`` ("MM:SS" text) is converted to decimal minutes,
    * the box-score columns listed in ``numeric_columns`` are converted with
      ``pd.to_numeric`` (unparseable -> NaN).

    Parameters
    ----------
    folder_path : str
        Directory that contains the CSV files to combine.

    Returns
    -------
    pandas.DataFrame
        The concatenated, type-converted data. Files are read in sorted
        filename order so the row order is reproducible.

    Raises
    ------
    ValueError
        If ``folder_path`` contains no ``.csv`` file.
    KeyError
        If an expected column is missing from the combined data.
    """
    # Step 1: collect the CSV files. os.listdir returns entries in arbitrary
    # order, so sort to keep the row order stable across runs and machines.
    all_files = sorted(
        os.path.join(folder_path, f)
        for f in os.listdir(folder_path)
        if f.endswith('.csv')
    )
    if not all_files:
        # pd.concat would also raise ValueError here, but with a message that
        # does not say which folder was empty.
        raise ValueError(f"No CSV files found in {folder_path!r}")

    # Read everything as strings; types are assigned explicitly below.
    data_list = [pd.read_csv(file, dtype=str) for file in all_files]
    concatenated_data = pd.concat(data_list, ignore_index=True)

    # Step 2: type conversion.
    # Dates: values that cannot be parsed become NaT.
    concatenated_data['game_date'] = pd.to_datetime(concatenated_data['game_date'], errors='coerce')

    def convert_minutes(mp):
        """Convert an "MM:SS" string to decimal minutes; return None if malformed."""
        try:
            minutes, seconds = map(int, mp.split(':'))
        except (AttributeError, TypeError, ValueError):
            # AttributeError: missing value (NaN is a float, it has no .split).
            # ValueError: non-numeric text or not exactly two ':'-separated parts.
            return None
        return minutes + seconds / 60

    concatenated_data['mp'] = concatenated_data['mp'].apply(convert_minutes)

    # Remaining numeric columns: values that cannot be parsed become NaN.
    numeric_columns = ['PTS', 'REB', 'AST', 'STL', 'BLK', 'TO', 'FGA', 'FTA', 'FG_PCT',
                       'team_score', 'opponent_score', 'TS%', 'TOV%']

    for col in numeric_columns:
        concatenated_data[col] = pd.to_numeric(concatenated_data[col], errors='coerce')

    return concatenated_data

# Folder that contains the per-game CSV files to combine.
folder_path = 'regular_season_games'

# Load, concatenate and type-convert every CSV file in the folder.
data = process_files(folder_path)

# Save the combined data so it can be inspected later without re-reading the folder.
data.to_csv('concatenated_nba_data.csv', index=False)

print("Processo completato. I dati sono stati salvati in 'concatenated_nba_data.csv'.")


Processo completato. I dati sono stati salvati in 'concatenated_nba_data.csv'.


In [2]:
# Folder that contains the per-game playoff CSV files to combine.
folder_path = 'playoff_games'

# Load, concatenate and type-convert every CSV file in the folder.
data = process_files(folder_path)

# Save the combined playoff data so it can be inspected later.
data.to_csv('concatenated_nba_data_playoff.csv', index=False)

# The message names the file written just above (the playoff file).
print("Processo completato. I dati sono stati salvati in 'concatenated_nba_data_playoff.csv'.")


Processo completato. I dati sono stati salvati in 'concatenated_nba_data.csv'.


In [3]:
# Reload the saved regular-season data. CSV stores dates as text, so parse
# game_date while reading instead of relying on a later cell to convert it.
rs = pd.read_csv('concatenated_nba_data.csv', parse_dates=['game_date'])

In [4]:
# Reload the saved playoff data. CSV stores dates as text, so parse
# game_date while reading instead of relying on a later cell to convert it.
p = pd.read_csv('concatenated_nba_data_playoff.csv', parse_dates=['game_date'])

In [8]:
# Show the dtype of every column in rs.
# NOTE(review): game_date shows as datetime64 only if it was parsed as a date
# before this cell runs; confirm the result under Restart Kernel -> Run All.
rs.dtypes

player                    object
team                      object
mp                       float64
PTS                      float64
REB                      float64
AST                      float64
STL                      float64
BLK                      float64
TO                       float64
FGA                      float64
FTA                      float64
FG_PCT                   float64
game_date         datetime64[ns]
team_score               float64
opponent_score           float64
TS%                      float64
TOV%                     float64
dtype: object

In [7]:
# Convert rs['game_date'] to datetime; errors='coerce' turns unparseable values into NaT.
rs['game_date'] = pd.to_datetime(rs['game_date'], errors='coerce')

In [9]:
# Convert p['game_date'] to datetime; errors='coerce' turns unparseable values into NaT.
p['game_date'] = pd.to_datetime(p['game_date'], errors='coerce')

In [10]:
# Preview the first rows of rs to check the loaded columns and values.
rs.head()

Unnamed: 0,player,team,mp,PTS,REB,AST,STL,BLK,TO,FGA,FTA,FG_PCT,game_date,team_score,opponent_score,TS%,TOV%
0,Max Strus,CLE,28.283333,10.0,1.0,0.0,1.0,1.0,1.0,9.0,2.0,0.444,2023-11-03,116.0,121.0,0.506073,0.091912
1,Evan Mobley,CLE,35.85,14.0,10.0,5.0,3.0,4.0,2.0,13.0,0.0,0.538,2023-11-03,116.0,121.0,0.538462,0.133333
2,Jarrett Allen,CLE,21.116667,10.0,7.0,0.0,1.0,2.0,1.0,6.0,3.0,0.667,2023-11-03,116.0,121.0,0.68306,0.120192
3,Donovan Mitchell,CLE,36.65,38.0,5.0,9.0,1.0,0.0,3.0,21.0,10.0,0.619,2023-11-03,116.0,121.0,0.748031,0.105634
4,Darius Garland,CLE,31.983333,14.0,0.0,6.0,2.0,1.0,4.0,11.0,4.0,0.455,2023-11-03,116.0,121.0,0.548589,0.238663


In [14]:
# Largest FG_PCT value in rs (presumably a fraction in [0, 1] — TODO confirm).
rs['FG_PCT'].max()

1.0

In [15]:
# Largest FG_PCT value in p (presumably a fraction in [0, 1] — TODO confirm).
p['FG_PCT'].max()

1.0

In [16]:
# Stack the two datasets row-wise. ignore_index=True gives the result a fresh
# 0..n-1 index; without it the row labels of rs and p would both start at 0 and
# label-based lookups on the combined frame would match two rows.
df_concatenato = pd.concat([rs, p], axis=0, ignore_index=True)

# Export the combined frame to CSV (the index is not written).
df_concatenato.to_csv('dataset_concatenato.csv', index=False)