In [1]:
# notebooks/data_cleaning.ipynb

# Import necessary libraries
import pandas as pd

# Load the data
wnba_data = pd.read_csv('../data/wnba_data.csv')
nba_data = pd.read_csv('../data/nba_data.csv')

# Function to process data
def process_data(df, league_name):
    try:
        # Ensure columns match exactly
        columns_needed = ['Player', 'FG%', 'FT%', '3P%']
        df = df[df['Player'] != 'Player']  # Remove any rows where 'Player' column value is 'Player'
        df = df[columns_needed].dropna()

        # Debug: Print the first few rows to inspect the data
        print(f"First few rows of {league_name} data before conversion:")
        print(df.head())

        # Remove '%' and convert to numeric
        df['FG%'] = pd.to_numeric(df['FG%'].str.replace('%','').astype(float) / 100, errors='coerce')
        df['FT%'] = pd.to_numeric(df['FT%'].str.replace('%','').astype(float) / 100, errors='coerce')
        df['3P%'] = pd.to_numeric(df['3P%'].str.replace('%','').astype(float) / 100, errors='coerce')

        # Debug: Print the first few rows after conversion
        print(f"First few rows of {league_name} data after conversion:")
        print(df.head())

        df['League'] = league_name
        df = df.dropna()  # Remove rows with NaN values after conversion
    except KeyError as e:
        print(f"Error processing data for {league_name}: {e}")
        return pd.DataFrame()  # Return an empty DataFrame in case of error
    return df

# Process the data
wnba_data = process_data(wnba_data, 'WNBA')
nba_data = process_data(nba_data, 'NBA')

# Combine both datasets
data = pd.concat([wnba_data, nba_data])

# Save the combined data to a CSV file
data.to_csv('../data/combined_data.csv', index=False)

print("Data cleaned and saved to combined_data.csv.")


First few rows of WNBA data before conversion:
Empty DataFrame
Columns: [Player, FG%, FT%, 3P%]
Index: []
First few rows of WNBA data after conversion:
Empty DataFrame
Columns: [Player, FG%, FT%, 3P%]
Index: []
First few rows of NBA data before conversion:
             Player   FG%   FT%   3P%
0  Precious Achiuwa  .485  .702  .269
1      Steven Adams  .597  .364  .000
2       Bam Adebayo  .540  .806  .083
3      Ochai Agbaji  .427  .812  .355
4      Santi Aldama  .470  .750  .353
First few rows of NBA data after conversion:
             Player      FG%      FT%      3P%
0  Precious Achiuwa  0.00485  0.00702  0.00269
1      Steven Adams  0.00597  0.00364  0.00000
2       Bam Adebayo  0.00540  0.00806  0.00083
3      Ochai Agbaji  0.00427  0.00812  0.00355
4      Santi Aldama  0.00470  0.00750  0.00353


OSError: Cannot save file into a non-existent directory: 'data'