In [None]:
import pandas as pd

def trim_nba_elo(
    input_path="nba_elo.csv",
    output_path="nba_elo_trimmed.csv",
    season_min=2010,
    season_max=2023
):
    """Write a reduced copy of an NBA Elo CSV limited to a range of seasons.

    Only the columns named in ``wanted_columns`` are loaded from
    ``input_path``. Rows are kept when their ``season`` value lies between
    ``season_min`` and ``season_max``, both bounds included. The surviving
    rows are written to ``output_path`` without an index column, and a
    confirmation line is printed.

    Args:
        input_path: CSV file to read.
        output_path: Destination for the trimmed CSV.
        season_min: Lowest ``season`` value to keep (inclusive).
        season_max: Highest ``season`` value to keep (inclusive).

    Returns:
        None.
    """
    wanted_columns = ["season", "date", "team1", "elo1_pre", "team2", "elo2_pre", "playoff"]

    def _within_seasons(part):
        # Keep only the rows of one chunk whose season is inside the range.
        seasons = part["season"]
        at_or_after_min = seasons >= season_min
        at_or_before_max = seasons <= season_max
        return part.loc[at_or_after_min & at_or_before_max]

    # The file is parsed 200,000 rows at a time; each piece is filtered as
    # soon as it is read, so only the matching rows are held until the end.
    reader = pd.read_csv(
        input_path,
        usecols=wanted_columns,
        chunksize=200_000,
        low_memory=True,
    )
    kept_parts = [_within_seasons(part) for part in reader]

    # Stitch the filtered pieces together and persist the result.
    trimmed = pd.concat(kept_parts, ignore_index=True)
    trimmed.to_csv(output_path, index=False)
    print(f"Trimmed file saved as: {output_path}")

# Script entry point: trim the Elo file to seasons 1981 through 2023.
if __name__ == "__main__":
    run_options = {
        "input_path": "nba_elo.csv",
        "output_path": "nba_elo_trimmed.csv",
        "season_min": 1981,
        "season_max": 2023,
    }
    trim_nba_elo(**run_options)


Trimmed file saved as: nba_elo_trimmed.csv
