In [4]:
import pandas as pd

chunk_size = 2000
chunks = []

# Attempt columns grouped by lift. The flattened order is the order in which
# the matching "*Result" columns are appended to each cleaned chunk.
LIFT_GROUPS = [
    ['Bench1Kg', 'Bench2Kg', 'Bench3Kg'],
    ['Squat1Kg', 'Squat2Kg', 'Squat3Kg'],
    ['Deadlift1Kg', 'Deadlift2Kg', 'Deadlift3Kg'],
]
LIFT_COLUMNS = [col for group in LIFT_GROUPS for col in group]


def classify_lift(value):
    """Map one attempt cell to a result code.

    Returns:
        0 if the cell is '-' or a negative weight (presumably a failed
          attempt in this dataset's convention -- confirm against the data
          dictionary),
        2 if the cell is missing (NaN/None) or the text 'No Attempt',
        1 for anything else.
    """
    if pd.isna(value) or value == 'No Attempt':
        return 2
    if value == '-':
        return 0
    # A column holding any text token is read from CSV as strings, so a
    # negative weight may arrive as '-100' rather than -100.0; convert before
    # testing the sign instead of relying on the Python type.
    try:
        is_negative = float(value) < 0
    except (TypeError, ValueError):
        is_negative = False
    return 0 if is_negative else 1


def clean_chunk(chunk):
    """Filter and normalise one chunk of the powerlifting CSV.

    Rows are kept only when they have
      * at least one non-missing attempt in each of bench, squat and deadlift,
      * a numeric 'Place' that is still positive after truncation to int
        (text placings such as 'DQ' or 'G' are dropped by the coercion),
      * a numeric 'WeightClassKg' between 60 and 110 inclusive (text classes
        such as '90+' do not parse as numbers and are dropped).

    On the surviving rows: 'Place' becomes int, 'WeightClassKg' numeric,
    missing 'Sex' becomes 'Unknown', each attempt column gets a sibling
    '<Lift><n>Result' column from ``classify_lift``, 'No Attempt' cells are
    replaced by 0, and missing 'TotalKg' becomes 0.

    Args:
        chunk: DataFrame with the nine attempt columns plus 'Place', 'Sex',
            'WeightClassKg' and 'TotalKg'. It is not modified.

    Returns:
        A new DataFrame holding the cleaned rows.
    """
    keep = pd.Series(True, index=chunk.index)
    for group in LIFT_GROUPS:
        keep &= chunk[group].notna().any(axis=1)

    place = pd.to_numeric(chunk['Place'], errors='coerce')
    # int() truncates toward zero, so "int(place) > 0" is "place >= 1";
    # NaN (non-numeric placing) compares False and is dropped too.
    keep &= place >= 1

    weight_class = pd.to_numeric(chunk['WeightClassKg'], errors='coerce')
    keep &= weight_class.between(60, 110)

    # One explicit copy, so the assignments below never write to a view of
    # the caller's frame.
    cleaned = chunk.loc[keep].copy()
    cleaned['Place'] = place[keep].astype(int).to_numpy()
    cleaned['WeightClassKg'] = weight_class[keep].to_numpy()
    cleaned['Sex'] = cleaned['Sex'].fillna('Unknown')

    for col in LIFT_COLUMNS:
        result_col = col.replace('Kg', 'Result')
        # Classify BEFORE overwriting 'No Attempt' with 0; otherwise those
        # cells look like an ordinary 0 kg lift and get code 1 instead of 2.
        cleaned[result_col] = cleaned[col].apply(classify_lift)
        cleaned[col] = cleaned[col].replace('No Attempt', 0)

    cleaned['TotalKg'] = cleaned['TotalKg'].fillna(0)
    return cleaned


# In a notebook cell or a script run __name__ is "__main__", so the file is
# read exactly as before; the guard only lets the helpers above be imported
# without needing the CSV on disk.
if __name__ == "__main__":
    for chunk in pd.read_csv('powerlifting.csv', chunksize=chunk_size, low_memory=False, on_bad_lines='skip'):
        chunk = clean_chunk(chunk)
        chunks.append(chunk)

SAMPLE_SIZE = 25000
OUTPUT_PATH = 'filtered_lifts_data.csv'


def sample_rows(frame, n=SAMPLE_SIZE, random_state=42):
    """Return up to ``n`` randomly chosen rows of ``frame``.

    The request is capped at ``len(frame)`` because sampling without
    replacement raises ValueError when asked for more rows than exist.

    Args:
        frame: DataFrame to sample from.
        n: Maximum number of rows to return.
        random_state: Seed passed to ``DataFrame.sample`` for reproducibility.

    Returns:
        A DataFrame with ``min(n, len(frame))`` rows.
    """
    return frame.sample(n=min(n, len(frame)), random_state=random_state)


# In a notebook cell or a script run __name__ is "__main__", so this executes
# as before; the guard only keeps an import of this code from writing files.
if __name__ == "__main__":
    # `chunks` is the list of cleaned DataFrames built earlier in this script.
    df_combined = pd.concat(chunks, ignore_index=True)

    sampled_df = sample_rows(df_combined)

    sampled_df.to_csv(OUTPUT_PATH, index=False)

    print(f"Data sampling complete. Saved to '{OUTPUT_PATH}'.")


Data sampling complete. Saved to 'filtered_lifts_data..csv'.
