In [18]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit


## Importing the data from the large OpenIPF CSV export

In [68]:
# Load the OpenIPF export into memory.
#   * BodyweightKg / TotalKg are forced to float64 so they can be compared
#     numerically further down.
#   * low_memory=False makes pandas infer the remaining column dtypes from the
#     whole file instead of chunk by chunk (avoids mixed-type columns).
# The frame is bound to `data` because every later cell reads `data`; with
# only `gdata` defined, a fresh Restart & Run All fails with NameError.
data = pd.read_csv(
    'Raw data/openipf-2024-12-28-acdecc3a.csv',
    dtype={'BodyweightKg': 'float64', 'TotalKg': 'float64'},
    low_memory=False
)
gdata = data  # alias kept so code that still refers to `gdata` keeps working

In [53]:
# Turn the 'Date' column into datetimes. errors='coerce' means a value that
# cannot be parsed becomes NaT instead of raising.
data['Date'] = data['Date'].pipe(pd.to_datetime, errors='coerce')


In [58]:
# Keep raw, full-power (SBD) results for Sex == 'F' with a bodyweight of at
# least 37.5 kg, dated from 2011-01-01 through 2024-06-15 (both inclusive).
# Rows whose date failed to parse (NaT) never satisfy the date window.
filtered_data = data.loc[
    data['Event'].eq('SBD')             # squat + bench + deadlift
    & data['Equipment'].eq('Raw')       # raw lifting only
    & data['Sex'].eq('F')
    & data['BodyweightKg'].ge(37.5)
    # Optional restriction to international federations (currently disabled):
    # & data['Federation'].isin(['IPF', 'EPF'])
    & data['Date'].between('2011-01-01', '2024-06-15')
    # Alternative upper bound used previously: '2020-04-30'
]

In [59]:
# Narrow the frame to the columns used downstream, drop every row that has a
# missing value in any of them, then normalise meet names (trimmed and
# lower-cased).
filtered_data = (
    filtered_data[
        ['Name', 'BodyweightKg', 'TotalKg', 'WeightClassKg', 'Federation', 'Date', 'MeetName']
    ]
    .dropna()
    .assign(MeetName=lambda frame: frame['MeetName'].str.strip().str.lower())
)

In [60]:
# Normalise lifter names (surrounding whitespace removed, then lower-cased) so
# that spelling variants of the same name fall into the same group later on.
filtered_data.loc[:, 'Name'] = filtered_data['Name'].str.strip().str.lower()

In [61]:

# For every (lifter, weight class) pair keep only the row with the largest
# TotalKg, then renumber the index from 0.
filtered_data = (
    filtered_data
    .loc[filtered_data.groupby(['Name', 'WeightClassKg'])['TotalKg'].idxmax()]
    .reset_index(drop=True)
)


In [62]:
# Write the filtered results to disk (without the index column).
# The path is held in one variable so the confirmation message always names
# the file that was actually written; previously the message reported a
# different file name from the one passed to to_csv.
filtered_output_path = 'all_meets_beforeworldsfemale_ipf_epf.csv'
filtered_data.to_csv(filtered_output_path, index=False)
print(f"Filtered data saved as '{filtered_output_path}'.")


Filtered data saved as 'filtered_data_ipf_epf.csv'.


In [124]:

# Men's world-record totals, 2024, keyed by weight-class label.
# The original author marked these numbers as example values.
# Every records cell binds the same name `world_records`; whichever of them
# was executed most recently is the table the gold-standard filter uses.
world_records = {
    '59': 669.5,
    '66': 710.5,
    '74': 836.0,
    '83': 861.0,
    '93': 901.0,
    '105': 940.5,
    '120': 978.5,
    '120+': 1152.5,
}

In [111]:
# Men's world-record totals, 2020, keyed by weight-class label.
# Every records cell binds the same name `world_records`; whichever of them
# was executed most recently is the table the gold-standard filter uses.
world_records = {
    '59': 669.5,
    '66': 705.5,
    '74': 790.5,
    '83': 833,
    '93': 853.5,
    '105': 895.5,
    '120': 978.5,
    '120+': 1105.5,
}

In [72]:

# Women's world-record totals, 2020, keyed by weight-class label.
# Every records cell binds the same name `world_records`; whichever of them
# was executed most recently is the table the gold-standard filter uses.
# NOTE(review): '63' and '72' carry the same value (543.5) - confirm that this
# is intended and not a copy/paste slip.
world_records = {
    '47': 407.5,
    '52': 432.5,
    '57': 473.5,
    '63': 543.5,
    '72': 543.5,
    '84': 613,
    '84+': 671.5,
}

In [63]:
# Women's world-record totals, 2024, keyed by weight-class label.
# Every records cell binds the same name `world_records`; this is the last
# such cell in notebook order, so a top-to-bottom run ends up using this table
# in the gold-standard filter.
world_records = {
    '47': 433.5,
    '52': 481,
    '57': 519.5,
    '63': 557.5,
    '69': 600,
    '76': 613,
    '84': 647,
    '84+': 731,
}

In [64]:
# Select the "gold standard" results: totals that reach at least a fixed
# fraction of the world record for the row's weight class.
GOLD_STANDARD_FRACTION = 0.84  # share of the class record a total must reach

# `world_records` is keyed by string labels (e.g. '84+'), so the class column
# is compared as strings.
filtered_data['WeightClassKg'] = filtered_data['WeightClassKg'].astype(str)

# Vectorised lookup instead of a row-by-row apply: a class that is missing
# from `world_records` maps to NaN, and any comparison against NaN is False,
# so such rows are excluded - the same outcome as the former np.inf fallback.
# Unlike apply(axis=1), this also yields a proper boolean mask when
# `filtered_data` is empty.
record_for_class = filtered_data['WeightClassKg'].map(world_records).astype(float)
meets_standard = filtered_data['TotalKg'] >= GOLD_STANDARD_FRACTION * record_for_class

# .copy() makes the result an independent frame, so later column assignments
# on it do not trigger SettingWithCopyWarning.
gold_standard_data = filtered_data.loc[meets_standard].copy()

In [65]:
# Normalise lifter names (surrounding whitespace removed, then lower-cased) so
# the de-duplication below groups spelling variants together.
# `assign` builds a new frame instead of writing into `gold_standard_data` in
# place; that frame was produced by filtering another DataFrame, and pandas
# may raise SettingWithCopyWarning for in-place writes on such a result.
gold_standard_data = gold_standard_data.assign(
    Name=gold_standard_data['Name'].str.strip().str.lower()
)

In [66]:
# De-duplicate: for every (lifter, weight class) pair keep only the row with
# the largest TotalKg, then renumber the index from 0.
gold_standard_data = (
    gold_standard_data
    .loc[gold_standard_data.groupby(['Name', 'WeightClassKg'])['TotalKg'].idxmax()]
    .reset_index(drop=True)
)


In [67]:
# Write the gold-standard subset to disk (without the index column).
# The path is held in one variable so the confirmation message always names
# the file that was actually written; previously the message reported a
# different file name from the one passed to to_csv.
gold_standard_output_path = 'gold_standard_before_worlds_female_data_ipf_epf.csv'
gold_standard_data.to_csv(gold_standard_output_path, index=False)
print(f"Gold standard data saved as '{gold_standard_output_path}'.")



Gold standard data saved as 'gold_standard_data_ipf_epf.csv'.
