In [3]:
# 02_feature_engineering.ipynb
# ==========================================
import pandas as pd
import numpy as np


In [12]:
# Read the cleaned exercise dataset produced by the previous notebook.
df = pd.read_csv(filepath_or_buffer="../data/processed/cleaned_exercise_data.csv")

In [14]:
# 1. Keep only the female records
# ------------------------------------------
# Case-insensitive match on `gender`; the surviving rows are renumbered from 0.
df = (
    df.loc[df['gender'].str.lower().eq('female')]
    .reset_index(drop=True)
)

print(f"Total Female Records: {len(df)}")

Total Female Records: 1964


In [15]:
# 2. Simulate menstrual cycle phases
# ------------------------------------------
# Assume a typical 28-day cycle and give every record a random cycle day.
# randint's `high` bound is exclusive, so the drawn days fall in 1..28.
# The global seed is fixed so the draw repeats on a fresh run.
np.random.seed(42)
df['Cycle_Day'] = np.random.randint(low=1, high=29, size=len(df))

def get_cycle_phase(day):
    """Return the cycle phase name for a given cycle day.

    Days 1-5 map to "Menstrual", 6-13 to "Follicular", 14-17 to "Ovulatory"
    and 18-28 to "Luteal" (all bounds inclusive). Any value that falls in
    none of those ranges yields "Unknown".
    """
    # (first day, last day, phase name) -- checked in order, bounds inclusive.
    phase_ranges = (
        (1, 5, "Menstrual"),
        (6, 13, "Follicular"),
        (14, 17, "Ovulatory"),
        (18, 28, "Luteal"),
    )
    for first_day, last_day, phase_name in phase_ranges:
        if first_day <= day <= last_day:
            return phase_name
    return "Unknown"

# Label every record with the phase that its cycle day falls in.
df['Cycle_Phase'] = df['Cycle_Day'].map(get_cycle_phase)

In [16]:
# 3. Generate hormone levels (approximate)
# ------------------------------------------
def simulate_hormones(phase):
    """Draw a random (estrogen, progesterone) pair for a cycle phase.

    Each value is sampled with ``np.random.uniform`` from a range specific to
    the phase, estrogen first and progesterone second. A phase that matches
    none of the known names returns ``(nan, nan)`` without drawing anything.
    """
    # (phase name, estrogen bounds, progesterone bounds)
    hormone_ranges = (
        ("Menstrual", (10, 40), (0.5, 2)),
        ("Follicular", (40, 200), (1, 3)),
        ("Ovulatory", (200, 400), (2, 6)),
        ("Luteal", (100, 250), (10, 25)),
    )
    for phase_name, estrogen_bounds, progesterone_bounds in hormone_ranges:
        if phase == phase_name:
            # Keep the draw order fixed so seeded runs stay reproducible.
            estrogen = np.random.uniform(*estrogen_bounds)
            progesterone = np.random.uniform(*progesterone_bounds)
            return estrogen, progesterone
    return np.nan, np.nan

# Draw one (estrogen, progesterone) pair per record, in row order, and attach
# both columns at once. The pairs are gathered into a single DataFrame rather
# than wrapping each row's result in its own pd.Series, which is much slower.
df[['Estrogen_Level', 'Progesterone_Level']] = pd.DataFrame(
    [simulate_hormones(phase) for phase in df['Cycle_Phase']],
    index=df.index,  # align the new values with the existing rows
    columns=['Estrogen_Level', 'Progesterone_Level'],
)

In [17]:
# 4. Estimate fatigue level and recommend intensity
# ------------------------------------------
def estimate_fatigue(row):
    """Draw a random fatigue score from the row's ``Cycle_Phase``.

    The score is sampled with ``np.random.uniform`` from a phase-specific
    range. A phase that matches none of the known names returns a fixed 0.5
    and draws no random number.
    """
    phase = row['Cycle_Phase']
    # (phase name, lower bound, upper bound)
    fatigue_bounds = (
        ('Menstrual', 0.6, 0.9),
        ('Luteal', 0.5, 0.8),
        ('Follicular', 0.2, 0.5),
        ('Ovulatory', 0.1, 0.4),
    )
    for phase_name, low, high in fatigue_bounds:
        if phase == phase_name:
            return np.random.uniform(low, high)
    return 0.5

# Score every record row by row (each call may consume a random draw, so the
# row order of the apply matters for seeded reproducibility).
df = df.assign(Fatigue_Level=df.apply(estimate_fatigue, axis=1))

def recommend_intensity(fatigue):
    """Translate a fatigue score into a recommended workout intensity.

    Scores above 0.7 give 'Low', scores above 0.4 (up to and including 0.7)
    give 'Moderate', and everything else gives 'High'.
    """
    if fatigue > 0.7:
        return 'Low'
    return 'Moderate' if fatigue > 0.4 else 'High'

# Turn each fatigue score into its intensity label.
df['Recommended_Intensity'] = df['Fatigue_Level'].map(recommend_intensity)

In [20]:
# 5. Optional: Adjust calories burned slightly by phase
# ------------------------------------------
# Multiplier applied to `calories_burn` for each cycle phase.
phase_calorie_adjust = {
    'Menstrual': 0.9,
    'Follicular': 1.1,
    'Ovulatory': 1.15,
    'Luteal': 1.05
}

# Look up every row's multiplier in one vectorised pass instead of a row-wise
# apply. Phases missing from the table map to NaN and fall back to a neutral
# factor of 1, matching a dict lookup with a default of 1.
df['Adjusted_Calories'] = df['calories_burn'] * (
    df['Cycle_Phase'].map(phase_calorie_adjust).fillna(1)
)

In [22]:
# 6. Write the engineered dataset to disk
# ------------------------------------------
# index=False leaves the DataFrame index out of the CSV.
df.to_csv(
    path_or_buf="../data/processed/womens_fitness_realistic.csv",
    index=False,
)
print(" Saved womens_fitness_realistic.csv successfully!")

 Saved womens_fitness_realistic.csv successfully!


In [23]:
# 7. Show the first ten engineered records
# ------------------------------------------
df.head(n=10)

Unnamed: 0,id,exercise,calories_burn,dream_weight,actual_weight,age,gender,duration,heart_rate,bmi,...,exercise_intensity,calories_per_min,weight_diff,Cycle_Day,Cycle_Phase,Estrogen_Level,Progesterone_Level,Fatigue_Level,Recommended_Intensity,Adjusted_Calories
0,5,Exercise 10,416.318374,89.960226,85.643174,29,Female,34,118,23.286113,...,3,12.24,4.32,7,Follicular,99.630163,1.519508,0.331315,High,457.950211
1,6,Exercise 1,479.72269,78.887578,80.596592,60,Female,41,169,34.719336,...,10,11.7,-1.71,20,Luteal,208.513017,17.438136,0.583026,Moderate,503.708825
2,12,Exercise 1,376.552649,95.196283,97.368961,35,Female,28,158,34.565117,...,6,13.45,-2.17,15,Ovulatory,216.209243,2.880733,0.267451,High,433.035547
3,13,Exercise 1,311.106024,83.679152,84.428659,54,Female,48,173,34.522637,...,4,6.48,-0.75,11,Follicular,149.321402,1.152262,0.304101,High,342.216626
4,17,Exercise 2,216.866393,84.638075,88.314527,38,Female,46,119,33.678882,...,7,4.71,-3.68,8,Follicular,176.193106,1.990293,0.47986,Moderate,238.553033
5,19,Exercise 1,457.620321,70.260105,70.756163,40,Female,23,129,33.238613,...,2,19.9,-0.5,21,Luteal,172.087987,18.886117,0.776794,Low,480.501337
6,23,Exercise 1,187.214521,70.680088,69.65256,54,Female,26,117,30.599816,...,3,7.2,1.03,7,Follicular,171.948955,1.695618,0.350712,High,205.935973
7,25,Exercise 1,293.464795,60.083728,59.860135,45,Female,36,135,18.743767,...,7,8.15,0.22,26,Luteal,201.702423,18.485979,0.598365,Moderate,308.138035
8,30,Exercise 6,214.219356,50.812203,49.315613,55,Female,48,155,29.235561,...,1,4.46,1.5,19,Luteal,140.054241,23.17945,0.72114,Low,224.930324
9,33,Exercise 1,261.042778,87.938113,90.680682,57,Female,47,111,30.423267,...,8,5.55,-2.74,23,Luteal,219.613903,19.876778,0.511014,Moderate,274.094917
