In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

pd.set_option('display.max_columns', None)

# Входной файл после препроцессинга (без скейлинга)
DATA_PATH = "data/preprocessed_without_scaling.csv"

# Выходные файлы с фичами (после скейлинга)
WORKOUT_OUT = "data/workout.csv"
MEAL_OUT    = "data/meal.csv"



Meal feature engineering (C, P, MacroMatch, ED, F)

In [10]:
def add_meal_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Строим фичи для еды:
    C  - Calorie Fit (насколько блюдо попадает в целевые калории)
    P  - Proteins per meal
    MacroMatch - насколько БЖУ близки к целевым долям
    ED - Energy Density (kcal/gram)
    F  - Food Safety (по сахару, натрию, холестерину)
    """
    out = df.copy()

    # --- C: Calorie Fit ---
    # Целевая калорийность одного приёма пищи
    meal_target = new_user["Meal_target"].iloc[0]
    out["C"] = 1 - (out["Calories"]/out["Daily meals frequency"] - meal_target).abs() / meal_target


    # --- P: Proteins per meal ---
    P = out["Proteins"] / out["Daily meals frequency"]
    out["P"] = (P - P.min()) / (P.max() - P.min())

    # --- M: MacroMatch ---
    # пересчитываем БЖУ в калории
    out["cal_from_protein"] = out["Proteins"] * 4
    out["cal_from_carbs"] = out["Carbs"] * 4
    out["cal_from_fats"] = out["Fats"] * 9

    total_macro_cal = (
        out["cal_from_protein"] +
        out["cal_from_carbs"] +
        out["cal_from_fats"]
    ).replace(0, np.nan)

    out["pct_p"] = out["cal_from_protein"] / total_macro_cal
    out["pct_c"] = out["cal_from_carbs"] / total_macro_cal
    out["pct_f"] = out["cal_from_fats"] / total_macro_cal

    # Целевые доли
    if new_user['Goal'].iloc[0] == 'Loss':
        target_p, target_c, target_f = 0.3, 0.35, 0.35
    elif new_user['Goal'].iloc[0] == 'Maintain':
        target_p, target_c, target_f = 0.2, 0.5, 0.3
    elif new_user['Goal'].iloc[0] == 'Gain':
        target_p, target_c, target_f = 0.25, 0.55, 0.2

    out["M"] = 1 - (1 / 3) * (
        ((out["pct_p"] - target_p).abs())/target_p +
        ((out["pct_c"] - target_c).abs())/target_c +
        ((out["pct_f"] - target_f).abs())/target_f
    )

    # --- ED: Energy Density ---
    ED = out["Calories"] / (out["serving_size_g"] * out['Daily meals frequency'])
    out["ED"] = (ED - ED.min()) / (ED.max() - ED.min())

    # --- F: Food Safety ---
    sugar_90 = out["sugar_g"].quantile(0.9)
    sodium_90 = out["sodium_g"].quantile(0.9)
    chol_90   = out["cholesterol_g"].quantile(0.9)

    out["F"] = 1 - (1/3) * (
        (out["sugar_g"] / sugar_90).clip(upper=1) +
        (out["sodium_g"] / sodium_90).clip(upper=1) +
        (out["cholesterol_g"] / chol_90).clip(upper=1)
    )

    return out


df = add_meal_features(df)


In [11]:
df.head()

Unnamed: 0,Age,Gender,Weight (kg),Height (m),Max_BPM,Avg_BPM,Resting_BPM,Session_Duration (hours),Calories_Burned,Workout_Type,Water_Intake (liters),Workout_Frequency (days/week),Experience_Level,BMI,Daily meals frequency,Calories,sugar_g,serving_size_g,rating,Name of Exercise,Sets,Reps,Benefit,Burns Calories (per 30 min),Target Muscle Group,Equipment Needed,Difficulty Level,Body Part,Type of Muscle,Workout,sodium_g,cholesterol_g,Carbs,Proteins,Fats,meal_name,BMR,PAL,TDEE,E_raw,E_eff,E,pct_HRR,I,workload,S,Duration_min,D,pen_age,pen_bmi,pen_hrr,pen_skill,R,C,P,cal_from_protein,cal_from_carbs,cal_from_fats,pct_p,pct_c,pct_f,M,ED,F
0,35,Male,65.27,1.62,188.58,157.65,69.05,1.0,1080.9,Strength,1.5,4,2,24.870447,3,1806.0,31.77,120.47,1.31,Decline Push-ups,5,21,Improves shoulder health and posture,342.58,"Shoulders, Triceps",Cable Machine,3,Legs,Lats,Dumbbell flyes,1.72994,0.28505,267.68,106.05,71.63,Grilled Vegan Lunch,1495.2,1.9,2840.88,0.295123,0.845312,0.570217,0.741237,0.526466,105,0.25,60.0,0.333333,0.404762,0.336004,0.0,1,0.637294,0.521061,0.342572,424.2,1070.72,644.67,0.198262,0.500432,0.301305,0.697313,0.258372,0.173291
1,23,Female,56.41,1.55,179.43,131.75,73.18,1.37,1809.91,HIIT,1.9,4,2,23.479709,3,1577.0,12.34,109.15,1.92,Bear Crawls,4,16,Strengthens lower abs,357.16,"Back, Core, Shoulders",Step or Box,2,Chest,Lats,Lateral raises,0.69308,0.30061,214.32,85.41,56.97,Fried Vegetarian Lunch,1256.85,1.9,2388.015,0.579037,0.902949,0.740993,0.551247,0.256055,64,0.225,82.2,0.575163,0.119048,0.299589,0.0,0,0.862504,0.454991,0.211067,341.64,857.28,512.73,0.199597,0.50085,0.299553,0.69673,0.247696,0.471934
2,33,Female,58.98,1.67,175.04,123.95,54.96,0.91,802.26,Cardio,1.88,3,1,21.148123,2,1608.0,42.81,399.43,4.7,Dips,5,22,Builds chest strength,359.63,"Quadriceps, Glutes",Step or Box,2,Arms,Grip Strength,Standing calf raises,2.14248,0.21542,246.04,98.11,65.48,Boiled Paleo Breakfast,1307.55,1.725,2255.52375,0.186606,0.912713,0.54966,0.574534,0.289199,110,0.416667,54.6,0.27451,0.357143,0.23854,0.0,1,0.685581,0.695902,0.60453,392.44,984.16,589.32,0.199622,0.50061,0.299768,0.69719,0.082439,0.096575
3,39,Female,93.78,1.7,191.21,155.1,50.07,1.1,1450.79,HIIT,2.5,4,2,32.449827,3,2657.0,9.34,314.31,4.85,Mountain Climbers,4,17,Improves coordination and cardiovascular health,351.65,"Biceps, Forearms",Parallel Bars or Chair,3,Shoulders,Upper,Incline dumbbell flyes,0.1232,0.0097,203.22,80.84,54.56,Fried Paleo Lunch,1644.3,1.9,3124.17,0.439177,0.881167,0.660172,0.744155,0.53062,68,0.147222,66.0,0.398693,0.5,0.53446,0.0,1,0.539662,0.766588,0.18195,323.36,812.88,491.04,0.198712,0.499533,0.301755,0.699098,0.129894,0.899428
4,45,Male,52.42,1.88,193.58,152.88,70.84,1.08,1166.4,Strength,2.91,4,2,14.831372,3,1470.0,23.78,99.22,3.07,Bicep Curls,5,15,Targets obliques and improves core rotation,329.36,"Chest, Triceps",Wall,3,Abs,Wrist Flexors,Military press,1.93511,0.11689,332.79,133.05,88.43,Baked Vegan Breakfast,1479.2,1.9,2810.48,0.328421,0.79305,0.560736,0.668405,0.422805,75,0.166667,64.8,0.385621,0.642857,0.073143,0.0,1,0.620914,0.424119,0.514598,532.2,1331.16,795.87,0.200133,0.500581,0.299286,0.697327,0.254918,0.392439


We divide it into workout features and meal features.

In [12]:
# Минимальный набор для workout-модели:
workout_features = df[[
    "Name of Exercise",
    "Workout_Type",
    "Target Muscle Group",
    "Body Part",
    "Type of Muscle",
    "Difficulty Level",
    "Calories_Burned",
    "Sets", "Reps",
    "Burns Calories (per 30 min)",
    "E", "I", "S", "D", "R"
]].copy()

# Минимальный набор для meal-модели:
meal_features = df[[
    "meal_name",
    "Calories",
    "Proteins",
    "Carbs",
    "Fats",
    "sugar_g",
    "serving_size_g",
    "sodium_g",
    "cholesterol_g",
    "C", "P", "M", "ED", "F"
]].copy()


In [31]:
workout_features.to_csv(WORKOUT_OUT, index=False)
print("Saved workout features to:", WORKOUT_OUT)
meal_features.to_csv(MEAL_OUT, index=False)
print("Saved meal features to:", MEAL_OUT)

new_user.to_csv('data/new_user.csv', index=False)

df.to_csv('data/dataset_with_features.csv', index=False)

Saved workout features to: data/workout.csv
Saved meal features to: data/meal.csv


OSError: [Errno 22] Invalid argument: 'data/dataset_with_features.csv'

Scaling (StandardScaler)-------------------------------------------------------------------------------------------------------------------------------------
перенести в модельку

Workout Scaling

In [None]:
workout_num_cols = ["E", "I", "S", "D", "R", "Calories_Burned",
    "Sets", "Reps",
    "Burns Calories (per 30 min)"]

scaler_workout = StandardScaler()
workout_scaled = scaler_workout.fit_transform(workout_features[workout_num_cols])

workout_scaled_df = pd.DataFrame(
    workout_scaled,
    columns=[col + "_scaled" for col in workout_num_cols],
    index=workout_features.index
)

Meal Scaling

In [None]:
meal_num_cols = ["Calories", "Proteins", "Carbs", "Fats",
                 "serving_size_g", "sodium_g", "cholesterol_g",
                 "C", "P", "M", "ED", "F"]

scaler_meal = StandardScaler()
meal_scaled = scaler_meal.fit_transform(meal_features[meal_num_cols])

meal_scaled_df = pd.DataFrame(
    meal_scaled,
    columns=[col + "_scaled" for col in meal_num_cols],
    index=meal_features.index
)


ValueError: Shape of passed values is (19823, 13), indices imply (19823, 12)