In [20]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestRegressor
import joblib
import os

In [21]:
SL = pd.read_csv("C:/Users/josen/Documents/MASTER/TFM/SmartLifter.csv")

In [22]:
SL['Type'].unique()

array(['Strength', 'Strongman', 'Stretching', 'Plyometrics', 'Cardio',
       'Olympic Weightlifting', 'Powerlifting'], dtype=object)

In [23]:
SL.info()
SL.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4348 entries, 0 to 4347
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   user_id          4348 non-null   int64  
 1   exercise_id      4348 non-null   int64  
 2   Age              4348 non-null   int64  
 3   Gender           4348 non-null   object 
 4   Weight_kg        4348 non-null   float64
 5   Height_m         4348 non-null   float64
 6   Workout_time     4348 non-null   float64
 7   Calories_Burned  4348 non-null   float64
 8   Fat_Percentage   4348 non-null   float64
 9   Frequency        4348 non-null   int64  
 10  BMI              4348 non-null   float64
 11  Title            4348 non-null   object 
 12  BodyPart         4348 non-null   object 
 13  Equipment        4348 non-null   object 
 14  BodyGroup        4348 non-null   object 
 15  Type             4348 non-null   object 
 16  Level            4348 non-null   object 
dtypes: float64(6),

Unnamed: 0,user_id,exercise_id,Age,Gender,Weight_kg,Height_m,Workout_time,Calories_Burned,Fat_Percentage,Frequency,BMI,Title,BodyPart,Equipment,BodyGroup,Type,Level
0,1,543,56,Male,88.3,1.71,1.69,1313.0,12.6,4,30.2,Holman Feet-Elevated Right Side Plank,Abdominals,Bodyweight,Core,Strength,Intermediate
1,1,1656,56,Male,88.3,1.71,1.69,1313.0,12.6,4,30.2,Barbell deadlift bent-over row complex,Middle Back,Barbell,Back,Strength,Intermediate
2,1,1390,56,Male,88.3,1.71,1.69,1313.0,12.6,4,30.2,HM Alternating Cross Lunge,Hamstrings,Bodyweight,Legs,Strength,Intermediate
3,1,420,56,Male,88.3,1.71,1.69,1313.0,12.6,4,30.2,30 Landmine Twist,Abdominals,Bodyweight,Core,Strength,Intermediate
4,1,960,56,Male,88.3,1.71,1.69,1313.0,12.6,4,30.2,FYR2 Push-Up to Kettlebell Hike Combo,Chest,Kettlebells,Upper Body,Strength,Intermediate


In [24]:
# 2. Codificar variables categóricas
cat_cols = [
    'Gender',
    'Level',
    'Type',
    'BodyGroup',
    'BodyPart',
    'Equipment'
]

encoders = {}

for col in cat_cols:
    le = LabelEncoder()
    SL[col + '_code'] = le.fit_transform(SL[col])
    encoders[col] = le

In [25]:
# 3. Definir features y target
feature_cols = [
  'Age', 'Gender_code', 'Weight_kg', 'Height_m', 'Frequency',
  'Level_code', 'Type_code', 'BodyGroup_code', 'BodyPart_code', 'Equipment_code'
]
X = SL[feature_cols]
y = SL['Title']

In [26]:
# 4. Entrenar modelo Random Forest
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X, y)

In [27]:
accuracy = model.score(X, y)
accuracy

0.9448022079116836

In [123]:
def generate_routine(user_profile, all_ex_df, model, encoders, seed=42):
    """
    Genera una rutina personalizada en base al perfil del usuario, tiempo disponible y variedad de grupos musculares.
    Solo utiliza ejercicios de tipo Strength.
    """
    import numpy as np

    # Validar edad
    age = user_profile['Age']
    if not 17 <= age <= 70:
        raise ValueError("La edad debe estar entre 17 y 70 años.")

    # Mapear frecuencia a días
    week_days = ['Día 1','Día 2','Día 3','Día 4','Día 5','Día 6','Día 7']
    freq = user_profile['Frequency']
    if not 1 <= freq <= 7:
        raise ValueError("Frequency debe ser un entero entre 1 y 7.")
    days = week_days[:freq]

    # Calcular ejercicios según tiempo
    total_time = user_profile.get('Workout_minutes', 60)
    warmup = 10
    rest_per_exercise = 2
    available_time = total_time - warmup
    approx_exercise_time = 5
    exercises_per_day = max(3, min(6, available_time // (approx_exercise_time + rest_per_exercise)))

    # Codificar inputs
    g = encoders['Gender'].transform([user_profile['Gender']])[0]
    l = encoders['Level'].transform([user_profile['Level']])[0]

    # Filtrar ejercicios
    df_filtered = all_ex_df[
        (all_ex_df['Gender_code'] == g) &
        (all_ex_df['Level_code'] == l) &
        (abs(all_ex_df['Age'] - age) <= 3) &
        (abs(all_ex_df['Weight_kg'] - user_profile['weight_kg']) <= 5) &
        (abs(all_ex_df['Height_m'] - user_profile['Height_m']) <= 0.05)
    ].drop_duplicates('Title').copy()

    if df_filtered.empty:
        df_filtered = all_ex_df[(all_ex_df['Level_code'] == l)].drop_duplicates('Title').copy()
    if df_filtered.empty:
        print("[⚠️ Aviso] No se encontraron ejercicios adecuados para el perfil especificado. Aplicando filtro general.")
        df_filtered = all_ex_df.drop_duplicates('Title').copy()

    # Preparar predicción
    X_pred = df_filtered.copy()
    X_pred['Gender_code'] = g
    X_pred['Level_code'] = l
    X_pred['Age'] = age
    X_pred['Weight_kg'] = user_profile['weight_kg']
    X_pred['Height_m'] = user_profile['Height_m']
    X_pred['Frequency'] = freq
    X_pred['Type_code'] = encoders['Type'].transform(['Strength'])[0]

    feature_cols = [
        'Age', 'Gender_code', 'Weight_kg', 'Height_m', 'Frequency',
        'Level_code', 'Type_code', 'BodyGroup_code', 'BodyPart_code', 'Equipment_code'
    ]

    if X_pred.empty or len(X_pred) < 3:
        raise ValueError("No hay suficientes ejercicios compatibles para generar una rutina.")

    # Predecir score
    proba = model.predict_proba(X_pred[feature_cols])
    title_classes = model.classes_
    scores = []
    for i, (_, row) in enumerate(X_pred.iterrows()):
        title = row['Title']
        if title in title_classes:
            class_index = list(title_classes).index(title)
            scores.append(proba[i][class_index])
        else:
            scores.append(0)
    X_pred['score'] = scores

    # Selección de ejercicios
    ranked = X_pred.sort_values('score', ascending=False).reset_index(drop=True)
    rng = np.random.RandomState(seed)
    used = set()
    schedule = {}
    prev_bodyparts = set()
    prev_bodygroups = set()

    for day in days:
        chosen = []
        bodyparts_today = set()
        bodygroups_today = set()
        ptr = 0

        while len(chosen) < exercises_per_day and ptr < len(ranked):
            row = ranked.iloc[ptr]
            title = row['Title']
            bodypart = row['BodyPart']
            bodygroup = row['BodyGroup']
            if title not in used and bodypart not in prev_bodyparts and bodygroup not in prev_bodygroups:
                chosen.append(f"{title} ({bodypart})")
                used.add(title)
                bodyparts_today.add(bodypart)
                bodygroups_today.add(bodygroup)
            ptr += 1

        # Si faltan ejercicios, permitir completar
        if len(chosen) < exercises_per_day:
            pool = ranked.head(200)
            remaining = [r for r in pool['Title'].tolist() if r not in used]
            needed = exercises_per_day - len(chosen)

            if not remaining:
                extra = rng.choice(pool['Title'].tolist(), size=needed, replace=True).tolist()
            else:
                if len(remaining) >= needed:
                    extra = rng.choice(remaining, size=needed, replace=False).tolist()
                else:
                    extra = rng.choice(remaining, size=needed, replace=True).tolist()

            for ex in extra:
                row = ranked[ranked['Title'] == ex].iloc[0]
                chosen.append(f"{row['Title']} ({row['BodyPart']})")
                used.add(ex)


        # Validar diversidad de grupos musculares
        len(bodygroups_today) < 2
        #    print(f"[⚠️ Aviso] En {day}, solo se asignó 1 grupo muscular. Intenta ampliar el pool de ejercicios o ajustar filtros.")

        schedule[day] = chosen
        prev_bodyparts = bodyparts_today.copy()
        prev_bodygroups = bodygroups_today.copy()

    return schedule


In [124]:
if __name__ == '__main__':
    user_profile = {
        'Gender':         'Female', # Género del usuario
        'Level':          'Intermediate', # Los niveles pueden ser: Beginner, Intermediate, Advanced
        'Age':            26, # Edad del usuario (entre 17 y 70 años)
        'weight_kg':      70,
        'Height_m':       1.75,#Weight y Height para calcular el IMC y ajustar el entrenamiento a la complexión del usuario
        'Frequency':      4, # Frecuencia semanal de entrenamiento (entre 1 y 7 días)
        'Workout_minutes': 60 # tiempo total por sesión (Con tiempo de calentamiento y max 2 min de dedsanso entre ejercicios)
    }
 
    ejemplo = generate_routine(
        user_profile=user_profile,
        all_ex_df=SL,
        model=model,
        encoders=encoders,
        seed=42
    )

    print("Rutina semanal recomendada:")
    for dia, ejercicios in ejemplo.items():
        print(f"{dia}:")
        for i, ex in enumerate(ejercicios, 1):
            print(f"  {i}. {ex}")

Rutina semanal recomendada:
Día 1:
  1. Band seated row (Traps)
  2. 30 Arms Machine Triceps Dip (Triceps)
  3. Seated bar twist (Abdominals)
  4. Holman Weighted Squat Jump (Quadriceps)
  5. 30 Arms Seated Dumbbell Concentration Curl (Biceps)
  6. Dumbbell skullcrusher (Triceps)
Día 2:
  1. Push-Ups With Feet On An Exercise Ball (Chest)
  2. FYR2 Double-Under (Abdominals)
  3. UP Seated Dumbbell Overhead Press (Shoulders)
  4. FYR2 Dumbbell Cross-Body Front Raise (Shoulders)
  5. Holman Squat Pulse to Plyometric Jump (Abdominals)
  6. Paul Carter Rear-Delt Fly (Shoulders)
Día 3:
  1. Holman Squat to Biceps Curl (Quadriceps)
  2. Holman Right Lunge Jump with Right Overhead Press (Quadriceps)
  3. Barbell Ab Rollout - On Knees (Abdominals)
  4. HM Butterfly (Quadriceps)
  5. 30 Legs Lying Leg Curl (Quadriceps)
  6. Vertical jump (Quadriceps)
Día 4:
  1. UN Chest Supported Row (Middle Back)
  2. RG Alternating Seated Dumbbell Lateral Raise (Shoulders)
  3. High knees sprawl drill (Quadri