<a href="https://colab.research.google.com/github/SilverFoxMedia-Tesis/IA_02/blob/main/Modelo_Red_Neuronal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from google.colab import files
import matplotlib.pyplot as plt

In [None]:
# Load the source dataset.
# NOTE(review): `test` is read from the same CSV as `train`, so it is a duplicate of it rather than a held-out split.
train = pd.read_csv('/dataset_peru_filtrado_v2.csv')
test = pd.read_csv('/dataset_peru_filtrado_v2.csv')

In [None]:
# Parse the numeric columns; values that cannot be parsed become NaN.
train['Weight'] = pd.to_numeric(train['Weight'], errors='coerce')
train['Height'] = pd.to_numeric(train['Height'], errors='coerce')

# Parse the age, downcasting to the smallest integer dtype when possible.
train['Age'] = pd.to_numeric(train['Age'], downcast='integer', errors='coerce')

# Drop incomplete rows BEFORE casting the categorical columns to str:
# `astype(str)` turns NaN into the literal string 'nan', which `dropna`
# no longer recognises as missing, so the rows would silently survive.
required_columns = ['Weight', 'Height', 'Age', 'Gender', 'PhysicalActivity', 'Diabetes']
train = train.dropna(subset=required_columns).copy()

# Make sure the categorical columns are handled as text.
for column in ['Gender', 'PhysicalActivity', 'Diabetes']:
    train[column] = train[column].astype(str)

In [None]:
def calcular_TMB_MIFFLIN(Weight, Height, Age, Gender, PhysicalActivity):
    """Estimate daily calorie needs with the Mifflin-St Jeor formula.

    The gender-independent part is ``10*Weight + 6.25*Height - 5*Age``; the
    gender constant is ``+5`` for ``'MALE'`` and ``-161`` for any other value.
    The result is scaled by the activity multiplier and rounded to 2 decimals.

    Raises:
        KeyError: if ``PhysicalActivity`` is not one of the known levels.
    """
    factores_actividad = {
        'No': 1.2,
        'Slight': 1.375,
        'Moderate': 1.55,
        'Strong': 1.725,
        'Very_Strong': 1.9
    }

    base = (10 * Weight) + (6.25 * Height) - (5 * Age)
    # Any gender other than 'MALE' falls back to the female constant.
    ajuste_genero = 5 if Gender == 'MALE' else -161

    return round((base + ajuste_genero) * factores_actividad[PhysicalActivity], 2)

In [None]:
def calcular_TMB_FAO_ONU(Weight, Age, Gender):
    """Estimate the basal metabolic rate from weight using age/gender brackets.

    Each bracket applies ``coefficient * Weight + constant``. Brackets are
    contiguous and expressed in completed years (0-3, 4-10, 11-18, 19-30,
    31-60, older), so fractional ages such as 3.5 or 10.5 fall into the
    bracket of their completed years instead of dropping through to the
    "older than 60" formula.

    Args:
        Weight: body weight (the caller passes the 'Weight' column value).
        Age: age in years; must be a non-negative number.
        Gender: ``'MALE'`` selects the male table; any other value selects
            the female table.

    Returns:
        The estimate rounded to two decimals.

    Raises:
        ValueError: if ``Age`` is negative or NaN.
    """
    # `not Age >= 0` is also True for NaN, which would otherwise match no bracket.
    if not Age >= 0:
        raise ValueError(f"Age must be a non-negative number, got {Age!r}")

    # Each entry: (exclusive upper age bound, weight coefficient, constant).
    if Gender == 'MALE':
        tramos = (
            (4, 60.9, -54),
            (11, 22.7, 495),
            (19, 17.5, 651),
            (31, 15.3, 679),
            (61, 11.6, 879),
        )
        coef_mayores, const_mayores = 13.5, 487
    else:  # treated as female
        tramos = (
            (4, 61.0, -51),
            (11, 22.5, 499),
            (19, 12.2, 746),
            (31, 14.7, 496),
            (61, 8.7, 829),
        )
        coef_mayores, const_mayores = 10.5, 596

    for limite, coeficiente, constante in tramos:
        if Age < limite:
            return round((coeficiente * Weight) + constante, 2)

    # Older than 60 completed years.
    return round((coef_mayores * Weight) + const_mayores, 2)

In [None]:
def calcular_TMB_harris_benedict_simplificada(Weight, Height, Age, Gender, PhysicalActivity):
    """Estimate daily calorie needs with the simplified Harris-Benedict formula.

    Computes ``constant + k_weight*Weight + k_height*Height - k_age*Age`` with
    the male coefficients when ``Gender == 'MALE'`` and the female ones for
    any other value, multiplies by the activity factor and rounds to 2 decimals.

    Raises:
        KeyError: if ``PhysicalActivity`` is not one of the known levels.
    """
    factores_actividad = {
      'No': 1.2,
      'Slight': 1.375,
      'Moderate': 1.55,
      'Strong': 1.725,
      'Very_Strong': 1.9
    }

    if Gender == 'MALE':
        constante, k_peso, k_altura, k_edad = 66.5, 13.75, 5.003, 6.78
    else:
        constante, k_peso, k_altura, k_edad = 655, 9.56, 1.85, 4.68

    gasto_basal = constante + (k_peso * Weight) + (k_altura * Height) - (k_edad * Age)

    # Scale the basal estimate by the activity level.
    return round(gasto_basal * factores_actividad[PhysicalActivity], 2)

In [None]:
def calcular_clasificacion_IMC(Weight, Height):
    """Classify a person by body-mass index (BMI).

    ``Height`` is divided by 100 before use (centimetres to metres) and the
    BMI is ``Weight / height_m ** 2``.

    Categories use contiguous half-open ranges so no value falls into a gap:
    ``< 18.5`` Insufficient_Weight, ``< 25`` Normal_Weight, ``< 30``
    Overweight, ``< 35`` Obesity_Type_I, ``< 40`` Obesity_Type_II, otherwise
    Obesity_Type_III. (The previous ``<= 24.9`` style cut-offs classified a
    BMI such as 24.95 as Overweight.)

    Args:
        Weight: body weight.
        Height: height; must be non-zero.

    Returns:
        The category label as a string.
    """
    Height_meters = Height / 100  # convert height to metres
    IMC = Weight / (Height_meters ** 2)

    # (exclusive upper BMI bound, label), checked in ascending order.
    limites = (
        (18.5, 'Insufficient_Weight'),
        (25, 'Normal_Weight'),
        (30, 'Overweight'),
        (35, 'Obesity_Type_I'),
        (40, 'Obesity_Type_II'),
    )
    for limite_superior, etiqueta in limites:
        if IMC < limite_superior:
            return etiqueta

    return 'Obesity_Type_III'


In [None]:
def calcular_TMB(Weight, Height, Age, Gender, PhysicalActivity):
    """Pick the calorie formula that applies to the person and evaluate it.

    - BMI class 'Obesity_Type_II' or 'Obesity_Type_III': Mifflin formula.
    - Otherwise, with ``PhysicalActivity == 'No'``: FAO/ONU formula.
    - Otherwise: simplified Harris-Benedict formula.
    """
    obesidad_severa = ('Obesity_Type_II', 'Obesity_Type_III')

    if calcular_clasificacion_IMC(Weight, Height) in obesidad_severa:
        return calcular_TMB_MIFFLIN(Weight, Height, Age, Gender, PhysicalActivity)

    if PhysicalActivity == 'No':
        return calcular_TMB_FAO_ONU(Weight, Age, Gender)

    return calcular_TMB_harris_benedict_simplificada(Weight, Height, Age, Gender, PhysicalActivity)

In [None]:
def calcular_meta_mensual(pesoActual):
    """Return 2% of the current weight, rounded to two decimals."""
    meta = pesoActual * 0.02
    return round(meta, 2)

In [None]:
def calcular_deficit(TMB):
    """Return the calorie target after removing 20% of ``TMB``, rounded to two decimals."""
    recorte = TMB * 0.20
    return round(TMB - recorte, 2)

In [None]:
def calcular_macronutrientes(deficit, Diabetes):
    """Split a calorie target into grams of carbohydrates, proteins and fats.

    Calories are distributed 50% carbohydrates, 30% fats and 20% proteins and
    converted to grams with 4 kcal/g (carbohydrates, proteins) and 9 kcal/g
    (fats). When ``Diabetes == 'YES'`` the protein grams are additionally
    divided by 0.8.

    Returns:
        dict with keys 'Carbohidratos_G', 'Proteinas_G' and 'Grasas_G', each
        rounded to two decimals.
    """
    gramos_carbohidratos = (deficit * 0.50) / 4
    gramos_grasas = (deficit * 0.30) / 9

    gramos_proteinas = (deficit * 0.20) / 4
    if Diabetes == 'YES':
        # Diabetic adjustment: scale the protein grams up by 1 / 0.8.
        gramos_proteinas = gramos_proteinas / 0.8

    return {
        "Carbohidratos_G": round(gramos_carbohidratos, 2),
        "Proteinas_G": round(gramos_proteinas, 2),
        "Grasas_G": round(gramos_grasas, 2)
    }

In [None]:
# Row-wise calorie estimate; calcular_TMB picks the formula for each person.
train['TMB'] = train.apply(lambda row: calcular_TMB(
    row['Weight'],
    row['Height'],
    row['Age'],
    row['Gender'],
    row['PhysicalActivity']
    ), axis=1)

# BMI category for every row of `train`.
train['ClassificationIMC'] = train.apply(lambda row: calcular_clasificacion_IMC(row['Weight'], row['Height']), axis=1)

# Compute the deficit for every row and store it in the 'Deficit' column.
train['Deficit'] = train['TMB'].apply(calcular_deficit)

def get_macronutrientes(row):
    """Row adapter: feed the row's 'Deficit' and 'Diabetes' values to calcular_macronutrientes."""
    return calcular_macronutrientes(row['Deficit'], row['Diabetes'])

# `train` must already contain the 'Deficit' and 'Diabetes' columns at this point.
macronutrientes = train.apply(get_macronutrientes, axis=1)

# Expand the per-row result dicts into a DataFrame aligned on train's index.
macronutrientes_df = pd.DataFrame(macronutrientes.tolist(), index=train.index)
# Drop the macro columns if they already exist so the join below does not hit overlapping column names.
for col in ['Carbohidratos_G', 'Proteinas_G', 'Grasas_G']:
    if col in train.columns:
        train.drop(col, axis=1, inplace=True)

# Attach the macro columns to the main frame.
train = train.join(macronutrientes_df)

# Weight-loss goal derived from the current weight in the 'Weight' column.
train['PesoAperder'] = train['Weight'].apply(calcular_meta_mensual)


In [None]:
# Name of the output file
output_filename = 'train_last_modified.csv'

# Export the modified DataFrame to a CSV file (without the index column)
train.to_csv(output_filename, index=False)

# Download the file to the local machine (Colab helper)
files.download(output_filename)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
train.head(60)

Unnamed: 0,Gender,Age,Diabetes,Weight,Height,AbdominalCircumference,PhysicalActivity,TMB,ClassificationIMC,Deficit,PesoAperder,Carbohidratos_G,Proteinas_G,Grasas_G
0,MALE,39,NO,75.0,171.0,95.6,No,1749.0,Overweight,1399.2,1.5,174.9,69.96,46.64
1,FEMALE,22,NO,65.9,158.1,89.5,Slight,2027.48,Overweight,1621.98,1.32,202.75,81.1,54.07
2,FEMALE,20,NO,58.3,161.5,82.9,Slight,1949.09,Normal_Weight,1559.27,1.17,194.91,77.96,51.98
3,MALE,33,NO,73.8,170.7,93.7,No,1735.08,Overweight,1388.06,1.48,173.51,69.4,46.27
4,FEMALE,38,NO,53.9,151.5,74.9,No,1297.93,Normal_Weight,1038.34,1.08,129.79,51.92,34.61
5,MALE,35,NO,90.3,176.1,103.2,No,1926.48,Overweight,1541.18,1.81,192.65,77.06,51.37
6,FEMALE,26,NO,62.4,156.1,84.5,Moderate,2198.91,Overweight,1759.13,1.25,219.89,87.96,58.64
7,FEMALE,24,NO,68.3,155.7,89.2,Moderate,2299.69,Overweight,1839.75,1.37,229.97,91.99,61.32
8,MALE,34,NO,81.5,169.0,98.0,No,1824.4,Overweight,1459.52,1.63,182.44,72.98,48.65
9,MALE,34,NO,52.9,164.6,71.5,Slight,1906.92,Normal_Weight,1525.54,1.06,190.69,76.28,50.85


carbohidratos = 187.80
proteinas = 75.12
grasas = 50.08

# Desayuno (25%)
desayuno_carbs = carbohidratos * 0.25

desayuno_prot = proteinas * 0.25

desayuno_grasas = grasas * 0.25

# Almuerzo (35%)
almuerzo_carbs = carbohidratos * 0.35

almuerzo_prot = proteinas * 0.35

almuerzo_grasas = grasas * 0.35

# Cena (40%)
cena_carbs = carbohidratos * 0.40

cena_prot = proteinas * 0.40

cena_grasas = grasas * 0.40

# Resultados
(desayuno_carbs, desayuno_prot, desayuno_grasas), (almuerzo_carbs, almuerzo_prot, almuerzo_grasas), (cena_carbs, cena_prot, cena_grasas)

In [None]:
# Load the food/nutrient dataset (read with ISO-8859-1 encoding)
food = pd.read_csv('/nutrients_csvfile.csv', encoding='ISO-8859-1')

# Display the dataset to inspect its structure
food



Unnamed: 0,Food,Measure,Grams,Calories,Protein,Fat,Sat.Fat,Fiber,Carbs,Category
0,Cows' milk,1 qt.,976.000,660,32,40,36.0,0,48.0,Dairy_products
1,Buttermilk,1 cup,246.000,127,9,5,4.0,0,13.0,Dairy_products
2,Evaporated undiluted,1 cup,252.000,345,16,20,18.0,0,24.0,Dairy_products
3,Fortified milk,6 cups,1.419,1.373,89,42,23.0,1.4,119.0,Dairy_products
4,Powdered milk,1 cup,103.000,515,27,28,24.0,0,39.0,Dairy_products
...,...,...,...,...,...,...,...,...,...,...
238,Cream soups,1 cup,255.000,200,7,12,11.0,1.20,18.0,Soups
239,Noodle,1 cup,250.000,115,6,4,3.0,0.20,13.0,Soups
240,Split-pea soup,1 cup,250.000,147,8,3,3.0,0.50,25.0,Soups
241,Tomato soup,1 cup,245.000,175,6,7,6.0,0.50,22.0,Soups


In [None]:
# Load the dataset to view its contents
file_path = '/modified_train_data_2.csv'
data = pd.read_csv(file_path)

# `DataFrame.info()` prints its report and returns None, so call it as a
# statement instead of packing it into a tuple (which displayed `(None, ...)`).
data.info()

# Display the first few rows as the cell output
data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29397 entries, 0 to 29396
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Gender                  29397 non-null  int64  
 1   Age                     29397 non-null  int64  
 2   Diabetes                29374 non-null  float64
 3   Weight                  29397 non-null  float64
 4   Height                  29397 non-null  float64
 5   AbdominalCircumference  29397 non-null  float64
 6   PhysicalActivity        29397 non-null  int64  
 7   TMB                     29397 non-null  float64
 8   ClassificationIMC       29397 non-null  int64  
 9   Deficit                 29397 non-null  float64
 10  PesoAperder             29397 non-null  float64
 11  Carbohidratos_G         29397 non-null  float64
 12  Proteinas_G             29397 non-null  float64
 13  Grasas_G                29397 non-null  float64
dtypes: float64(10), int64(4)
memory usage:

(None,
    Gender  Age  Diabetes  Weight  Height  AbdominalCircumference  \
 0       1   39       2.0    75.0   171.0                    95.6   
 1       2   22       2.0    65.9   158.1                    89.5   
 2       2   20       2.0    58.3   161.5                    82.9   
 3       1   33       2.0    73.8   170.7                    93.7   
 4       2   38       2.0    53.9   151.5                    74.9   
 
    PhysicalActivity      TMB  ClassificationIMC  Deficit  PesoAperder  \
 0                 1  1749.00                  3  1399.20         1.50   
 1                 2  2027.48                  3  1621.98         1.32   
 2                 2  1949.09                  2  1559.27         1.17   
 3                 1  1735.08                  3  1388.06         1.48   
 4                 1  1297.93                  2  1038.34         1.08   
 
    Carbohidratos_G  Proteinas_G  Grasas_G  
 0           174.90        69.96     46.64  
 1           202.75        81.10     54.0

In [None]:
# Collect the distinct values of the food dataset's 'Category' column
unique_categories = food['Category'].unique()

# Show the first rows of `data` together with the category list (displayed as a tuple)
data.head(), unique_categories


(   Gender  Age  Diabetes  Weight  Height  AbdominalCircumference  \
 0       1   39       2.0    75.0   171.0                    95.6   
 1       2   22       2.0    65.9   158.1                    89.5   
 2       2   20       2.0    58.3   161.5                    82.9   
 3       1   33       2.0    73.8   170.7                    93.7   
 4       2   38       2.0    53.9   151.5                    74.9   
 
    PhysicalActivity      TMB  ClassificationIMC  Deficit  PesoAperder  \
 0                 1  1749.00                  3  1399.20         1.50   
 1                 2  2027.48                  3  1621.98         1.32   
 2                 2  1949.09                  2  1559.27         1.17   
 3                 1  1735.08                  3  1388.06         1.48   
 4                 1  1297.93                  2  1038.34         1.08   
 
    Carbohidratos_G  Proteinas_G  Grasas_G  
 0           174.90        69.96     46.64  
 1           202.75        81.10     54.07  
 2 

In [None]:
# Split each daily macronutrient target across the three meals:
# breakfast 25%, lunch 35%, dinner 40%, each rounded to two decimals.
meal_shares = {'desayuno': 0.25, 'almuerzo': 0.35, 'cena': 0.40}
macro_columns = {
    'carbs': 'Carbohidratos_G',
    'proten': 'Proteinas_G',
    'grasas': 'Grasas_G',
}

for meal, share in meal_shares.items():
    for suffix, source_column in macro_columns.items():
        # Produces e.g. 'desayuno_carbs', 'desayuno_proten', 'desayuno_grasas', ...
        data[f'{meal}_{suffix}'] = (data[source_column] * share).round(2)


In [None]:
# Show the first rows of the dataset with the newly added columns
print(data.head())

   Gender  Age  Diabetes  Weight  Height  AbdominalCircumference  \
0       1   39       2.0    75.0   171.0                    95.6   
1       2   22       2.0    65.9   158.1                    89.5   
2       2   20       2.0    58.3   161.5                    82.9   
3       1   33       2.0    73.8   170.7                    93.7   
4       2   38       2.0    53.9   151.5                    74.9   

   PhysicalActivity      TMB  ClassificationIMC  Deficit  ...  Grasas_G  \
0                 1  1749.00                  3  1399.20  ...     46.64   
1                 2  2027.48                  3  1621.98  ...     54.07   
2                 2  1949.09                  2  1559.27  ...     51.98   
3                 1  1735.08                  3  1388.06  ...     46.27   
4                 1  1297.93                  2  1038.34  ...     34.61   

   desayuno_carbs  desayuno_proten  desayuno_grasas  almuerzo_carbs  \
0           43.72            17.49            11.66           61.22  

In [None]:
# Name of the output file
output_filename = 'data_with_all2.csv'

# Export the modified DataFrame to a CSV file (without the index column)
data.to_csv(output_filename, index=False)

# Download the file to the local machine (Colab helper)
files.download(output_filename)

In [None]:
# NOTE(review): the export cell writes 'data_with_all2.csv' with a relative path, while this
# reads '/data_with_all2.csv' (absolute) — confirm both refer to the same file.
data = pd.read_csv('/data_with_all2.csv')

In [None]:
# Preview the first rows of `data`
data.head()

Unnamed: 0,Gender,Age,Diabetes,Weight,Height,AbdominalCircumference,PhysicalActivity,TMB,ClassificationIMC,Deficit,...,Grasas_G,desayuno_carbs,desayuno_proten,desayuno_grasas,almuerzo_carbs,almuerzo_proten,almuerzo_grasas,cena_carbs,cena_proten,cena_grasas
0,1,39,2.0,75.0,171.0,95.6,1,1749.0,3,1399.2,...,46.64,43.72,17.49,11.66,61.22,24.49,16.32,69.96,27.98,18.66
1,2,22,2.0,65.9,158.1,89.5,2,2027.48,3,1621.98,...,54.07,50.69,20.27,13.52,70.96,28.38,18.92,81.1,32.44,21.63
2,2,20,2.0,58.3,161.5,82.9,2,1949.09,2,1559.27,...,51.98,48.73,19.49,13.0,68.22,27.29,18.19,77.96,31.18,20.79
3,1,33,2.0,73.8,170.7,93.7,1,1735.08,3,1388.06,...,46.27,43.38,17.35,11.57,60.73,24.29,16.19,69.4,27.76,18.51
4,2,38,2.0,53.9,151.5,74.9,1,1297.93,2,1038.34,...,34.61,32.45,12.98,8.65,45.43,18.17,12.11,51.92,20.77,13.84


In [None]:
import numpy as np

# Keep only the rows of `food` whose 'Category' is one of the breakfast categories
categories_breakfast = ['Dairy_products', 'Fats_Oils_Shortenings', 'Fruits_A-F', 'Fruits_G-P', 'Fruits_R-Z']
breakfast_foods = food[food['Category'].isin(categories_breakfast)]

# Function to randomly select food item that meets the nutritional requirements
def select_food_for_meal(meal_nutrients, category, nutrient_cols, foods=None, random_state=None):
    """Randomly pick one food of ``category`` that fits within a meal's nutrient budget.

    Args:
        meal_nutrients: dict with the keys 'carbs', 'proteins' and 'fats'
            holding the maximum amount allowed for each nutrient.
        category: value of the 'Category' column to select from.
        nutrient_cols: columns that must be non-missing for a food to be
            considered (e.g. ['Carbs', 'Protein', 'Fat']).
        foods: DataFrame to select from; defaults to the module-level
            ``breakfast_foods``. It is never modified.
        random_state: optional seed/RandomState forwarded to
            ``DataFrame.sample`` for reproducible picks.

    Returns:
        The index label of the selected food, or ``None`` when no food of the
        category fits the budget.
    """
    if foods is None:
        foods = breakfast_foods

    # Work on an explicit copy: assigning into a filtered slice is what
    # triggered pandas' SettingWithCopyWarning.
    candidates = foods.loc[foods['Category'] == category].copy()

    # Coerce the compared columns to numbers; unparseable entries become NaN
    # and therefore never satisfy the `<=` comparisons below.
    for column in ('Carbs', 'Protein', 'Fat'):
        candidates[column] = pd.to_numeric(candidates[column], errors='coerce')

    # Drop rows with missing values in the requested nutrient columns.
    candidates = candidates.dropna(subset=nutrient_cols)

    # A food is valid when every nutrient is at or below the meal budget.
    fits_budget = (
        (candidates['Carbs'] <= meal_nutrients['carbs']) &
        (candidates['Protein'] <= meal_nutrients['proteins']) &
        (candidates['Fat'] <= meal_nutrients['fats'])
    )
    valid_foods = candidates[fits_budget]

    if valid_foods.empty:
        return None

    return valid_foods.sample(1, random_state=random_state).index[0]

# Create the breakfast food-id columns up front, initialised to None
data['desayuno_1'] = None
data['desayuno_2'] = None
data['desayuno_3'] = None

# For every row, pick three breakfast items against that row's breakfast budget.
# NOTE(review): each item is checked against the full meal budget independently, and the
# random picks are not seeded, so results differ between runs.
for index, row in data.iterrows():
    meal_nutrients = {
        'carbs': row['desayuno_carbs'],
        'proteins': row['desayuno_proten'],
        'fats': row['desayuno_grasas']
    }

    # Select a Dairy Product
    data.at[index, 'desayuno_1'] = select_food_for_meal(meal_nutrients, 'Dairy_products', ['Carbs', 'Protein', 'Fat'])

    # Select a Fat/Oil/Shortening
    data.at[index, 'desayuno_2'] = select_food_for_meal(meal_nutrients, 'Fats_Oils_Shortenings', ['Carbs', 'Protein', 'Fat'])

    # Randomly choose a fruit category and select a fruit
    fruit_category = np.random.choice(['Fruits_A-F', 'Fruits_G-P', 'Fruits_R-Z'])
    data.at[index, 'desayuno_3'] = select_food_for_meal(meal_nutrients, fruit_category, ['Carbs', 'Protein', 'Fat'])

# Preview the breakfast budget next to the selected food ids
data[['desayuno_carbs', 'desayuno_proten', 'desayuno_grasas', 'desayuno_1', 'desayuno_2', 'desayuno_3']].head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_foods['Calories'] = pd.to_numeric(filtered_foods['Calories'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_foods['Fat'] = pd.to_numeric(filtered_foods['Fat'], errors='coerce')


Unnamed: 0,desayuno_carbs,desayuno_proten,desayuno_grasas,desayuno_1,desayuno_2,desayuno_3
0,43.72,17.49,11.66,8,25,156
1,50.69,20.27,13.52,12,22,158
2,48.73,19.49,13.0,19,25,168
3,43.38,17.35,11.57,18,21,187
4,32.45,12.98,8.65,1,21,138


In [None]:
# Name of the output file
output_filename = 'data_with_breakfast.csv'

# Export the modified DataFrame to a CSV file (without the index column)
data.to_csv(output_filename, index=False)

# Download the file to the local machine (Colab helper)
files.download(output_filename)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Keep only the food categories used for lunch.
# `.copy()` makes the filtered frame independent of `food`, so the column
# assignments below no longer trigger SettingWithCopyWarning.
categories_lunch = ['Meat_Poultry', 'Fish_Seafood', 'Vegetables_A-E', 'Vegetables_F-P', 'Vegetables_R-Z', 'Breads_cereals_fastfood_grains']
lunch_foods = food[food['Category'].isin(categories_lunch)].copy()

# Convert the relevant columns to numeric; unparseable values become NaN.
lunch_foods['Calories'] = pd.to_numeric(lunch_foods['Calories'], errors='coerce')
lunch_foods['Fat'] = pd.to_numeric(lunch_foods['Fat'], errors='coerce')

# Drop foods with missing nutrient information (reassign instead of mutating in place).
lunch_foods = lunch_foods.dropna(subset=['Calories', 'Fat', 'Protein', 'Carbs'])

# Función para seleccionar un alimento que cumpla con los requerimientos nutricionales para el almuerzo
def select_food_for_lunch(meal_nutrients, category, foods=None, random_state=None):
    """Randomly pick one lunch food of ``category`` that fits the nutrient budget.

    Args:
        meal_nutrients: dict with the keys 'carbs', 'proteins' and 'fats'
            holding the maximum amount allowed for each nutrient.
        category: value of the 'Category' column to select from.
        foods: DataFrame to select from, with numeric 'Carbs', 'Protein' and
            'Fat' columns; defaults to the module-level ``lunch_foods``.
        random_state: optional seed/RandomState forwarded to
            ``DataFrame.sample`` for reproducible picks.

    Returns:
        The index label of the selected food, or ``None`` when nothing fits.
    """
    if foods is None:
        foods = lunch_foods

    # Restrict to the requested category.
    candidates = foods[foods['Category'] == category]

    # A food is valid when every nutrient is at or below the meal budget.
    fits_budget = (
        (candidates['Carbs'] <= meal_nutrients['carbs']) &
        (candidates['Protein'] <= meal_nutrients['proteins']) &
        (candidates['Fat'] <= meal_nutrients['fats'])
    )
    valid_foods = candidates[fits_budget]

    if valid_foods.empty:
        return None
    return valid_foods.sample(1, random_state=random_state).index[0]

# Apply the selection to every row to fill the lunch columns
for index, row in data.iterrows():
    nutrients = {
        'carbs': row['almuerzo_carbs'],
        'proteins': row['almuerzo_proten'],
        'fats': row['almuerzo_grasas']
    }
    # First column: Meat_Poultry or Fish_Seafood, chosen at random
    meat_fish_category = np.random.choice(['Meat_Poultry', 'Fish_Seafood'])
    data.at[index, 'almuerzo_1'] = select_food_for_lunch(nutrients, meat_fish_category)

    # Second column: one of the vegetable categories, chosen at random
    vegetable_category = np.random.choice(['Vegetables_A-E', 'Vegetables_F-P', 'Vegetables_R-Z'])
    data.at[index, 'almuerzo_2'] = select_food_for_lunch(nutrients, vegetable_category)

    # Third column: Breads_cereals_fastfood_grains
    data.at[index, 'almuerzo_3'] = select_food_for_lunch(nutrients, 'Breads_cereals_fastfood_grains')

# Preview the lunch budget next to the selected food ids
data[['almuerzo_carbs', 'almuerzo_proten', 'almuerzo_grasas', 'almuerzo_1', 'almuerzo_2', 'almuerzo_3']].head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lunch_foods['Calories'] = pd.to_numeric(lunch_foods['Calories'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lunch_foods['Fat'] = pd.to_numeric(lunch_foods['Fat'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lunch_foods.dropna(subset=['Calories', 'Fat', 'Protein', 'Carbs'], inplace=True)


Unnamed: 0,almuerzo_carbs,almuerzo_proten,almuerzo_grasas,almuerzo_1,almuerzo_2,almuerzo_3
0,61.22,24.49,16.32,51.0,82.0,201.0
1,70.96,28.38,18.92,65.0,133.0,221.0
2,68.22,27.29,18.19,75.0,129.0,216.0
3,60.73,24.29,16.19,36.0,101.0,214.0
4,45.43,18.17,12.11,40.0,107.0,212.0


In [None]:
# Name of the output file
output_filename = 'data_with_lunch.csv'

# Export the modified DataFrame to a CSV file (without the index column)
data.to_csv(output_filename, index=False)

# Download the file to the local machine (Colab helper)
files.download(output_filename)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Keep only the food categories used for dinner.
# `.copy()` makes the filtered frame independent of `food`, so the column
# assignments below no longer trigger SettingWithCopyWarning.
categories_dinner = ['Soups', 'Fruits_A-F', 'Fruits_G-P', 'Fruits_R-Z']
dinner_foods = food[food['Category'].isin(categories_dinner)].copy()

# Convert the relevant columns to numeric; unparseable values become NaN.
dinner_foods['Calories'] = pd.to_numeric(dinner_foods['Calories'], errors='coerce')
dinner_foods['Fat'] = pd.to_numeric(dinner_foods['Fat'], errors='coerce')

# Drop foods with missing nutrient information (reassign instead of mutating in place).
dinner_foods = dinner_foods.dropna(subset=['Calories', 'Fat', 'Protein', 'Carbs'])

# Función para seleccionar un alimento que cumpla con los requerimientos nutricionales para la cena
def select_food_for_dinner(meal_nutrients, category, foods=None, random_state=None):
    """Randomly pick one dinner food of ``category`` that fits the nutrient budget.

    Args:
        meal_nutrients: dict with the keys 'carbs', 'proteins' and 'fats'
            holding the maximum amount allowed for each nutrient.
        category: value of the 'Category' column to select from.
        foods: DataFrame to select from, with numeric 'Carbs', 'Protein' and
            'Fat' columns; defaults to the module-level ``dinner_foods``.
        random_state: optional seed/RandomState forwarded to
            ``DataFrame.sample`` for reproducible picks.

    Returns:
        The index label of the selected food, or ``None`` when nothing fits.
    """
    if foods is None:
        foods = dinner_foods

    # Restrict to the requested category.
    candidates = foods[foods['Category'] == category]

    # A food is valid when every nutrient is at or below the meal budget.
    fits_budget = (
        (candidates['Carbs'] <= meal_nutrients['carbs']) &
        (candidates['Protein'] <= meal_nutrients['proteins']) &
        (candidates['Fat'] <= meal_nutrients['fats'])
    )
    valid_foods = candidates[fits_budget]

    if valid_foods.empty:
        return None
    return valid_foods.sample(1, random_state=random_state).index[0]

# Apply the selection to every row to fill the dinner columns
for index, row in data.iterrows():
    nutrients = {
        'carbs': row['cena_carbs'],
        'proteins': row['cena_proten'],
        'fats': row['cena_grasas']
    }
    # First column: a soup. Keep the id in a local variable and test THAT for
    # None: once written into `data` a missing selection may be stored as NaN,
    # and `NaN is not None` is True, which made the lookup below fail.
    soup_id = select_food_for_dinner(nutrients, 'Soups')
    data.at[index, 'cena_1'] = soup_id

    # Second column: a fruit from a randomly chosen fruit category, selected
    # against whatever budget is left after the soup.
    fruit_category = np.random.choice(['Fruits_A-F', 'Fruits_G-P', 'Fruits_R-Z'])
    if soup_id is None:
        # No soup fitted: the fruit gets the whole dinner budget.
        remaining_nutrients = dict(nutrients)
    else:
        remaining_nutrients = {
            'carbs': nutrients['carbs'] - dinner_foods.at[soup_id, 'Carbs'],
            'proteins': nutrients['proteins'] - dinner_foods.at[soup_id, 'Protein'],
            'fats': nutrients['fats'] - dinner_foods.at[soup_id, 'Fat']
        }
    data.at[index, 'cena_2'] = select_food_for_dinner(remaining_nutrients, fruit_category)

# Preview the dinner budget next to the selected food ids
data[['cena_carbs', 'cena_proten', 'cena_grasas', 'cena_1', 'cena_2']].head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dinner_foods['Calories'] = pd.to_numeric(dinner_foods['Calories'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dinner_foods['Fat'] = pd.to_numeric(dinner_foods['Fat'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dinner_foods.dropna(subset=['Calories', 'Fat', 'Protein', 'Carbs'], inplace=True)


Unnamed: 0,cena_carbs,cena_proten,cena_grasas,cena_1,cena_2
0,69.96,27.98,18.66,236.0,183.0
1,81.1,32.44,21.63,238.0,149.0
2,77.96,31.18,20.79,239.0,192.0
3,69.4,27.76,18.51,241.0,163.0
4,51.92,20.77,13.84,241.0,193.0


In [None]:
# Name of the output file
output_filename = 'data_with_dinner.csv'

# Export the modified DataFrame to a CSV file (without the index column)
data.to_csv(output_filename, index=False)

# Download the file to the local machine (Colab helper)
files.download(output_filename)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Name of the output file
# NOTE(review): this exports the same `data` frame as the 'data_with_dinner.csv' cell, under a different name.
output_filename = 'train_last_modified_v2.csv'

# Export the modified DataFrame to a CSV file (without the index column)
data.to_csv(output_filename, index=False)

# Download the file to the local machine (Colab helper)
files.download(output_filename)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Load the workout dataset (read with ISO-8859-1 encoding)
wk = pd.read_csv('/Workout.csv', encoding='ISO-8859-1')

# Display the dataset to inspect its structure
wk

Unnamed: 0,BodyPart,RepsPerSet,Sets,TypeExercise,Workout
0,Chest,8-12,3-4,Upper_Chest,Press inclinado con mancuernas
1,Chest,10-15,3-4,Upper_Chest,Cruces de cables inclinados
2,Chest,10-15,3-4,Upper_Chest,Volantes inclinados con mancuernas
3,Chest,8-12,3-4,Lower_Chest,Press declinado con mancuernas
4,Chest,10-15,3-4,Lower_Chest,Rechazar cruces de cables
5,Chest,10-15,3-4,Lower_Chest,Declinar aperturas con mancuernas
6,Chest,10-15,3-4,Chest_Isolation,Moscas en el pecho
7,Chest,10-15,3-4,Chest_Isolation,Cruces de cables
8,Chest,10-15,3-4,Chest_Isolation,Aperturas con mancuernas
9,Back,8-12,3-4,Upper,Dominadas


In [None]:
# Modify the function to use TypeExercise ensuring no type repetition within each row.

# Map each workout id (the index label of `wk`) to its 'TypeExercise' value
id_to_type = wk.set_index(wk.index)['TypeExercise'].to_dict()

# Function to assign unique workout IDs ensuring no exercise type repetition within each row
def assign_unique_workout_ids_no_repeat_type(row, workout_data, id_to_type):
    unique_types_used = set()
    available_ids = workout_data.index.tolist()

    # Randomly choose IDs ensuring no type repetition
    choices = []
    for _ in range(5):  # We need five unique IDs, including the new column
        # Filter available IDs based on unused types
        available_ids = [wid for wid in available_ids if id_to_type[wid] not in unique_types_used]
        if not available_ids:
            break  # In case we run out of IDs to use
        choice = np.random.choice(available_ids)
        choices.append(choice)
        unique_types_used.add(id_to_type[choice])

    # Assign the choices to the row
    for i, choice in enumerate(choices):
        row[f'ejercicio_{i+1}'] = choice
    return row

# Add a new column 'ejercicio_5' initialized with None
# NOTE(review): only 'ejercicio_5' is pre-created here; the displayed result lists it before 'ejercicio_1'..'ejercicio_4' — confirm that column order is intended.
data['ejercicio_5'] = None

# Apply the function to every row of `data`, passing the workout table and the id -> type map
data = data.apply(assign_unique_workout_ids_no_repeat_type, axis=1, workout_data=wk, id_to_type=id_to_type)
data.head()


Unnamed: 0,Gender,Age,Diabetes,Weight,Height,AbdominalCircumference,PhysicalActivity,TMB,ClassificationIMC,Deficit,...,almuerzo_1,almuerzo_2,almuerzo_3,cena_1,cena_2,ejercicio_5,ejercicio_1,ejercicio_2,ejercicio_3,ejercicio_4
0,1,39,2.0,75.0,171.0,95.6,1,1749.0,3,1399.2,...,51.0,82.0,201.0,236.0,183.0,32,43,14,20,34
1,2,22,2.0,65.9,158.1,89.5,2,2027.48,3,1621.98,...,65.0,133.0,221.0,238.0,149.0,40,35,26,25,50
2,2,20,2.0,58.3,161.5,82.9,2,1949.09,2,1559.27,...,75.0,129.0,216.0,239.0,192.0,45,6,26,37,20
3,1,33,2.0,73.8,170.7,93.7,1,1735.08,3,1388.06,...,36.0,101.0,214.0,241.0,163.0,15,34,11,32,25
4,2,38,2.0,53.9,151.5,74.9,1,1297.93,2,1038.34,...,40.0,107.0,212.0,241.0,193.0,51,22,34,12,16


In [None]:
# Name of the output file
output_filename = 'dataset_final.csv'

# Export the modified DataFrame to a CSV file (without the index column)
data.to_csv(output_filename, index=False)

# Download the file to the local machine (Colab helper)
files.download(output_filename)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>