In [1]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)  # me muestre todas las columnas
#pd.set_option('display.max_rows', None)  # me muestre todas las filas

import matplotlib.pyplot as plt
import seaborn as sns
import re

import warnings
warnings.filterwarnings('ignore')

In [2]:
food_original = pd.read_csv(r'..\data\FastFoodNutritionMenu_original.csv')
df = food_original.copy()
df.head()

Unnamed: 0,Company,Item,Calories,Calories from\nFat,Total Fat\n(g),Saturated Fat\n(g),Trans Fat\n(g),Cholesterol\n(mg),Sodium \n(mg),Carbs\n(g),Fiber\n(g),Sugars\n(g),Protein\n(g),Weight Watchers\nPnts
0,McDonald’s,Hamburger,250,80,9,3.5,0.5,25,520,31,2,6,12,247.5
1,McDonald’s,Cheeseburger,300,110,12,6.0,0.5,40,750,33,2,6,15,297.0
2,McDonald’s,Double Cheeseburger,440,210,23,11.0,1.5,80,1150,34,2,7,25,433.0
3,McDonald’s,McDouble,390,170,19,8.0,1.0,65,920,33,2,7,22,383.0
4,McDonald’s,Quarter Pounder® with Cheese,510,230,26,12.0,1.5,90,1190,40,3,9,29,502.0


In [3]:
df.info(memory_usage ='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1159 entries, 0 to 1158
Data columns (total 14 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Company               1159 non-null   object
 1   Item                  1159 non-null   object
 2   Calories              1157 non-null   object
 3   Calories from
Fat     1098 non-null   object
 4   Total Fat
(g)         1101 non-null   object
 5   Saturated Fat
(g)     1101 non-null   object
 6   Trans Fat
(g)         1101 non-null   object
 7   Cholesterol
(mg)      1157 non-null   object
 8   Sodium 
(mg)          1157 non-null   object
 9   Carbs
(g)             1101 non-null   object
 10  Fiber
(g)             1101 non-null   object
 11  Sugars
(g)            1157 non-null   object
 12  Protein
(g)           1027 non-null   object
 13  Weight Watchers
Pnts  524 non-null    object
dtypes: object(14)
memory usage: 978.8 KB


In [4]:
columnas_float = ['Calories', 'Calories from\nFat', 'Total Fat\n(g)',
       'Saturated Fat\n(g)', 'Trans Fat\n(g)', 'Cholesterol\n(mg)',
       'Sodium \n(mg)', 'Carbs\n(g)', 'Fiber\n(g)', 'Sugars\n(g)',
       'Protein\n(g)', 'Weight Watchers\nPnts']  # Lista de nombres de las columnas a convertir a float
# Remover espacios en blanco antes de convertir a float
df[columnas_float] = df[columnas_float].replace('\xa0', '', regex=True)
# Convertir a float utilizando to_numeric y 'coerce'
df[columnas_float] = df[columnas_float].apply(pd.to_numeric, errors='coerce')

In [5]:
df.columns

Index(['Company', 'Item', 'Calories', 'Calories from\nFat', 'Total Fat\n(g)',
       'Saturated Fat\n(g)', 'Trans Fat\n(g)', 'Cholesterol\n(mg)',
       'Sodium \n(mg)', 'Carbs\n(g)', 'Fiber\n(g)', 'Sugars\n(g)',
       'Protein\n(g)', 'Weight Watchers\nPnts'],
      dtype='object')

# Diagrama de barras - Calorías por artículo y restaurante
## Un diagrama de barras que muestre las calorías de los diferentes artículos en cada restaurante. 
## Esto podría ayudar a los consumidores a tomar decisiones más informadas sobre lo que comen.
plt.figure(figsize=(12, 6))
sns.barplot(x='Item', y='Calories', hue='Company', data=df)
plt.title('Calorías de los diferentes artículos en cada restaurante')
plt.xlabel('Artículo')
plt.ylabel('Calorías')
plt.xticks(rotation=90)
plt.legend(title='Restaurante')
plt.show()

# Mapa de calor - Distribución de los nutrientes por restaurante
## Un mapa de calor que muestre la distribución de los diferentes nutrientes en los diferentes restaurantes. 
## Esto podría ayudar a los consumidores a encontrar los restaurantes que ofrecen opciones más saludables.
nutrientes = ['Total Fat\n(g)', 'Saturated Fat\n(g)', 'Trans Fat\n(g)', 'Cholesterol\n(mg)', 'Sodium \n(mg)',
              'Carbs\n(g)', 'Fiber\n(g)', 'Sugars\n(g)', 'Protein\n(g)']
plt.figure(figsize=(10, 6))
sns.heatmap(df[nutrientes].corr(), annot=True, cmap='coolwarm')
plt.title('Distribución de los nutrientes en los diferentes restaurantes')
plt.show()

# Gráfico de líneas - Evolución de los valores nutricionales por artículo y restaurante a lo largo del tiempo
## Un gráfico de líneas que muestre la evolución de los valores nutricionales de los diferentes artículos en cada 
## restaurante a lo largo del tiempo. Esto podría ayudar a los consumidores a ver cómo han cambiado las opciones de 
## comida rápida a lo largo de los años.
plt.figure(figsize=(12, 6))
sns.lineplot(x='Año', y='Valor Nutricional', hue='Artículo', style='Restaurante', data=df)
plt.title('Evolución de los valores nutricionales de los diferentes artículos en cada restaurante')
plt.xlabel('Año')
plt.ylabel('Valor Nutricional')
plt.legend(title='Artículo', loc='upper left')
plt.show()

# Gráfico de dispersión - Relación entre calorías y contenido de grasa por artículo y restaurante
## Un gráfico de dispersión que muestre la relación entre las calorías y el contenido de grasa de los diferentes artículos 
## en cada restaurante. Esto podría ayudar a los consumidores a encontrar artículos que sean ricos en nutrientes pero bajos 
## en calorías.
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Calories', y='Total Fat\n(g)', hue='Company', data=df)
plt.title('Relación entre calorías y contenido de grasa de los diferentes artículos en cada restaurante')
plt.xlabel('Calorías')
plt.ylabel('Total Fat (g)')
plt.legend(title='Restaurante')
plt.show()

# Histograma - Distribución del contenido de azúcar por artículo y restaurante
## Un histograma que muestre la distribución del contenido de azúcar de los diferentes artículos en cada restaurante. 
## Esto podría ayudar a los consumidores a encontrar artículos que sean bajos en azúcar.
plt.figure(figsize=(10, 6))
sns.histplot(x='Sugars\n(g)', hue='Company', data=df, multiple='stack', kde=True)
plt.title('Distribución del contenido de azúcar de los diferentes artículos en cada restaurante')
plt.xlabel('Contenido de azúcar (g)')
plt.ylabel('Número de artículos')
plt.legend(title='Restaurante')
plt.show()

In [6]:
df = df.dropna()
df

Unnamed: 0,Company,Item,Calories,Calories from\nFat,Total Fat\n(g),Saturated Fat\n(g),Trans Fat\n(g),Cholesterol\n(mg),Sodium \n(mg),Carbs\n(g),Fiber\n(g),Sugars\n(g),Protein\n(g),Weight Watchers\nPnts
0,McDonald’s,Hamburger,250.0,80.0,9.0,3.5,0.5,25.0,520.0,31.0,2.0,6.0,12.0,247.5
1,McDonald’s,Cheeseburger,300.0,110.0,12.0,6.0,0.5,40.0,750.0,33.0,2.0,6.0,15.0,297.0
2,McDonald’s,Double Cheeseburger,440.0,210.0,23.0,11.0,1.5,80.0,1150.0,34.0,2.0,7.0,25.0,433.0
3,McDonald’s,McDouble,390.0,170.0,19.0,8.0,1.0,65.0,920.0,33.0,2.0,7.0,22.0,383.0
4,McDonald’s,Quarter Pounder® with Cheese,510.0,230.0,26.0,12.0,1.5,90.0,1190.0,40.0,3.0,9.0,29.0,502.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
524,Burger King,BK® Café Mocha Frappe- 16 fl oz,400.0,90.0,10.0,7.0,0.0,40.0,270.0,68.0,1.0,58.0,9.0,456.0
525,Burger King,BK® Café Mocha Frappe- 20 fl oz,510.0,110.0,13.0,8.0,0.0,50.0,350.0,87.0,1.0,71.0,12.0,577.0
526,Burger King,BK® Café Caramel Frappe- 12 fl oz,300.0,80.0,8.0,5.0,0.0,35.0,210.0,50.0,0.0,41.0,7.0,339.0
527,Burger King,BK® Café Caramel Frappe- 16 fl oz,400.0,90.0,10.0,7.0,0.0,40.0,280.0,68.0,0.0,56.0,9.0,454.0


In [7]:
df.Calories.max()

1220.0

In [8]:
df['Cholesterol\n(mg)'] = df['Cholesterol\n(mg)'] / 1000

df

Unnamed: 0,Company,Item,Calories,Calories from\nFat,Total Fat\n(g),Saturated Fat\n(g),Trans Fat\n(g),Cholesterol\n(mg),Sodium \n(mg),Carbs\n(g),Fiber\n(g),Sugars\n(g),Protein\n(g),Weight Watchers\nPnts
0,McDonald’s,Hamburger,250.0,80.0,9.0,3.5,0.5,0.025,520.0,31.0,2.0,6.0,12.0,247.5
1,McDonald’s,Cheeseburger,300.0,110.0,12.0,6.0,0.5,0.040,750.0,33.0,2.0,6.0,15.0,297.0
2,McDonald’s,Double Cheeseburger,440.0,210.0,23.0,11.0,1.5,0.080,1150.0,34.0,2.0,7.0,25.0,433.0
3,McDonald’s,McDouble,390.0,170.0,19.0,8.0,1.0,0.065,920.0,33.0,2.0,7.0,22.0,383.0
4,McDonald’s,Quarter Pounder® with Cheese,510.0,230.0,26.0,12.0,1.5,0.090,1190.0,40.0,3.0,9.0,29.0,502.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
524,Burger King,BK® Café Mocha Frappe- 16 fl oz,400.0,90.0,10.0,7.0,0.0,0.040,270.0,68.0,1.0,58.0,9.0,456.0
525,Burger King,BK® Café Mocha Frappe- 20 fl oz,510.0,110.0,13.0,8.0,0.0,0.050,350.0,87.0,1.0,71.0,12.0,577.0
526,Burger King,BK® Café Caramel Frappe- 12 fl oz,300.0,80.0,8.0,5.0,0.0,0.035,210.0,50.0,0.0,41.0,7.0,339.0
527,Burger King,BK® Café Caramel Frappe- 16 fl oz,400.0,90.0,10.0,7.0,0.0,0.040,280.0,68.0,0.0,56.0,9.0,454.0


In [9]:
df['Sodium \n(mg)'] = df['Sodium \n(mg)'] / 1000

df

Unnamed: 0,Company,Item,Calories,Calories from\nFat,Total Fat\n(g),Saturated Fat\n(g),Trans Fat\n(g),Cholesterol\n(mg),Sodium \n(mg),Carbs\n(g),Fiber\n(g),Sugars\n(g),Protein\n(g),Weight Watchers\nPnts
0,McDonald’s,Hamburger,250.0,80.0,9.0,3.5,0.5,0.025,0.52,31.0,2.0,6.0,12.0,247.5
1,McDonald’s,Cheeseburger,300.0,110.0,12.0,6.0,0.5,0.040,0.75,33.0,2.0,6.0,15.0,297.0
2,McDonald’s,Double Cheeseburger,440.0,210.0,23.0,11.0,1.5,0.080,1.15,34.0,2.0,7.0,25.0,433.0
3,McDonald’s,McDouble,390.0,170.0,19.0,8.0,1.0,0.065,0.92,33.0,2.0,7.0,22.0,383.0
4,McDonald’s,Quarter Pounder® with Cheese,510.0,230.0,26.0,12.0,1.5,0.090,1.19,40.0,3.0,9.0,29.0,502.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
524,Burger King,BK® Café Mocha Frappe- 16 fl oz,400.0,90.0,10.0,7.0,0.0,0.040,0.27,68.0,1.0,58.0,9.0,456.0
525,Burger King,BK® Café Mocha Frappe- 20 fl oz,510.0,110.0,13.0,8.0,0.0,0.050,0.35,87.0,1.0,71.0,12.0,577.0
526,Burger King,BK® Café Caramel Frappe- 12 fl oz,300.0,80.0,8.0,5.0,0.0,0.035,0.21,50.0,0.0,41.0,7.0,339.0
527,Burger King,BK® Café Caramel Frappe- 16 fl oz,400.0,90.0,10.0,7.0,0.0,0.040,0.28,68.0,0.0,56.0,9.0,454.0


In [10]:
df.columns

Index(['Company', 'Item', 'Calories', 'Calories from\nFat', 'Total Fat\n(g)',
       'Saturated Fat\n(g)', 'Trans Fat\n(g)', 'Cholesterol\n(mg)',
       'Sodium \n(mg)', 'Carbs\n(g)', 'Fiber\n(g)', 'Sugars\n(g)',
       'Protein\n(g)', 'Weight Watchers\nPnts'],
      dtype='object')

In [11]:
df.columns = ['Company', 'Item', 'Calories', 'Calories from\nFat', 'Total Fat\n(g)',
       'Saturated Fat\n(g)', 'Trans Fat\n(g)', 'Cholesterol\n(g)',
       'Sodium \n(g)', 'Carbs\n(g)', 'Fiber\n(g)', 'Sugars\n(g)',
       'Protein\n(g)', 'Weight Watchers\nPnts']
df

Unnamed: 0,Company,Item,Calories,Calories from\nFat,Total Fat\n(g),Saturated Fat\n(g),Trans Fat\n(g),Cholesterol\n(g),Sodium \n(g),Carbs\n(g),Fiber\n(g),Sugars\n(g),Protein\n(g),Weight Watchers\nPnts
0,McDonald’s,Hamburger,250.0,80.0,9.0,3.5,0.5,0.025,0.52,31.0,2.0,6.0,12.0,247.5
1,McDonald’s,Cheeseburger,300.0,110.0,12.0,6.0,0.5,0.040,0.75,33.0,2.0,6.0,15.0,297.0
2,McDonald’s,Double Cheeseburger,440.0,210.0,23.0,11.0,1.5,0.080,1.15,34.0,2.0,7.0,25.0,433.0
3,McDonald’s,McDouble,390.0,170.0,19.0,8.0,1.0,0.065,0.92,33.0,2.0,7.0,22.0,383.0
4,McDonald’s,Quarter Pounder® with Cheese,510.0,230.0,26.0,12.0,1.5,0.090,1.19,40.0,3.0,9.0,29.0,502.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
524,Burger King,BK® Café Mocha Frappe- 16 fl oz,400.0,90.0,10.0,7.0,0.0,0.040,0.27,68.0,1.0,58.0,9.0,456.0
525,Burger King,BK® Café Mocha Frappe- 20 fl oz,510.0,110.0,13.0,8.0,0.0,0.050,0.35,87.0,1.0,71.0,12.0,577.0
526,Burger King,BK® Café Caramel Frappe- 12 fl oz,300.0,80.0,8.0,5.0,0.0,0.035,0.21,50.0,0.0,41.0,7.0,339.0
527,Burger King,BK® Café Caramel Frappe- 16 fl oz,400.0,90.0,10.0,7.0,0.0,0.040,0.28,68.0,0.0,56.0,9.0,454.0


In [12]:
# Crear columna con clave primaria
df.insert(0, 'item_id', df.reset_index().index)

In [13]:
df

Unnamed: 0,item_id,Company,Item,Calories,Calories from\nFat,Total Fat\n(g),Saturated Fat\n(g),Trans Fat\n(g),Cholesterol\n(g),Sodium \n(g),Carbs\n(g),Fiber\n(g),Sugars\n(g),Protein\n(g),Weight Watchers\nPnts
0,0,McDonald’s,Hamburger,250.0,80.0,9.0,3.5,0.5,0.025,0.52,31.0,2.0,6.0,12.0,247.5
1,1,McDonald’s,Cheeseburger,300.0,110.0,12.0,6.0,0.5,0.040,0.75,33.0,2.0,6.0,15.0,297.0
2,2,McDonald’s,Double Cheeseburger,440.0,210.0,23.0,11.0,1.5,0.080,1.15,34.0,2.0,7.0,25.0,433.0
3,3,McDonald’s,McDouble,390.0,170.0,19.0,8.0,1.0,0.065,0.92,33.0,2.0,7.0,22.0,383.0
4,4,McDonald’s,Quarter Pounder® with Cheese,510.0,230.0,26.0,12.0,1.5,0.090,1.19,40.0,3.0,9.0,29.0,502.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
524,499,Burger King,BK® Café Mocha Frappe- 16 fl oz,400.0,90.0,10.0,7.0,0.0,0.040,0.27,68.0,1.0,58.0,9.0,456.0
525,500,Burger King,BK® Café Mocha Frappe- 20 fl oz,510.0,110.0,13.0,8.0,0.0,0.050,0.35,87.0,1.0,71.0,12.0,577.0
526,501,Burger King,BK® Café Caramel Frappe- 12 fl oz,300.0,80.0,8.0,5.0,0.0,0.035,0.21,50.0,0.0,41.0,7.0,339.0
527,502,Burger King,BK® Café Caramel Frappe- 16 fl oz,400.0,90.0,10.0,7.0,0.0,0.040,0.28,68.0,0.0,56.0,9.0,454.0


In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 504 entries, 0 to 528
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   item_id               504 non-null    int64  
 1   Company               504 non-null    object 
 2   Item                  504 non-null    object 
 3   Calories              504 non-null    float64
 4   Calories from
Fat     504 non-null    float64
 5   Total Fat
(g)         504 non-null    float64
 6   Saturated Fat
(g)     504 non-null    float64
 7   Trans Fat
(g)         504 non-null    float64
 8   Cholesterol
(g)       504 non-null    float64
 9   Sodium 
(g)           504 non-null    float64
 10  Carbs
(g)             504 non-null    float64
 11  Fiber
(g)             504 non-null    float64
 12  Sugars
(g)            504 non-null    float64
 13  Protein
(g)           504 non-null    float64
 14  Weight Watchers
Pnts  504 non-null    float64
dtypes: float64(12), int64(1

In [15]:
df.columns = df.columns.str.replace('\n', ' ')
df.columns = df.columns.str.replace(' ', '_')
df.columns = df.columns.str.replace('(', '')
df.columns = df.columns.str.replace(')', '')

In [16]:
df

Unnamed: 0,item_id,Company,Item,Calories,Calories_from_Fat,Total_Fat_g,Saturated_Fat_g,Trans_Fat_g,Cholesterol_g,Sodium__g,Carbs_g,Fiber_g,Sugars_g,Protein_g,Weight_Watchers_Pnts
0,0,McDonald’s,Hamburger,250.0,80.0,9.0,3.5,0.5,0.025,0.52,31.0,2.0,6.0,12.0,247.5
1,1,McDonald’s,Cheeseburger,300.0,110.0,12.0,6.0,0.5,0.040,0.75,33.0,2.0,6.0,15.0,297.0
2,2,McDonald’s,Double Cheeseburger,440.0,210.0,23.0,11.0,1.5,0.080,1.15,34.0,2.0,7.0,25.0,433.0
3,3,McDonald’s,McDouble,390.0,170.0,19.0,8.0,1.0,0.065,0.92,33.0,2.0,7.0,22.0,383.0
4,4,McDonald’s,Quarter Pounder® with Cheese,510.0,230.0,26.0,12.0,1.5,0.090,1.19,40.0,3.0,9.0,29.0,502.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
524,499,Burger King,BK® Café Mocha Frappe- 16 fl oz,400.0,90.0,10.0,7.0,0.0,0.040,0.27,68.0,1.0,58.0,9.0,456.0
525,500,Burger King,BK® Café Mocha Frappe- 20 fl oz,510.0,110.0,13.0,8.0,0.0,0.050,0.35,87.0,1.0,71.0,12.0,577.0
526,501,Burger King,BK® Café Caramel Frappe- 12 fl oz,300.0,80.0,8.0,5.0,0.0,0.035,0.21,50.0,0.0,41.0,7.0,339.0
527,502,Burger King,BK® Café Caramel Frappe- 16 fl oz,400.0,90.0,10.0,7.0,0.0,0.040,0.28,68.0,0.0,56.0,9.0,454.0


In [17]:
import re

def limpiar_texto(texto):
    # Eliminar caracteres especiales y espacios innecesarios
    texto_limpio = re.sub(r"[^a-zA-Z0-9\s]", "", texto)
    texto_limpio = re.sub(r"\s+", " ", texto_limpio).strip()
    
    # Escapar comillas simples
    texto_limpio = re.sub(r"'", "''", texto_limpio)
    
    return texto_limpio

In [18]:
df['Company'] = df['Company'].apply(limpiar_texto)

In [19]:
df['Item'] = df['Item'].apply(limpiar_texto)

In [20]:
df

Unnamed: 0,item_id,Company,Item,Calories,Calories_from_Fat,Total_Fat_g,Saturated_Fat_g,Trans_Fat_g,Cholesterol_g,Sodium__g,Carbs_g,Fiber_g,Sugars_g,Protein_g,Weight_Watchers_Pnts
0,0,McDonalds,Hamburger,250.0,80.0,9.0,3.5,0.5,0.025,0.52,31.0,2.0,6.0,12.0,247.5
1,1,McDonalds,Cheeseburger,300.0,110.0,12.0,6.0,0.5,0.040,0.75,33.0,2.0,6.0,15.0,297.0
2,2,McDonalds,Double Cheeseburger,440.0,210.0,23.0,11.0,1.5,0.080,1.15,34.0,2.0,7.0,25.0,433.0
3,3,McDonalds,McDouble,390.0,170.0,19.0,8.0,1.0,0.065,0.92,33.0,2.0,7.0,22.0,383.0
4,4,McDonalds,Quarter Pounder with Cheese,510.0,230.0,26.0,12.0,1.5,0.090,1.19,40.0,3.0,9.0,29.0,502.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
524,499,Burger King,BK Caf Mocha Frappe 16 fl oz,400.0,90.0,10.0,7.0,0.0,0.040,0.27,68.0,1.0,58.0,9.0,456.0
525,500,Burger King,BK Caf Mocha Frappe 20 fl oz,510.0,110.0,13.0,8.0,0.0,0.050,0.35,87.0,1.0,71.0,12.0,577.0
526,501,Burger King,BK Caf Caramel Frappe 12 fl oz,300.0,80.0,8.0,5.0,0.0,0.035,0.21,50.0,0.0,41.0,7.0,339.0
527,502,Burger King,BK Caf Caramel Frappe 16 fl oz,400.0,90.0,10.0,7.0,0.0,0.040,0.28,68.0,0.0,56.0,9.0,454.0


In [21]:
df = df.to_csv(r'..\data\FastFood.csv', index=False)