In [12]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Data Loading and Cleaning

# Absolute path to the menu CSV.
file_path = '/content/menu.csv'

# Read the raw table into a DataFrame.
data = pd.read_csv(file_path)

# Preview the leading rows to get a feel for the columns.
preview_raw = data.head()
print("First few rows of the dataset:\n", preview_raw)

# Count missing entries per column (isna is the pandas alias of isnull).
missing_per_column = data.isna().sum()
print("\nMissing values in each column:\n", missing_per_column)

# Keep only the rows that have no missing value in any column.
data_cleaned = data.dropna(axis=0, how='any')

# Preview the cleaned table.
preview_cleaned = data_cleaned.head()
print("\nFirst few rows of the cleaned dataset:\n", preview_cleaned)



# Descriptive Statistics

# Mean, median, mode and standard deviation are computed on numeric columns
# only; text columns would make the aggregations fail or be meaningless.
numeric_cleaned = data_cleaned.select_dtypes(include='number')

# DataFrame.describe() does not report the mode and labels the median "50%",
# so the four advertised statistics are assembled explicitly, one row per
# numeric column.
# mode() returns a DataFrame (ties add extra rows); row 0 holds one mode per
# column. reindex([0]) yields a NaN row instead of an IndexError when the
# cleaned data has no rows at all.
stats_summary = pd.DataFrame({
    'mean': numeric_cleaned.mean(),
    'median': numeric_cleaned.median(),
    'mode': numeric_cleaned.mode().reindex([0]).iloc[0],
    'std': numeric_cleaned.std(),
})

print("Mean, Median, Mode and Standard Deviation:")
print(stats_summary)


# Analysis

# Basic statistics summary of the raw (uncleaned) data
print("\nSummary Statistics:")
print(data.describe())

# Nutrient Preferences - Stacked bar plot
nutrients = ['Total Fat', 'Saturated Fat', 'Cholesterol', 'Sodium', 'Carbohydrates', 'Dietary Fiber', 'Sugars', 'Protein']

# Each nutrient as a percentage of the row's nutrient total. The result is
# stored in its own frame: writing it back into `data` would replace the raw
# values that the correlation heatmaps and other plots below are computed from.
# Rows whose nutrients sum to 0 become NaN (0 / 0) and simply draw no bar.
# NOTE(review): the summed columns may be in different units (e.g. g vs mg);
# confirm that a share of their plain sum is the intended measure.
nutrient_totals = data[nutrients].sum(axis=1)
nutrient_share = data[nutrients].div(nutrient_totals, axis=0) * 100

# DataFrame.plot creates its own figure unless it is handed an Axes, which
# would leave an empty 16x9 figure behind; draw on explicitly created axes.
fig, ax = plt.subplots(figsize=(16, 9))
nutrient_share.plot(kind='bar', stacked=True, colormap='viridis', ax=ax)
ax.set_title('Nutrient Composition')
ax.set_xlabel('Item')
ax.set_ylabel('Percentage of Total')
# Label each bar with its menu item name instead of the row index.
ax.set_xticks(range(len(data['Item'])))
ax.set_xticklabels(data['Item'], rotation=45)
ax.legend(loc='upper right')
fig.tight_layout()
plt.show()

# Correlation matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Build the working DataFrame from the loaded data
df = pd.DataFrame(data)

# Columns whose pairwise correlations are displayed
nutrition_columns = [
    'Calories',
    'Calories from Fat',
    'Total Fat',
    'Saturated Fat',
    'Cholesterol',
    'Sodium',
    'Carbohydrates',
    'Dietary Fiber',
    'Sugars',
    'Protein',
]

# Pairwise correlation between the selected columns
corr_matrix = df.loc[:, nutrition_columns].corr()

# Draw the matrix as an annotated heatmap on a fixed [-1, 1] colour scale
plt.figure(figsize=(16, 9))
sns.heatmap(corr_matrix, vmin=-1, vmax=1, cmap='coolwarm', annot=True)
plt.title('Correlation Heatmap of Nutritional Content')
plt.tight_layout()
plt.show()


# Visualization

# Correlation heatmap over the wider set of numeric columns
# Build the working DataFrame from the loaded data
df = pd.DataFrame(data)

# Numeric columns that take part in the correlation matrix
numeric_cols = [
    'Calories',
    'Calories from Fat',
    'Total Fat',
    'Saturated Fat',
    'Trans Fat',
    'Cholesterol',
    'Sodium',
    'Carbohydrates',
    'Dietary Fiber',
    'Sugars',
    'Protein',
    'Vitamin A (% Daily Value)',
    'Vitamin C (% Daily Value)',
    'Calcium (% Daily Value)',
    'Iron (% Daily Value)',
]

# Pairwise correlation between the selected columns
selected_numeric = df.loc[:, numeric_cols]
corr_matrix = selected_numeric.corr()

# Draw the matrix as an annotated heatmap on a fixed [-1, 1] colour scale
plt.figure(figsize=(16, 9))
sns.heatmap(corr_matrix, vmin=-1, vmax=1, cmap='coolwarm', annot=True)
plt.title('Correlation Heatmap')
plt.tight_layout()
plt.show()


# Plotting

# Bar plot comparing Calories across menu items
plt.figure(figsize=(16, 9))
# barplot returns the Axes it drew on; use it to set the title and axis labels.
calories_axes = sns.barplot(data=data, x='Item', y='Calories', palette='viridis')
calories_axes.set_title('Calories Comparison')
calories_axes.set_xlabel('Item')
calories_axes.set_ylabel('Calories')
# Tilt the item names so neighbouring labels overlap less.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Line plot for Nutritional Components (Total Fat, Cholesterol, Sodium)
plt.figure(figsize=(16, 9))

# One line per component, drawn in this order with its own colour; the column
# name doubles as the legend label.
component_colors = (
    ('Total Fat', 'b'),
    ('Cholesterol', 'g'),
    ('Sodium', 'r'),
)
for component, line_color in component_colors:
    plt.plot(
        data['Item'],
        data[component],
        marker='o',
        linestyle='-',
        color=line_color,
        label=component,
    )

plt.title('Nutritional Components Comparison')
plt.xlabel('Item')
plt.ylabel('Amount')
# Tilt the item names so neighbouring labels overlap less.
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()

# Heatmap for Nutritional Content
nutrients = [
    'Calories',
    'Total Fat',
    'Saturated Fat',
    'Cholesterol',
    'Sodium',
    'Carbohydrates',
    'Dietary Fiber',
    'Sugars',
    'Protein',
]

# Pairwise correlation between the listed columns of the loaded data
nutrient_corr = data[nutrients].corr()

# Annotated heatmap on a fixed [-1, 1] colour scale
plt.figure(figsize=(16, 9))
sns.heatmap(nutrient_corr, vmin=-1, vmax=1, cmap='coolwarm', annot=True)
plt.title('Correlation Heatmap of Nutritional Content')
plt.tight_layout()
plt.show()

# Recommendations based on EDA
# Heading first, then one bullet per line, printed in order.
recommendation_lines = (
    "Recommendations based on EDA:",
    "- Consider reducing the sodium content across all items.",
    "- Increase options with lower calories and fat for healthier choices.",
    "- Highlight items with higher protein content for customers seeking higher protein diets.",
)
for recommendation in recommendation_lines:
    print(recommendation)




Output hidden; open in https://colab.research.google.com to view.