## Task: prep_time_distribution

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the cleaned salad dataset and draw a histogram of the `prep_time` column.
df = pd.read_csv('data/cleaned_salad_data.csv')

# Explicit figure/axes pair instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df['prep_time'], bins=20, edgecolor='black')
ax.set_xlabel('Preparation Time (minutes)')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Preparation Times')

# Write the histogram as a PNG relative to the current working directory.
fig.savefig('prep_time_distribution.png')

### Plot
![](/Users/boubker/Documents/intelligent-reporting/sandbox/output/prep_time_distribution_2e45_prep_time_distribution.png)

## Task: rating_distribution

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Seed NumPy's global RNG (no statement in this cell draws random numbers).
np.random.seed(42)
df = pd.read_csv('data/cleaned_salad_data.csv')

# Number of rows per `n_star` value, ordered by the rating value itself.
rating_counts = df['n_star'].value_counts().sort_index()

# Bar chart of the counts, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(10, 6))
rating_counts.plot.bar(ax=ax)
ax.set_xlabel('Star Rating')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Star Ratings')
fig.tight_layout()
fig.savefig('rating_distribution.png')

### Plot
![](/Users/boubker/Documents/intelligent-reporting/sandbox/output/rating_distribution_0574_rating_distribution.png)

## Task: calories_vs_rating_box_plot

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.read_csv('data/cleaned_salad_data.csv')

# One box per `n_star` value, summarising the `Calories` values in that group.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='n_star', y='Calories', ax=ax)
ax.set_title('Calorie Distribution by Star Rating')
ax.set_xlabel('Star Rating')
ax.set_ylabel('Calories')
fig.savefig('calories_vs_rating_box_plot.png')

### Plot
![](/Users/boubker/Documents/intelligent-reporting/sandbox/output/calories_vs_rating_box_plot_6309_calories_vs_rating_box_plot.png)

## Task: reviews_vs_prep_time

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Scatter of `n_review` against `prep_time` with a fitted regression line.
df = pd.read_csv('data/cleaned_salad_data.csv')
# Keep only the two columns being compared and drop rows missing either value.
df = df[['prep_time', 'n_review']].dropna()

fig, ax = plt.subplots(figsize=(10, 6))
# Raw observations first ...
sns.scatterplot(data=df, x='prep_time', y='n_review', ax=ax)
# ... then the regression line on top (scatter=False avoids drawing the points
# a second time). regplot estimates its confidence band by bootstrap resampling;
# passing an explicit seed makes the shaded band identical on every run instead
# of changing each time the notebook is re-executed.
sns.regplot(
    data=df,
    x='prep_time',
    y='n_review',
    scatter=False,
    color='red',
    seed=42,
    ax=ax,
)
ax.set_xlabel('Preparation Time')
ax.set_ylabel('Number of Reviews')
ax.set_title('Correlation between Preparation Time and Number of Reviews')
fig.savefig('reviews_vs_prep_time.png')

### Plot
![](/Users/boubker/Documents/intelligent-reporting/sandbox/output/reviews_vs_prep_time_b6f9_reviews_vs_prep_time.png)

## Task: nutrition_correlations

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.read_csv('data/cleaned_salad_data.csv')

# Candidate nutrition fields; only the ones actually present in the file are used.
nutrition_columns = [
    "Calories",
    "Total Fat",
    "Saturated Fat",
    "Carbohydrates",
    "Dietary Fiber",
    "Sugar",
    "Protein",
    "Cholesterol",
    "Sodium",
]
available_columns = [name for name in nutrition_columns if name in df.columns]

# Pairwise correlation (pandas default method) between the available columns.
correlation_matrix = df[available_columns].corr()

# Annotated heatmap with the diverging colour map centred on zero.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    center=0,
    square=True,
    ax=ax,
)
ax.set_title('Nutritional Components Correlation Matrix')
fig.tight_layout()
fig.savefig('nutrition_correlations_heatmap.png')

### Plot
![](/Users/boubker/Documents/intelligent-reporting/sandbox/output/nutrition_correlations_91ea_nutrition_correlations_heatmap.png)

## Task: top_calorie_recipes

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Seed NumPy's global RNG. Nothing in this cell draws random numbers; the call
# is kept so the kernel's RNG state is the same for whatever runs afterwards.
np.random.seed(42)
df = pd.read_csv('data/cleaned_salad_data.csv')
# Keep the ten rows with the largest `Calories`, highest first.
df = df.sort_values('Calories', ascending=False).head(10)

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(df['salad_title'], df['Calories'])
# barh places the first row at the bottom of the axis. The frame is sorted
# highest-first, so flip the y-axis to make the ranking read top-to-bottom
# with the highest-calorie recipe on top.
ax.invert_yaxis()
ax.set_xlabel('Calories')
ax.set_ylabel('Salad Recipe')
ax.set_title('Top 10 Highest Calorie Salad Recipes')
fig.tight_layout()
fig.savefig('top_calorie_recipes.png')

### Plot
![](/Users/boubker/Documents/intelligent-reporting/sandbox/output/top_calorie_recipes_2309_top_calorie_recipes.png)

## Task: nutrition_summary_stats

In [None]:
import pandas as pd
import numpy as np

# Summary statistics (mean, median, std, min, max) for the nutrition columns.
df = pd.read_csv('data/cleaned_salad_data.csv')

# Candidate nutrition fields; only the ones actually present in the file are used.
nutrition_cols = ["Calories", "Total Fat", "Saturated Fat", "Carbohydrates", "Dietary Fiber", "Sugar", "Protein", "Cholesterol", "Sodium"]
available_cols = [col for col in nutrition_cols if col in df.columns]

# describe() reports the median under the '50%' label; select the five rows of
# interest and give them reader-friendly names.
summary_stats = df[available_cols].describe().loc[['mean', '50%', 'std', 'min', 'max']]
summary_stats.index = ['Mean', 'Median', 'Std Dev', 'Min', 'Max']

# Render through to_string() so every column is printed. A plain print() of the
# frame elides the middle columns with "..." once the table is wider than the
# display width, which hides part of the summary.
print(summary_stats.round(2).to_string())

### Output
```
         Calories  Total Fat  Saturated Fat  ...  Protein  Cholesterol  Sodium
Mean       404.75      27.11           5.91  ...    13.95         0.98    2.74
Median     349.00      23.00           5.00  ...    10.00         0.00    0.60
Std Dev    265.29      22.95           5.43  ...    12.94         3.12    6.65
Min          0.00       0.00           0.00  ...     0.40         0.00    0.00
Max       1759.00     170.00          34.00  ...    86.00        25.00   56.00

[5 rows x 9 columns]

```