# Recipe Site Traffic EDA

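This notebook performs an exploratory data analysis (EDA) of the recipe site traffic dataset (`recipe_site_traffic_2212.csv`): it converts and imputes the numeric columns, plots the category and calorie distributions and the correlations between numeric columns, and saves the cleaned data to `cleaned_recipe_data.csv`.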

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import SimpleImputer

# Load the raw recipe traffic data (path is relative to the notebook's working directory).
df = pd.read_csv("recipe_site_traffic_2212.csv")

# Convert servings to numeric.
# A plain `errors="coerce"` turns every entry that is not a bare number
# (e.g. a count followed by descriptive text) into NaN, and the median
# imputation below would then silently overwrite it. Entries that already
# parse keep exactly the same value; only entries that would otherwise be
# lost fall back to the number they start with.
servings_raw = df["servings"]
servings_parsed = pd.to_numeric(servings_raw, errors="coerce")
servings_leading = pd.to_numeric(
    servings_raw.astype(str).str.extract(r"^\s*(\d+(?:\.\d+)?)", expand=False),
    errors="coerce",
)
df["servings"] = servings_parsed.fillna(servings_leading)

# Fill the remaining missing values in the numeric columns with each column's median.
# `num_cols` is reused by the correlation heatmap cell further down.
num_cols = ["calories", "carbohydrate", "sugar", "protein", "servings"]
imputer = SimpleImputer(strategy="median")
df[num_cols] = imputer.fit_transform(df[num_cols])

# Preview the first rows of the cleaned frame.
df.head()


In [None]:

# Bar chart of how many recipes fall into each category,
# ordered by the result of value_counts() (most frequent first).
category_order = df["category"].value_counts().index

fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(data=df, x="category", order=category_order, ax=ax)
# Tilt the category labels on the x axis by 45 degrees.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.set_title("Recipe Category Distribution")
plt.show()


In [None]:

# Histogram of the calories column (30 bins) with a kernel density estimate overlaid.
fig, ax = plt.subplots(figsize=(8, 5))
calories = df["calories"]
sns.histplot(calories, bins=30, kde=True, ax=ax)
ax.set_title("Calories Distribution")
plt.show()


In [None]:

# Annotated heatmap of the pairwise correlations between the columns in `num_cols`
# (the list defined in the loading/cleaning cell).
corr_matrix = df[num_cols].corr()

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", ax=ax)
ax.set_title("Correlation Heatmap (Nutritional Values)")
plt.show()


In [None]:

# Box plot comparing the spread of calories across recipe categories.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x="category", y="calories", ax=ax)
# Tilt the category labels on the x axis by 45 degrees.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.set_title("Calories by Category")
plt.show()


In [None]:

# Persist the cleaned frame as CSV; index=False leaves the row index out of the file.
df.to_csv(path_or_buf="cleaned_recipe_data.csv", index=False)
# Confirm the export in the cell output.
print("Cleaned dataset saved as cleaned_recipe_data.csv")
