In [None]:
# Step 1: Import Required Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Step 2: Load the Dataset
# Location of the input CSV -- replace with your actual file name.
DATA_PATH = "/content/Age_and_health_data.csv"
df = pd.read_csv(DATA_PATH)


In [None]:
# Step 3: Initial Data Overview
# Quick look at the loaded frame: its first rows, the per-column info report,
# and the summary statistics produced by describe().
print("🔹 First 5 Rows:")
print(df.head())

print("\n🔹 Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so emits a stray "None" line.
df.info()

print("\n🔹 Summary Statistics (Numerical):")
print(df.describe())

In [None]:
# Checking missing values: count the null entries in each column.
print("\n🔹 Missing Values Check:")
null_counts = df.isnull().sum()
print(null_counts)

In [None]:
# Step 4: Value Counts for Categorical Columns
# Print a frequency table for each listed column under its own header.
categorical_cols = ['Gender', 'Activity Level', 'Dietary Preference', 'Disease']
for name in categorical_cols:
    print(f"\n🔹 Value counts for {name}:")
    frequencies = df[name].value_counts()
    print(frequencies)

In [None]:
# Step 5: Correlation Matrix for Numerical Columns
# Pairwise correlations are computed over the numeric-dtype columns only and
# drawn as an annotated heatmap on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(12, 8))
numeric_corr = df.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, annot=True, cmap='coolwarm', ax=ax)
ax.set(title="Correlation Heatmap")
plt.show()

# Step 6: Distribution Plots
# One histogram with a KDE overlay per listed feature, each in its own figure.
numeric_features = ['Ages', 'Height', 'Weight', 'Calories', 'Protein', 'Sugar', 'Sodium', 'Fat']

for feature in numeric_features:
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.histplot(df[feature], kde=True, color='skyblue', ax=ax)
    # Labels are applied after plotting so they override the ones seaborn sets.
    ax.set(title=f"Distribution of {feature}", xlabel=feature, ylabel="Count")
    plt.show()

# Step 7: Boxplots for Outlier Detection
# One boxplot per feature in `numeric_features`, values on the x-axis,
# each drawn in its own figure.
for feature in numeric_features:
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.boxplot(x=df[feature], ax=ax)
    ax.set(title=f"Boxplot of {feature}")
    plt.show()

# Step 8: Relationship Analysis
# Scatter of Calories against Weight, with points colored by Gender.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='Weight', y='Calories', hue='Gender', ax=ax)
ax.set(title="Calories vs Weight by Gender")
plt.show()

# Bar chart of Calories per Activity Level (bar height comes from seaborn's
# default estimator); x tick labels are rotated 45 degrees.
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(data=df, x='Activity Level', y='Calories', ax=ax)
ax.set(title="Average Calories by Activity Level")
plt.xticks(rotation=45)
plt.show()


In [None]:
# Step 9: Grouped Analysis
# Mean of the selected nutrient columns within each 'Dietary Preference' group.
print("\n🔹 Average Nutrient Intake by Dietary Preference:")
nutrient_cols = ['Calories', 'Protein', 'Fat', 'Sugar']
mean_by_diet = df.groupby('Dietary Preference')[nutrient_cols].mean()
print(mean_by_diet)

# Step 10: Countplot for Disease
# Horizontal count bars, one per disease, ordered by the index of
# value_counts() (most frequent first by pandas' default sort).
fig, ax = plt.subplots(figsize=(10, 6))
disease_order = df['Disease'].value_counts().index
sns.countplot(data=df, y='Disease', order=disease_order, ax=ax)
ax.set(title="Disease Occurrence")
plt.show()