## Exploratory Data Analysis (EDA) and Insights

### Subtask:
Conduct exploratory data analysis to uncover patterns, relationships, and key insights related to obesity. Present these findings in markdown format.

**Reasoning**:
To analyze the distribution of the 'NObeyesdad' column, I will first calculate its value counts and then visualize these counts using a bar plot to understand the prevalence of different obesity levels.


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Distribution of 'NObeyesdad'
# Count each obesity level once and reuse the result for both the printed
# summary and the bar order (value_counts() sorts by descending frequency).
obesity_counts = df['NObeyesdad'].value_counts()
print("Distribution of Obesity Levels:\n", obesity_counts)

plt.figure(figsize=(10, 6))
# Passing `palette` without `hue` is deprecated in seaborn 0.13 and slated for
# removal in 0.14. Mapping the x variable to `hue` (same level order, no
# dodging) colours the bars exactly as before without the FutureWarning.
ax = sns.countplot(
    data=df,
    x='NObeyesdad',
    hue='NObeyesdad',
    palette='viridis',
    order=obesity_counts.index,
    hue_order=obesity_counts.index,
    dodge=False,
)
# Older seaborn versions draw a legend that merely repeats the x tick labels;
# drop it if present (newer versions already omit it for a redundant hue).
legend = ax.get_legend()
if legend is not None:
    legend.remove()
plt.title('Distribution of Obesity Levels')
plt.xlabel('Obesity Level')
plt.ylabel('Count')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

<img src="../assets/distribution of obesity level.png" alt="Bar chart of the distribution of obesity levels">

In [None]:
# Grouped count plot: number of rows per obesity level, split by gender.
# Bars follow the category order of the categorical 'NObeyesdad' column.
fig, ax = plt.subplots(figsize=(12, 7))
sns.countplot(
    data=df,
    x='NObeyesdad',
    hue='Gender',
    palette='tab10',
    order=df['NObeyesdad'].cat.categories,
    ax=ax,
)
ax.set_title('Obesity Levels by Gender')
ax.set_xlabel('Obesity Level')
ax.set_ylabel('Count')
# Slant the tick labels so the long level names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.legend(title='Gender')
fig.tight_layout()
plt.show()

<img src="../assets/obesitylevelbygender.png" alt="Grouped bar chart of obesity levels by gender">

In [None]:
numerical_features = ['Age', 'Height', 'Weight']

# The category order is the same for every panel, so look it up once.
obesity_levels = df['NObeyesdad'].cat.categories

# One violin plot per numerical feature, laid out side by side in a single row.
fig = plt.figure(figsize=(18, 6))
for position, feature in enumerate(numerical_features, start=1):
    # Column count follows the feature list so adding a feature needs no
    # further layout changes.
    ax = fig.add_subplot(1, len(numerical_features), position)
    # Passing `palette` without `hue` is deprecated in seaborn 0.13 and slated
    # for removal in 0.14. Mapping the x variable to `hue` (same level order,
    # no dodging) keeps the per-level colours without the FutureWarning.
    sns.violinplot(
        data=df,
        x='NObeyesdad',
        y=feature,
        hue='NObeyesdad',
        palette='coolwarm',
        order=obesity_levels,
        hue_order=obesity_levels,
        dodge=False,
        ax=ax,
    )
    # Older seaborn versions draw a legend that merely repeats the x tick
    # labels; drop it if present.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    ax.set_title(f'Distribution of {feature} by Obesity Level')
    ax.set_xlabel('Obesity Level')
    ax.set_ylabel(feature)
    # Slant the tick labels so the long level names do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.tight_layout()
plt.show()

<img src="../assets/distribution.png" alt="Violin plots of Age, Height and Weight by obesity level">

In [None]:
categorical_features = ['family_history_with_overweight', 'FAVC', 'CAEC', 'SMOKE', 'MTRANS']

# Category order of the obesity-level column, shared by every panel.
obesity_levels = df['NObeyesdad'].cat.categories

# One grouped count plot per categorical feature on a 3x2 grid; with five
# features the sixth grid slot is never created and stays blank.
fig = plt.figure(figsize=(20, 15))
for slot, feature in enumerate(categorical_features, start=1):
    ax = fig.add_subplot(3, 2, slot)
    sns.countplot(
        data=df,
        x='NObeyesdad',
        hue=feature,
        palette='tab10',
        order=obesity_levels,
        ax=ax,
    )
    ax.set_title(f'Obesity Levels by {feature}')
    ax.set_xlabel('Obesity Level')
    ax.set_ylabel('Count')
    # Slant the tick labels so the long level names do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    ax.legend(title=feature)
fig.tight_layout()
plt.show()

<img src="../assets/obesitylevels.png" alt="Count plots of obesity levels by family history, FAVC, CAEC, SMOKE and MTRANS">