# In [None]:
# 01-first-logical-notebook.ipynb

# 1. Import libraries and functions
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from src.load_data import load_data
from src.clean_data import clean_data

# 2. Load the data
# load_data is the project helper imported from src.load_data; it is handed the
# path of the raw CSV. (Presumably returns a pandas DataFrame -- the calls to
# .head(), .isna() and .describe() below rely on that; confirm in src/load_data.)
data_df = load_data('data/raw/South_East_Asia_Social_Media_MentalHealth.csv')

# 3. Initial preview of the data
# A bare `data_df.head()` is only rendered when it is the *last* expression of
# a notebook cell (and never in a plain script), so print it explicitly.
print(data_df.head())

# 4. Check for missing values
# Count of NaN/None entries in each column.
missing_values = data_df.isna().sum()
print("Missing values per column:")
print(missing_values)

# 5. Clean the data
# clean_data is the project helper imported from src.clean_data; its result
# replaces the raw frame for everything that follows.
data_df = clean_data(data_df)

# Verify cleaning
# Printed for the same reason as the preview above: a mid-cell expression
# would otherwise be silently discarded.
print(data_df.head())

# 6. Summary statistics
print(data_df.describe())

# 7. Data Exploration
# Each plot is titled through the Axes object that seaborn returns, then shown
# on its own before the next figure is started.

# Column holding the daily usage figure; shared by plots (a) and (c).
usage_col = 'Daily SM Usage (hrs)'

# a. Daily Social Media Usage Across Age Groups
age_ax = sns.boxplot(data=data_df, x='Age Group', y=usage_col)
age_ax.set_title('Daily SM Usage Across Age Groups')
plt.show()

# b. Most Used Social Media Platform by Gender
# A wider figure is opened first so the platform categories have room on the x axis.
plt.figure(figsize=(12, 6))
platform_ax = sns.countplot(
    data=data_df,
    x='Most Used SM Platform',
    hue='Gender',
    palette='pastel',
)
platform_ax.set_title('Most Used Social Media Platform by Gender')
plt.show()

# c. Daily SM Usage in Urban vs. Rural Areas
area_ax = sns.boxplot(data=data_df, x='Urban/Rural', y=usage_col)
area_ax.set_title('Daily SM Usage in Urban vs. Rural Areas')
plt.show()

# 8. Conclusion
closing_message = "In this notebook we loaded, cleaned, and performed initial data exploration."
print(closing_message)
