In [None]:
# ============================================================
# 📚 1. Import Libraries
# ============================================================
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Plot defaults: seaborn "whitegrid" theme and a larger default figure size
sns.set_theme(style="whitegrid")
plt.rcParams.update({'figure.figsize': (12, 7)})

# Read the CSV into the DataFrame that the rest of the script works on
df = pd.read_csv('mobile_addiction_factors_50cols.csv')

# ============================================================
# 🔍 2. Basic Data Exploration
# ============================================================

# Each entry pairs a printed heading with the call that renders the matching
# overview. The calls are deferred so the output appears in the listed order:
# first rows, dataset info, statistical summary, missing values per column.
exploration_steps = (
    ("🔹 First 5 rows:", lambda: display(df.head())),
    # info() prints its report itself, so it is called directly.
    ("🔹 Dataset Information:", df.info),
    ("🔹 Statistical Summary:", lambda: display(df.describe())),
    ("🔹 Missing Values:", lambda: display(df.isnull().sum())),
)

for heading, show in exploration_steps:
    print(heading)
    show()

# ============================================================
# 📈 3. Univariate Analysis
# ============================================================

# Age Distribution: 10-bin histogram with a KDE curve overlaid
plt.figure()
sns.histplot(df['Age'], bins=10, kde=True, color='dodgerblue')
plt.title('Age Distribution of Students')
plt.xlabel('Age')
plt.ylabel('Number of Students')
plt.show()

# Gender Count
# Passing `palette` without `hue` is deprecated since seaborn 0.13 (it emits a
# FutureWarning and removal is announced for 0.14), so the x variable is also
# assigned to `hue`. dodge=False keeps one full-width bar per category on
# releases that would otherwise dodge bars by hue, and a legend, if one gets
# drawn, is removed because it would only repeat the x tick labels.
plt.figure()
gender_ax = sns.countplot(x='Gender', hue='Gender', data=df, palette='Set2', dodge=False)
if gender_ax.get_legend() is not None:
    gender_ax.get_legend().remove()
plt.title('Gender Distribution')
plt.xlabel('Gender')
plt.ylabel('Count')
plt.show()

# Year of Study Count (same hue / dodge / legend handling as the gender plot)
plt.figure()
year_ax = sns.countplot(
    x='Year_of_Study', hue='Year_of_Study', data=df, palette='coolwarm', dodge=False
)
if year_ax.get_legend() is not None:
    year_ax.get_legend().remove()
plt.title('Year of Study Distribution')
plt.show()

# Average Screen Time Distribution: 20-bin histogram with a KDE curve
plt.figure()
sns.histplot(df['Average_Screen_Time_per_Day'], bins=20, kde=True, color='purple')
plt.title('Average Screen Time per Day')
plt.xlabel('Hours')
plt.ylabel('Number of Students')
plt.show()

# Pie Chart of Main Purpose of Mobile Usage (share of each category, 1 decimal)
plt.figure()
df['Main_Purpose_of_Use'].value_counts().plot.pie(autopct='%1.1f%%')
plt.title('Main Purpose of Mobile Usage')
plt.ylabel('')  # blank out the y-axis label that pandas adds to pie plots
plt.show()

# ============================================================
# 📈 4. Bivariate Analysis
# ============================================================

# Spread of average daily screen time (single horizontal box)
_, ax = plt.subplots()
sns.boxplot(data=df, x='Average_Screen_Time_per_Day', color='skyblue', ax=ax)
ax.set_title('Boxplot of Average Screen Time per Day')
plt.show()

# Spread of academic performance (CGPA)
_, ax = plt.subplots()
sns.boxplot(data=df, x='Academic_Performance_CGPA', color='lightgreen', ax=ax)
ax.set_title('Boxplot of Academic Performance (CGPA)')
plt.show()

# Sleep hours per night, one violin per self-reported addiction level
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue`; the call is kept as written here - confirm against the installed version.
_, ax = plt.subplots()
sns.violinplot(
    data=df,
    x='Self_Reported_Addiction_Level',
    y='Sleep_Hours_Per_Night',
    palette='Set1',
    ax=ax,
)
ax.set(title='Sleep Hours vs Addiction Level', xlabel='Addiction Level', ylabel='Sleep Hours')
plt.show()

# Screen time against CGPA, points coloured by gender
_, ax = plt.subplots()
sns.scatterplot(
    data=df,
    x='Average_Screen_Time_per_Day',
    y='Academic_Performance_CGPA',
    hue='Gender',
    ax=ax,
)
ax.set(
    title='Screen Time vs Academic Performance',
    xlabel='Average Screen Time (hours)',
    ylabel='CGPA',
)
plt.show()

# Daily social media usage against sleep hours
_, ax = plt.subplots()
sns.scatterplot(
    data=df,
    x='Daily_Social_Media_Usage_Hours',
    y='Sleep_Hours_Per_Night',
    color='orange',
    ax=ax,
)
ax.set_title('Social Media Usage vs Sleep Hours')
plt.show()

# Notifications received per day, one box per addiction level
_, ax = plt.subplots()
sns.boxplot(
    data=df,
    x='Self_Reported_Addiction_Level',
    y='Notifications_Received_Per_Day',
    ax=ax,
)
ax.set_title('Notifications vs Addiction Level')
plt.show()

# Row counts per family background, split by addiction level
_, ax = plt.subplots()
sns.countplot(data=df, x='Family_Background', hue='Self_Reported_Addiction_Level', ax=ax)
ax.set(title='Family Background vs Addiction Level', xlabel='Family Background', ylabel='Count')
plt.show()

# ============================================================
# 📈 5. Multivariate Analysis
# ============================================================

# Correlation heatmap: pairwise correlations of the numeric columns only,
# annotated to two decimals, drawn on an oversized 18x15 canvas
corr_matrix = df.corr(numeric_only=True)
_, heatmap_ax = plt.subplots(figsize=(18, 15))
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=heatmap_ax)
heatmap_ax.set_title('Correlation Matrix - Mobile Addiction Factors')
plt.show()

# Pairplot of the key columns, with KDE curves on the diagonal
key_columns = [
    'Average_Screen_Time_per_Day',
    'Daily_Social_Media_Usage_Hours',
    'Sleep_Hours_Per_Night',
    'Academic_Performance_CGPA',
]
sns.pairplot(df[key_columns], diag_kind='kde')
# y=1.02 places the figure-level title just above the top row of panels
plt.suptitle('Pairplot of Key Factors', y=1.02)
plt.show()

# ============================================================
# 📈 6. Advanced Group Analysis
# ============================================================

# Average CGPA by Year of Study (bar chart of the per-year mean)
plt.figure()
df.groupby('Year_of_Study')['Academic_Performance_CGPA'].mean().plot(kind='bar', color='green')
plt.title('Average CGPA by Year of Study')
plt.ylabel('Average CGPA')
plt.show()

# Hostel vs Day Scholar Screen Time Comparison
# Passing `palette` without `hue` is deprecated since seaborn 0.13 (it emits a
# FutureWarning and removal is announced for 0.14), so the x variable is also
# assigned to `hue`. dodge=False keeps one full-width box per category on
# releases that would otherwise dodge boxes by hue, and a legend, if one gets
# drawn, is removed because it would only repeat the x tick labels.
plt.figure()
residence_ax = sns.boxplot(
    x='Hostel_or_Day_Scholar',
    y='Average_Screen_Time_per_Day',
    hue='Hostel_or_Day_Scholar',
    data=df,
    palette='Set3',
    dodge=False,
)
if residence_ax.get_legend() is not None:
    residence_ax.get_legend().remove()
plt.title('Hostel vs Day Scholar Screen Time Comparison')
plt.show()

# Physical Activity vs Screen Time (same hue / dodge / legend handling as above)
plt.figure()
activity_ax = sns.boxplot(
    x='Participation_in_Physical_Activities',
    y='Average_Screen_Time_per_Day',
    hue='Participation_in_Physical_Activities',
    data=df,
    palette='pastel',
    dodge=False,
)
if activity_ax.get_legend() is not None:
    activity_ax.get_legend().remove()
plt.title('Participation in Physical Activities vs Screen Time')
plt.show()

# Weekly Gaming Hours: 20-bin histogram with a KDE curve
plt.figure()
sns.histplot(df['Gaming_Hours_Per_Week'], bins=20, color='teal', kde=True)
plt.title('Weekly Gaming Hours Distribution')
plt.xlabel('Gaming Hours')
plt.show()

# ============================================================
# 📈 7. Fun / Deep Insights
# ============================================================

# Favorite Apps: bar chart of the 10 most frequent values
plt.figure()
df['Favorite_App'].value_counts().head(10).plot(kind='bar', color='coral')
plt.title('Top 10 Favorite Apps')
plt.ylabel('Number of Students')
plt.show()

# Mobile Expenditure vs Year
# Passing `palette` without `hue` is deprecated since seaborn 0.13 (it emits a
# FutureWarning and removal is announced for 0.14), so the x variable is also
# assigned to `hue`. dodge=False keeps one full-width box per category on
# releases that would otherwise dodge boxes by hue, and a legend, if one gets
# drawn, is removed because it would only repeat the x tick labels.
plt.figure()
spend_ax = sns.boxplot(
    x='Year_of_Study',
    y='Expenditure_on_Mobile_Per_Month',
    hue='Year_of_Study',
    data=df,
    palette='Set2',
    dodge=False,
)
if spend_ax.get_legend() is not None:
    spend_ax.get_legend().remove()
plt.title('Monthly Mobile Expenditure by Year of Study')
plt.show()

# Mobile Usage Before Sleep (Outlier detection): single vertical box
plt.figure()
sns.boxplot(y=df['Mobile_Usage_Before_Sleep'], color='lightcoral')
plt.title('Mobile Usage Before Sleep - Outlier Detection')
plt.show()

# Mindfulness Practice Status: pie chart of value counts
# Only two slice colours are supplied; matplotlib cycles through them if the
# column has more than two categories.
plt.figure()
df['Mindfulness_Practice'].value_counts().plot(kind='pie', autopct='%1.1f%%', colors=['lightblue', 'lightgreen'])
plt.title('Mindfulness Practice Status')
plt.ylabel('')  # blank out the y-axis label that pandas adds to pie plots
plt.show()
