
# 📊 Social Media Usage Analysis: Student Dataset

This notebook performs an end-to-end exploratory analysis of a student dataset that includes age, average daily social media usage hours, mental health scores, and self-reported academic impact.

## Covered Sections:
1. Basic Data Exploration
2. Missing Data Investigation
3. Duplicate Check
4. Data Validation
5. Descriptive Statistics
6. Platform Stats
7. Usage Patterns
8. Academic Impact
9. Sleep & Mental Health
10. Country Analysis
11. Relationship Status
12. New Categories (Binning)


In [None]:

import numpy as np
import pandas as pd

# Read the student dataset from a CSV file (path is relative to the working
# directory) into `df`, the DataFrame used by every later cell.
df = pd.read_csv(filepath_or_buffer="kaggle/one/one.csv")


## 1. 📌 Basic Data Exploration

In [None]:

# Structural overview: column names, dtypes and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly -- wrapping it in print() would append a stray "None".
df.info()

# First and last rows, to eyeball the raw records.
print(df.head())
print(df.tail())

# Headline numbers for the key numeric columns.
print("Mean Age:", df["Age"].mean())
print("Max Daily Usage (Hours):", df["Avg_Daily_Usage_Hours"].max())
print("Min Daily Usage (Hours):", df["Avg_Daily_Usage_Hours"].min())
print("Mean Mental Health Score:", df["Mental_Health_Score"].mean())


## 2. 🔍 Missing Data Investigation

In [None]:

# Count the missing entries in each column (isna() flags NaN/None cells,
# sum() totals the flags per column).
missing_per_column = df.isna().sum()

print("Missing Values per Column:")
print(missing_per_column)


## 3. 📑 Duplicate Check

In [None]:

# Repeated IDs: every occurrence after the first one is flagged as a duplicate.
duplicate_id_count = df["Student_ID"].duplicated().sum()
print("Duplicate Student_IDs:", duplicate_id_count)

# Rows that are identical to an earlier row across all columns.
duplicate_row_count = df.duplicated().sum()
print("Duplicate Rows:", duplicate_row_count)


## 4. ✅ Data Validation

In [None]:

# Range checks on the numeric columns. Each line reports how many rows violate
# one bound; comparisons against NaN evaluate to False, so missing values are
# not counted as invalid here.
print("Invalid Age Entries:", (df['Age'] < 0).sum())

# Daily usage is checked on both sides, mirroring the sleep checks below:
# hours in a day cannot be negative or exceed 24.
print("Invalid Daily Usage (<0):", (df['Avg_Daily_Usage_Hours'] < 0).sum())
print("Unrealistic Daily Usage (>24hrs):", (df['Avg_Daily_Usage_Hours'] > 24).sum())

print("Invalid Sleep Hours (<0):", (df['Sleep_Hours_Per_Night'] < 0).sum())
print("Invalid Sleep Hours (>24):", (df['Sleep_Hours_Per_Night'] > 24).sum())


## 5. 📊 Descriptive Statistics

In [None]:

# Age: mean and most frequent value. mode() can return several values when
# there is a tie; only the first one is reported.
age_mean = df["Age"].mean()
age_mode = df["Age"].mode()[0]
print("Age - Mean:", age_mean, ", Mode:", age_mode)

# Number of rows per gender value.
gender_counts = df["Gender"].value_counts()
print("Gender Count:", gender_counts)

# Daily usage: mean and first mode, same approach as for age.
usage_mean = df["Avg_Daily_Usage_Hours"].mean()
usage_mode = df["Avg_Daily_Usage_Hours"].mode()[0]
print("Avg Daily Usage - Mean:", usage_mean, ", Mode:", usage_mode)

# Mental health score: mean only.
mental_health_mean = df["Mental_Health_Score"].mean()
print("Mental Health Score - Mean:", mental_health_mean)


## 6. 🧵 Platform Stats

In [None]:

# Frequency of each value in the Most_Used_Platform column.
pf = df["Most_Used_Platform"].value_counts()
print(pf)

# The platform with the highest count, and that count.
top_platform = pf.idxmax()
top_platform_count = pf.max()
print("Most Used Platform:", top_platform, "| Count:", top_platform_count)

# Contingency table: one row per platform, one column per gender.
print("Gender vs Platform Crosstab:")
platform_by_gender = pd.crosstab(index=df["Most_Used_Platform"], columns=df["Gender"])
print(platform_by_gender)


## 7. 📈 Usage Patterns

In [None]:

# Mean daily usage split by academic level and by gender.
print(df.groupby('Academic_Level')["Avg_Daily_Usage_Hours"].mean())
print(df.groupby('Gender')["Avg_Daily_Usage_Hours"].mean())

# Bin ages into labelled groups. pd.cut intervals are right-closed by default,
# so the bins are (17, 19], (19, 21], (21, 24] and (24, 100]; ages outside
# (17, 100] become NaN. The new column is also read by a later cell.
df["Age_Group"] = pd.cut(df["Age"], bins=[17, 19, 21, 24, 100], labels=["18-19", "20-21", "22-24", "25+"])

# Age_Group is categorical. Passing observed=False explicitly keeps every label
# in the result (empty groups show NaN) and avoids relying on the implicit
# default, which recent pandas versions deprecate with a FutureWarning.
print(df.groupby("Age_Group", observed=False)["Avg_Daily_Usage_Hours"].mean())


## 8. 🎓 Academic Impact

In [None]:

# Number of rows per Affects_Academic_Performance value.
impact_counts = df["Affects_Academic_Performance"].value_counts()
print(impact_counts)

# Mean daily usage within each of those groups.
usage_by_impact = df.groupby("Affects_Academic_Performance")["Avg_Daily_Usage_Hours"].mean()
print(usage_by_impact)


## 9. 💤 Sleep & Mental Health

In [None]:

# Correlation (Series.corr default method) of daily usage with sleep hours and
# with the mental health score.
print("Correlation between Usage & Sleep:", df["Avg_Daily_Usage_Hours"].corr(df["Sleep_Hours_Per_Night"]))
print("Correlation between Usage & Mental Health:", df["Avg_Daily_Usage_Hours"].corr(df["Mental_Health_Score"]))

# Bin sleep hours so the intervals match their labels. right=False makes the
# bins left-closed: [0, 6) -> "<6", [6, 8) -> "6-8", [8, 24) -> "8+".
# With the default right-closed bins, exactly 6 hours was labelled "<6",
# exactly 8 hours was labelled "6-8", and 0 hours fell outside every bin.
# Values below 0 or at/above 24 fall outside the bins and become NaN.
df["Sleep_Group"] = pd.cut(
    df["Sleep_Hours_Per_Night"],
    bins=[0, 6, 8, 24],
    labels=["<6", "6-8", "8+"],
    right=False,
)

# Sleep_Group is categorical. observed=False is passed explicitly so every
# label appears in the output and the deprecated implicit default is not used.
print(df.groupby("Sleep_Group", observed=False)["Mental_Health_Score"].mean())


## 10. 🌍 Country-wise Usage Analysis

In [None]:

# Row count per country, most frequent first; show the first five entries.
ct = df["Country"].value_counts()
print(ct.head())

# Names of those five most frequent countries.
top = ct.head().index

# Keep only rows from those countries, then average daily usage per country.
in_top_countries = df["Country"].isin(top)
top_country_rows = df[in_top_countries]
print(top_country_rows.groupby("Country")["Avg_Daily_Usage_Hours"].mean())


## 11. 💑 Relationship Status

In [None]:

# Row count per relationship status.
rs = df["Relationship_Status"].value_counts()
print(rs)

# Per-status mean of each metric, printed one metric at a time in the same
# order as before: daily usage first, then the addiction score.
for metric in ("Avg_Daily_Usage_Hours", "Addicted_Score"):
    print(df.groupby("Relationship_Status")[metric].mean())


## 12. 🧮 Creating New Categories

In [None]:

# Bucket daily usage into Light (up to 3h), Moderate (over 3h up to 6h) and
# Heavy (over 6h). include_lowest=True closes the first bin on the left so a
# value of exactly 0 hours is labelled "Light" instead of falling outside the
# bins and becoming NaN; all other values are binned exactly as before.
df["Usage_Category"] = pd.cut(
    df["Avg_Daily_Usage_Hours"],
    bins=[0, 3, 6, float("inf")],
    labels=["Light", "Moderate", "Heavy"],
    include_lowest=True,
)
print(df["Usage_Category"].value_counts())

# Age_Group is created in the "Usage Patterns" cell, which must run first.
print(df["Age_Group"].value_counts())
