<a href="https://colab.research.google.com/github/Neyder2502/salud-mental-analisis-datos/blob/main/notebooks/Neyder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CÓDIGO PRINCIPAL

## LIBRERÍAS

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy.stats import pearsonr, linregress

## ARCHIVO CSV

In [None]:
# Colab-only loader: opens the browser upload dialog and reads the chosen CSV
# into `df`, the DataFrame used by every later cell.
from google.colab import files

uploaded = files.upload()
if not uploaded:
    # With nothing uploaded, indexing the first key would fail with a bare
    # IndexError; raise an explicit, readable error instead.
    raise ValueError("No se subió ningún archivo CSV")

# Only the first uploaded file is read; any additional files are ignored.
filename = next(iter(uploaded))
df = pd.read_csv(filename)
print("Archivo cargado correctamente")

## ESTRUCTURA Y CALIDAD DE DATOS

In [None]:
# Preview the first rows to confirm the CSV was parsed into the expected columns.
df.head()

In [None]:
# Print the frame summary produced by DataFrame.info().
df.info()

In [None]:
# List the exact column names; later cells reference them verbatim,
# including suffixes such as "(hrs)" and "(1-10)".
df.columns

In [None]:
# Count the missing values in each column.
df.isna().sum()

## DETECCIÓN DE VALORES ATÍPICOS



In [None]:
# One boxplot per numeric indicator to spot outliers visually.
# Each entry maps a column to the title shown above its plot, so adding a new
# variable is a one-line change instead of another copy-pasted stanza.
columnas_boxplot = {
    'Daily_Screen_Time(hrs)': 'Outliers - Horas de pantalla',
    'Sleep_Quality(1-10)': 'Outliers - Calidad de sueño',
    'Stress_Level(1-10)': 'Outliers - Estrés',
    'Happiness_Index(1-10)': 'Outliers - Felicidad',
}

for columna, titulo in columnas_boxplot.items():
    # A fresh figure per variable keeps the plots from being drawn on top of
    # each other.
    fig, ax = plt.subplots()
    df.boxplot(column=[columna], ax=ax)
    ax.set_title(titulo)
    plt.show()

In [None]:
# Sanity check: show the rows whose value falls outside the range considered
# valid for each column. Each entry is (heading to print, column, min, max).
rangos_validos = [
    ("Edades fuera de rango:", 'Age', 10, 80),
    ("\nHoras de pantalla fuera de rango:", 'Daily_Screen_Time(hrs)', 0, 24),
    ("\nCalidad de sueño fuera de 1-10:", 'Sleep_Quality(1-10)', 1, 10),
    ("\nEstrés fuera de 1-10:", 'Stress_Level(1-10)', 1, 10),
    ("\nFelicidad fuera de 1-10:", 'Happiness_Index(1-10)', 1, 10),
    ("\nDías sin redes fuera de rango:", 'Days_Without_Social_Media', 0, 30),
]

for encabezado, columna, minimo, maximo in rangos_validos:
    print(encabezado)
    valores = df[columna]
    # Explicit < / > comparisons (rather than a negated `between`) so that
    # missing values are not reported as out of range.
    display(df[(valores < minimo) | (valores > maximo)])

## ESTADÍSTICAS Y DISTRIBUCIÓN DE VARIABLES

### Estadística

In [None]:
# Summary statistics from DataFrame.describe(), rounded to 3 decimals.
df.describe().round(3)

### Distribución

In [None]:
# Frequency table of every variable, produced by one loop instead of nine
# copy-pasted cells. The order matches the original sequence of cells.
columnas_frecuencia = [
    'Age',
    'Gender',
    'Daily_Screen_Time(hrs)',
    'Sleep_Quality(1-10)',
    'Stress_Level(1-10)',
    'Days_Without_Social_Media',
    'Exercise_Frequency(week)',
    'Social_Media_Platform',
    'Happiness_Index(1-10)',
]

for columna in columnas_frecuencia:
    # display() renders each Series the same way a bare last-line expression
    # would; the Series repr already carries the column name.
    display(df[columna].value_counts())

# HÁBITOS DIGITALES Y SALUD MENTAL

In [None]:
plt.style.use('seaborn-v0_8-whitegrid')  # white background with grid lines
sns.set_palette("husl")  # use seaborn's "husl" color palette for the plots

## EDAD VS HORAS EN PANTALLA


In [None]:
# Age vs. daily screen time: raw scatter, mean per age, distribution per age
# group, and a Pearson correlation test.

# Scatter plot with one point per row.
fig, ax = plt.subplots(figsize=(7, 5))
sns.scatterplot(data=df, x="Age", y="Daily_Screen_Time(hrs)", ax=ax)
ax.set_title("Edad vs tiempo de pantalla")
plt.show()

# Mean screen time for each distinct age, drawn as a line.
prom = df.groupby("Age", as_index=False)["Daily_Screen_Time(hrs)"].mean()
fig, ax = plt.subplots(figsize=(7, 5))
sns.lineplot(data=prom, x="Age", y="Daily_Screen_Time(hrs)", marker="o", ax=ax)
ax.set_title("Promedio horas x edad")
plt.show()

# Age groups. pd.cut builds right-closed intervals, so these edges give
# (15, 25], (25, 35], (35, 45] and (45, 50]. Ages outside that span get NaN
# and are left out of the boxplot.
# NOTE(review): assumes the data only contains ages 16-50 -- confirm.
bins = [15, 25, 35, 45, 50]
labels = ["16-25", "26-35", "36-45", "46-50"]
df["grupo_edad"] = pd.cut(df["Age"], bins=bins, labels=labels)
fig, ax = plt.subplots(figsize=(7, 5))
sns.boxplot(data=df, x="grupo_edad", y="Daily_Screen_Time(hrs)", ax=ax)
ax.set_title("Tiempo pantalla x grupo edad")
plt.show()

# Pearson correlation, computed only on rows where both values are present.
# pearsonr does not skip missing values: depending on the SciPy version it
# raises or returns NaN, and a NaN p-value would make the message below
# report "No hay correlación" for the wrong reason.
pares = df[["Age", "Daily_Screen_Time(hrs)"]].dropna()
corr, p = pearsonr(pares["Age"], pares["Daily_Screen_Time(hrs)"])
print(f"Pearson: {corr:.3f}, p={p:.4f}")
print("Hay correlación" if p < 0.05 else "No hay correlación")

## HORAS DE PANTALLA VS ESTRÉS

In [None]:
# Daily screen time vs. stress: regression plot, stress per usage category,
# category x stress-level count heatmap, Pearson test and per-category means.

# Scatter with fitted regression line.
fig, ax = plt.subplots(figsize=(7, 5))
sns.regplot(data=df, x="Daily_Screen_Time(hrs)", y="Stress_Level(1-10)",
            scatter_kws={"alpha": 0.5}, ax=ax)
ax.set_title("Horas pantalla vs estrés")
plt.show()

# Usage categories: Bajo = [0, 3], Medio = (3, 6], Alto = more than 6 hours.
# include_lowest=True keeps rows with exactly 0 hours, and the open upper edge
# keeps rows above 12 hours. With the previous edges [0, 3, 6, 12] both cases
# silently became NaN and vanished from every plot and average below.
# Later cells reuse the "cat_pantalla" column created here.
bins = [0, 3, 6, np.inf]
labels = ["Bajo", "Medio", "Alto"]
df["cat_pantalla"] = pd.cut(df["Daily_Screen_Time(hrs)"], bins=bins,
                            labels=labels, include_lowest=True)
fig, ax = plt.subplots(figsize=(7, 5))
sns.boxplot(data=df, x="cat_pantalla", y="Stress_Level(1-10)", ax=ax)
ax.set_title("Estrés x categoría uso")
plt.show()

# Row counts for every (usage category, stress level) pair.
# observed=False is spelled out because grouping by a categorical column
# without it triggers a pandas FutureWarning; it keeps empty categories as
# all-zero rows.
tabla = (
    df.groupby(["cat_pantalla", "Stress_Level(1-10)"], observed=False)
    .size()
    .unstack(fill_value=0)
)
fig, ax = plt.subplots(figsize=(9, 5))
sns.heatmap(tabla, annot=True, fmt='d', cmap="YlOrRd", ax=ax)
ax.set_title("Densidad pantalla vs estrés")
plt.show()

# Pearson correlation on complete pairs only, since pearsonr does not skip
# missing values.
pares = df[["Daily_Screen_Time(hrs)", "Stress_Level(1-10)"]].dropna()
corr, p = pearsonr(pares["Daily_Screen_Time(hrs)"], pares["Stress_Level(1-10)"])
print(f"Pearson: {corr:.3f}, p={p:.4f}")
print("Significativo" if p < 0.05 else "No significativo")

# Mean stress per usage category.
print("\nPromedio estres:")
print(df.groupby("cat_pantalla", observed=False)["Stress_Level(1-10)"].mean().round(2))

## HORAS DE PANTALLA VS FELICIDAD

In [None]:
# pantalla por felicidad
x = df['Daily_Screen_Time(hrs)']
y = df['Happiness_Index(1-10)']

reg = linregress(x, y)

plt.figure(figsize=(7,5))
sns.scatterplot(x=x, y=y, alpha=0.5)
plt.plot(x, reg.intercept + reg.slope*x, color='red')
plt.title("Pantalla vs Felicidad")
plt.show()  #scatterplot

# felicidad por uso
plt.figure(figsize=(7,5))
sns.boxplot(data=df, x="cat_pantalla", y="Happiness_Index(1-10)")
plt.title("Felicidad x uso pantalla")
plt.show()  # boxplot (es la misma chimbada anterior pero otro grafico :D)

# resultados
print(f"Pendiente: {reg.slope:.4f}")
print(f"Intercepto: {reg.intercept:.4f}")
print(f"ecuacion: {reg.slope:.4f}x + {reg.intercept:.4f}")
print(f"R2: {reg.rvalue**2:.4f}")
print(f"p-valor: {reg.pvalue:.4f}")
print("Significativo" if reg.pvalue < 0.05 else "No significativo")

## DÍAS SIN REDES VS ESTRÉS/FELICIDAD

In [None]:
# categorias de dias sin redes
bins = [-1, 2, 5, 10]
labels = ['Pocos', 'Moderado', 'Muchos']
df['cat_descanso'] = pd.cut(df['Days_Without_Social_Media'], bins=bins, labels=labels)

# estres x dias sin redes
plt.figure(figsize=(7,5))
sns.boxplot(data=df, x='cat_descanso', y='Stress_Level(1-10)')
plt.title("Estrés vs días sin redes")
plt.show()  #bloxpot (pa estres)

# felicidad x dias sin redes
plt.figure(figsize=(7,5))
sns.boxplot(data=df, x='cat_descanso', y='Happiness_Index(1-10)')
plt.title("Felicidad vs días sin redes")
plt.show()  #bloxpot (pa felicidad)

# barras comparativas
prom = df.groupby('cat_descanso')[['Stress_Level(1-10)', 'Happiness_Index(1-10)']].mean()
plt.figure(figsize=(7,5))
prom.plot(kind='bar', color=['salmon', 'lightgreen'], edgecolor='black')
plt.title("Promedio estrés y felicidad")
plt.xticks(rotation=0)
plt.show()

# promedios
print("Promedio estres:")
print(prom['Stress_Level(1-10)'].round(2))
print("\nPromedio felicidad:")
print(prom['Happiness_Index(1-10)'].round(2))

## PLATAFORMA FAVORITA VS ESTRÉS

In [None]:
# estres promedio x plataforma
estres_plat = df.groupby('Social_Media_Platform')['Stress_Level(1-10)'].mean().sort_values(ascending=False)

# barras estres
plt.figure(figsize=(8,5))
sns.barplot(x=estres_plat.index, y=estres_plat.values, palette="Purples_r")
plt.axhline(y=df['Stress_Level(1-10)'].mean(), color="black", linestyle="--")
plt.title("Estrés x plataforma")
plt.xticks(rotation=45)
plt.show()

# boxplot estres
plt.figure(figsize=(8,5))
sns.boxplot(data=df, x='Social_Media_Platform', y='Stress_Level(1-10)')
plt.title("Distribución estrés x plataforma")
plt.xticks(rotation=45)
plt.show()

# comparar estres y felicidad
comp = df.groupby('Social_Media_Platform')[['Stress_Level(1-10)', 'Happiness_Index(1-10)']].mean()
comp = comp.sort_values('Stress_Level(1-10)', ascending=False)

plt.figure(figsize=(9,6))
comp.plot(kind='bar', color=['lightblue', 'lightgreen'], edgecolor='black')
plt.title("Estrés vs felicidad x plataforma")
plt.xticks(rotation=45)
plt.show()

# estadisticas
stats = df.groupby('Social_Media_Platform').agg({
    'Stress_Level(1-10)': ['mean', 'std', 'count'],
    'Happiness_Index(1-10)': ['mean', 'std']
}).round(2)

print("Stats por plataforma:")
print(stats)
print("\nRanking estres:")
print(estres_plat.round(2))