En este archivo se crearán las funciones correspondientes para la visualización y estadísticas del DataFrame resultante del preprocessing.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import matplotlib.dates as mdates
from math import ceil

In [2]:
# Executes the shared "vars" notebook before the plotting helpers are defined.
# NOTE(review): the functions below use names that are not defined in this file
# (mood_data, moods_sorted, numeric_to_spanish, days_order); presumably they are
# defined by this notebook - confirm. The absolute path is machine-specific.
%run "C:/Users/isaij/Documents/Projects/Feelings/notebooks/utils/vars.ipynb"

In [3]:
def plot_moods_2024_2025(date_col, mood_numeric_col, colors, mood_data=mood_data, moods_sorted=moods_sorted):
    """Draw the mood timeline: a grey line with one coloured dot per entry.

    Args:
        date_col: Datetime Series used as the x axis; its ``.dt`` accessor is
            also used to derive one x tick per month.
        mood_numeric_col: Numeric mood values plotted on the y axis.
        colors: Per-point colours passed to the scatter plot.
        mood_data: Only its length is used, to number the y ticks.
        moods_sorted: Labels shown on the y ticks instead of the numbers.
    """
    plt.figure(figsize=(12, 6))

    # The connecting line is drawn below the dots (lower zorder).
    plt.plot(date_col, mood_numeric_col, c='gainsboro', zorder=1)
    plt.scatter(date_col, mood_numeric_col, c=colors, s=12, zorder=2)
    plt.grid(True, alpha=0.2)

    plt.title("Emociones durante 2024-2025")
    plt.xlabel("Días del año")
    plt.ylabel("Mood")

    # Show the mood names instead of their numeric codes.
    plt.yticks(ticks=range(len(mood_data)), labels=moods_sorted)

    # One tick per distinct month found in the data, labelled with month and year.
    month_starts = [period.to_timestamp() for period in date_col.dt.to_period('M').unique()]
    month_labels = [start.strftime('%b %Y') for start in month_starts]
    plt.xticks(ticks=month_starts, labels=month_labels, rotation=45)

    plt.show()

In [4]:
def plot_moods_by_month_2024(df, mood_data=mood_data, moods_sorted=moods_sorted):
    """Draw one small mood chart per month, January 2024 through June 2025.

    The 18 months are laid out on a 6x3 grid of subplots. Months without rows
    in ``df`` keep an empty panel that still shows its title and y labels.

    Args:
        df: DataFrame with a datetime ``full_date`` column plus
            ``mood_numeric`` and ``colors`` columns.
        mood_data: Only its length is used, to number the y ticks.
        moods_sorted: Labels shown on the y ticks instead of the numbers.
    """
    # Imported locally because the import cell of this file does not bring in
    # ``calendar``, which is needed for the month names in the panel titles.
    import calendar

    fig, axes = plt.subplots(6, 3, figsize=(15, 16))
    axes = axes.flatten()

    # (year, month) pairs: all of 2024, then January-June 2025.
    months_years = [(2024, month) for month in range(1, 13)]
    months_years += [(2025, month) for month in range(1, 7)]

    for ax, (year, month) in zip(axes, months_years):
        # Keep only the rows that belong to this year and month.
        in_month = (df['full_date'].dt.year == year) & (df['full_date'].dt.month == month)
        df_month = df[in_month]

        # Only draw when the month has data.
        if not df_month.empty:
            ax.scatter(df_month['full_date'], df_month['mood_numeric'],
                       c=df_month['colors'], s=12, zorder=2)
            ax.plot(df_month['full_date'], df_month['mood_numeric'],
                    c='gainsboro', zorder=1)
            ax.grid(True, alpha=0.2)

        ax.set_title(f'{calendar.month_name[month]} {year}')
        ax.set_yticks(range(len(mood_data)))
        ax.set_yticklabels(moods_sorted)
        ax.set_xticks([])

    fig.suptitle("Emociones por mes (Enero 2024 - Junio 2025)", fontsize=16)
    plt.tight_layout()

    plt.show()

In [5]:
def plot_moods_pie(mood_col):
    """Show a pie chart of how often each mood appears in ``mood_col``.

    Slice colours are looked up per mood in the module-level ``mood_data``
    mapping. The legend lists each mood with its count, and the total number
    of entries is written above the pie.

    Args:
        mood_col: Series of mood labels; every label must be a key of
            ``mood_data``.
    """
    sum_moods = mood_col.value_counts()
    moods = sum_moods.index

    colors = [mood_data[mood] for mood in moods]

    plt.pie(
        sum_moods,
        labels=moods,
        colors=colors,
        textprops=dict(color="w")
    )

    # Round markers in the slice colours serve as legend handles.
    handles = [plt.Line2D([0], [0], marker='o', color=color, markersize=10, linestyle='')
               for color in colors]
    labels = [f"{mood}: {sum_moods[mood]}" for mood in moods]
    plt.legend(handles, labels, title="Moods", loc='upper right')

    plt.title("Cantidad de emociones anual")
    # The total comes from the column that was passed in, not from a global
    # DataFrame that may be undefined or unrelated to this call.
    plt.text(0, 1.15, f'Total: {len(mood_col)}', ha='center', fontsize=12)

    plt.show()

In [6]:
def plot_freq_emotions(mood_col):
    """Show a bar chart with the number of occurrences of each mood in ``mood_col``."""
    counts = mood_col.value_counts()

    plt.bar(counts.index, counts.values)
    plt.title('Frecuencia de emociones durante el año')
    plt.xlabel('Emoción')
    plt.ylabel('Frecuencia')

    plt.show()

In [7]:
def plot_mean_mood_by_month(df):
    """Show a bar chart of the mean ``mood_numeric`` per month of 2024.

    The month with the highest mean is drawn in green and the one with the
    lowest in red. Each bar is labelled with its value to two decimals.

    Args:
        df: DataFrame with ``year``, ``month`` and ``mood_numeric`` columns.
    """
    # Imported locally because the import cell of this file does not bring in
    # ``calendar``, which is needed for the month names on the x axis.
    import calendar

    mean_month = df[df['year'] == 2024].groupby('month')['mood_numeric'].mean()

    max_index = mean_month.idxmax()
    min_index = mean_month.idxmin()

    colors = ['green' if i == max_index else 'red' if i == min_index else 'C0' for i in mean_month.index]

    month_names = [calendar.month_name[i].capitalize() for i in mean_month.index]
    bars = plt.bar(month_names, mean_month.values, color=colors)

    plt.bar_label(bars, labels=[f'{val:.2f}' for val in mean_month.values])

    plt.ylabel('Promedio')
    plt.title('Promedio de emoción por mes (2024)')
    plt.xticks(rotation=45)

    plt.show()

In [8]:
# Mean mood per month, but divided by the number of calendar days in the month

def plot_mean_mood_by_month_tot_days(df):
    """Show a bar chart of the 2024 monthly mood sum divided by days in the month.

    Unlike a plain mean over recorded entries, the sum of ``mood_numeric`` for
    each month is divided by that month's number of calendar days in 2024.
    Months with no rows get a value of 0. The highest bar is drawn in green
    and the lowest in red; each bar is labelled with its value to two decimals.

    Args:
        df: DataFrame with ``year``, ``month`` and ``mood_numeric`` columns.
    """
    # Imported locally because the import cell of this file does not bring in
    # ``calendar``, which is needed for month lengths and month names.
    import calendar

    # Number of calendar days in each month of 2024 (a leap year).
    size_month = pd.Series({month: calendar.monthrange(2024, month)[1] for month in range(1, 13)})

    # Sum of mood values per month.
    sum_month = df[df['year'] == 2024].groupby('month')['mood_numeric'].sum()

    # Months missing from the data come out as NaN after the division.
    mean_month = (sum_month / size_month).fillna(0)

    max_index = mean_month.idxmax()
    # The lowest month is computed from the data rather than fixed to June.
    min_index = mean_month.idxmin()

    colors = ['green' if i == max_index else 'red' if i == min_index else 'C0' for i in mean_month.index]

    month_names = [calendar.month_name[i].capitalize() for i in mean_month.index]
    bars = plt.bar(month_names, mean_month.values, color=colors)

    plt.bar_label(bars, labels=[f'{val:.2f}' for val in mean_month.values])

    plt.ylabel('Promedio')
    plt.title('Promedio de emoción por mes (2024)')
    plt.xticks(rotation=45)

    plt.show()

In [9]:
def moods_by_month(df):
    """Print a header and return the count of each mood per month of 2024.

    Args:
        df: DataFrame with ``year``, ``month`` and ``mood`` columns.

    Returns:
        DataFrame indexed by month with one column per mood. Each cell is the
        number of 2024 rows for that month and mood (0 when there are none).
    """
    print("Cantidad de moods por mes en 2024")
    # Return the table: a bare expression inside a function is discarded, so
    # previously the caller never got to see the result.
    return df[df['year'] == 2024].groupby(['month', 'mood']).size().unstack(fill_value=0)

In [10]:
def plot_mean_by_week(df):
    """Show a bar chart of the mean ``mood_numeric`` for each day of the week.

    Weekday values are translated through the module-level
    ``numeric_to_spanish`` and ordered according to ``days_order``. The day
    with the highest mean is drawn in green and the lowest in red.

    Args:
        df: DataFrame with ``weekday`` and ``mood_numeric`` columns.
    """
    mean_weekday = df.groupby('weekday')['mood_numeric'].mean()
    mean_weekday.index = mean_weekday.index.map(numeric_to_spanish)
    mean_weekday = mean_weekday.reindex(days_order)  # Put the days in display order

    max_index = mean_weekday.idxmax()
    min_index = mean_weekday.idxmin()

    colors = ['green' if i == max_index else 'red' if i == min_index else 'C0' for i in mean_weekday.index]
    bars = plt.bar(mean_weekday.index, mean_weekday.values, color=colors)
    plt.bar_label(bars, labels=[f'{val:.2f}' for val in mean_weekday.values])
    plt.ylabel('Promedio')
    plt.title('Promedio semanal 2024-2025')
    plt.xticks(rotation=45)

    plt.show()

In [11]:
def plot_quan_activities_freq_moods(activities_count_col, mood_numeric_col):
    """Show two side-by-side histograms: activity counts and numeric moods.

    Args:
        activities_count_col: Values for the left histogram; its x axis gets a
            major tick every 2 units.
        mood_numeric_col: Values for the right histogram, drawn with 5 bins
            and a MaxNLocator(5) on the x axis.
    """
    fig, axes = plt.subplots(1, 2, figsize=(10, 4))

    axes[0].hist(activities_count_col, edgecolor='black')
    axes[0].xaxis.set_major_locator(plt.MultipleLocator(2))
    axes[0].set_title("Cantidad de actividades")

    axes[1].hist(mood_numeric_col, edgecolor='black', bins=5)
    # The class is ``MaxNLocator``; the previous spelling ``MaxNlocator``
    # raised AttributeError before the figure could be shown.
    axes[1].xaxis.set_major_locator(plt.MaxNLocator(5))
    axes[1].set_title("Moods")

    plt.tight_layout()
    plt.show()

In [12]:
def plot_quan_activities_moods(activities_count_col, mood_numeric_col):
    """Show a scatter plot of activity count (x) against numeric mood (y).

    Both axes get a major tick every 2 units.

    Args:
        activities_count_col: Values for the x axis.
        mood_numeric_col: Values for the y axis.
    """
    plt.scatter(activities_count_col, mood_numeric_col)
    plt.title('Cantidad de actividades vs. Mood')
    plt.xlabel('Cantidad de actividades')
    plt.ylabel('Mood numeric')

    # The pyplot module has no ``xaxis``/``yaxis`` attributes; the tick
    # locators must be set on the current Axes object.
    ax = plt.gca()
    ax.xaxis.set_major_locator(plt.MultipleLocator(2))
    ax.yaxis.set_major_locator(plt.MultipleLocator(2))

    plt.show()

In [13]:
def plot_qual_sleep(sleep_level_col):
    """Show a 5-bin histogram of the values in ``sleep_level_col``."""
    plt.hist(sleep_level_col, edgecolor='black', bins=5)
    # The pyplot module has no ``xaxis`` attribute; the tick locator must be
    # set on the current Axes object.
    plt.gca().xaxis.set_major_locator(plt.MaxNLocator(5))
    plt.title("Calidad del sueño")

    plt.show()

In [14]:
def plot_dist_emotions(positive_emotions_col, neutral_emotions_col, negative_emotions_col):
    """Show three side-by-side histograms of positive, neutral and negative emotions.

    Args:
        positive_emotions_col: Values for the left histogram (7 bins).
        neutral_emotions_col: Values for the middle histogram (3 bins, with a
            MaxNLocator(3) on the x axis).
        negative_emotions_col: Values for the right histogram (8 bins).
    """
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))

    axes[0].hist(positive_emotions_col, edgecolor='black', bins=7)
    axes[0].set_title("Histograma de emociones positivas")

    axes[1].hist(neutral_emotions_col, edgecolor='black', bins=3)
    axes[1].xaxis.set_major_locator(plt.MaxNLocator(3))
    axes[1].set_title("Histograma de emociones neutras")

    axes[2].hist(negative_emotions_col, edgecolor='black', bins=8)
    # Title spelling corrected ("Historgrama" -> "Histograma") to match the
    # other two panels.
    axes[2].set_title("Histograma de emociones negativas")

    plt.tight_layout()
    plt.show()

In [15]:
def create_hists(df, cols):
    """Show one histogram per column in ``cols``, laid out at most 3 per row.

    Each histogram uses a major x tick every 1 unit. Unused cells of the grid
    are hidden. Nothing is drawn when ``cols`` is empty.

    Args:
        df: Object indexable by column name (``df[col]``) to get the values.
        cols: Sized iterable of column names to plot.
    """
    n_plots = len(cols)

    # With nothing to plot the grid would have zero columns and the row
    # computation below would divide by zero.
    if n_plots == 0:
        return

    # Grid shape: up to 3 columns and as many rows as needed.
    n_cols = min(3, n_plots)
    n_rows = ceil(n_plots / n_cols)

    # squeeze=False always returns a 2-D array of Axes, even for a single
    # subplot, so it can be flattened without a special case.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(5*n_cols, 4*n_rows), squeeze=False)
    axes = axes.flatten()

    # Draw the histograms.
    for ax, col in zip(axes, cols):
        ax.hist(df[col], edgecolor='black')
        ax.set_title(f'Histograma de {col}')
        ax.set_xlabel(col)
        ax.set_ylabel('Frecuencia')
        ax.xaxis.set_major_locator(plt.MultipleLocator(1))

    # Hide the grid cells that have no histogram.
    for ax in axes[n_plots:]:
        ax.set_visible(False)

    plt.tight_layout()
    plt.show()

In [16]:
def create_scrolleable_table(df, table_id, title):
    """Return an HTML snippet with a heading and ``df`` inside a scrollable div.

    Args:
        df: Object whose ``to_html()`` output is embedded in the div.
        table_id: Value for the div's ``id`` attribute (inserted as-is).
        title: Text for the ``<h3>`` heading (inserted as-is).

    Returns:
        The concatenated HTML string.
    """
    pieces = (
        f'<h3>{title}</h3>',
        # Fixed-height container; overflow scrolls instead of growing the page.
        f'<div id="{table_id}" style="height: 220px; overflow: auto;">',
        df.to_html(),
        '</div>',
    )
    return ''.join(pieces)