In [2]:
# Working with Dataframes
# -----------------------------------------------------------------------
import pandas as pd

# Path configuration for custom module imports
# -----------------------------------------------------------------------
import sys
sys.path.append('../')  # Adds the parent directory to the path for custom module imports

# Import custom functions to work with databases
# -----------------------------------------------------------------------
from src.support_db import sql_query
from src.support_queries import query

### Data loading

In [24]:
data, columns = sql_query(query)

if data and columns:
    df = pd.DataFrame(data, columns=columns)
else:
    print("No data returned from the query.")

Query performed successfully.
Database connection closed.


In [23]:
# View date to date time
df['view_date'] = pd.to_datetime(df['view_date'])

KeyError: 'view_date'

---

In [4]:
df.columns

Index(['user_id', 'user_type', 'country', 'recommendation_system',
       'content_id', 'content_duration', 'content_type', 'content_rating',
       'view_id', 'duration_viewed', 'recommendation_id', 'recommended_by',
       'interaction_id', 'interaction_type'],
      dtype='object')

In [21]:
df_users = df.groupby("user_id").agg(
    {
    'user_type': 'first', # Get first (and only)
    'recommendation_system': 'first', # Get first (and only)
    'content_id': 'nunique', # Number of different contents
    'content_duration': 'sum', # Sum of minutes
    'content_type': 'nunique', # Number of different content types
    'content_rating': 'mean', # Mean rating
    'view_id': 'nunique', # Number of views
    'duration_viewed': 'sum', # Sum of minutes viewed
    'recommendation_id':'nunique', # Number of recommendations
    'recommended_by': 'first', # Get first (and only)
    'interaction_id': 'nunique', # Number of interactions
    'interaction_type': 'nunique' # Number of distinct interactions
    }
).reset_index()

In [22]:
df_users

Unnamed: 0,user_id,user_type,recommendation_system,content_id,content_duration,content_type,content_rating,view_id,duration_viewed,recommendation_id,recommended_by,interaction_id,interaction_type
0,1,moderator,Recomendeitor_Plus,5,1304,4,3.26,5,1380,0,,4,3
1,2,moderator,Recomendeitor,1,23,1,2.2,1,129,0,,1,1
2,3,moderator,Recomendeitor_Plus,3,210,3,3.5,3,188,0,,1,1
3,4,admin,Recomendeitor_Plus,2,254,2,2.7,2,119,0,,1,1
4,5,consumer,Recomendeitor,3,104,3,3.266667,3,199,0,,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
944,996,content_creator,Recomendeitor_Plus,1,39,1,4.2,1,79,0,,1,1
945,997,moderator,Recomendeitor_Plus,1,22,1,1.0,1,122,0,,1,1
946,998,moderator,Recomendeitor_Plus,1,226,1,1.9,1,318,0,,2,1
947,999,content_creator,Recomendeitor,3,169,3,2.633333,3,404,0,,1,1


Calcula métricas semanales o promedio para cada usuario, por ejemplo:

Promedio de session_duration por semana.

Promedio de clicks_on_recommendations por semana.

Frecuencia semanal (number_of_sessions_weekly).

Porcentaje de contenido completado (content_completed) como el total de contenidos completados dividido entre el total de sesiones.

Calculamos nuevas métricas:
valoración promedio de los contenidos
duración promedio de la sesión
porcentaje de contenido finalizado
sesiones promedio por semana
click through rate (CTR)

In [None]:
df_users["avg_valoracion"] = df_users["valoraciones"].apply(lambda x: round(np.mean(x),2) if isinstance(x, list) else np.nan)
df_users.drop(columns="valoraciones", inplace=True)

df_users["duracion_prom_sesion"] = round(df_users["minutos_sesion"]/df_users["numero_sesion"],2)
df_users["porcentaje_finalizado"] = round(df_users["minutos_sesion"]/df_users["duracion_contenido"]*100,2)
df_users["avg_sesiones_semana"] = df_users["fecha_sesion"].apply(
    lambda fechas: len(fechas) / pd.to_datetime(fechas, errors='coerce').to_series().dt.to_period("W").nunique()
    if pd.to_datetime(fechas, errors='coerce').to_series().dt.to_period("W").nunique() > 0 else 0
)
df_users["click_rate"] = round(df_users["recomendaciones_vistas"] / df_users["recomendaciones_totales"],2)