In [None]:
#### import pandas as pd
import numpy as np
import math
import random
import pandas as pd
from scipy.stats import pearsonr
from sklearn.metrics.pairwise import cosine_similarity

# =============================================================================
# FUNCIONES DE CARGA DE DATOS
# =============================================================================
def load_data():
    """
    Read the five CSV files the recommenders work on, from the current directory.

    Returns a 5-tuple of DataFrames, in this order:
      - ``usuarios_historico``: columns ``id_user``, ``id_item``, ``valoracion``.
      - ``items_names``: columns ``id_item``, ``name_item``, ``visitas`` (read as latin-1).
      - ``preferencias``: columns ``id_user``, ``id_preferencia``, ``score``.
      - ``padres``: ``preferencias.csv`` read with pandas defaults; when it has no
        ``id`` column its first two columns are renamed to ``id`` and ``name``.
      - ``items_clasificacion``: columns ``id_item``, ``id_preferencia``, ``score``.
    """
    def _read_headerless(path, column_names, **read_kwargs):
        # The ';'-separated files carry no header row, so labels are assigned here.
        frame = pd.read_csv(path, sep=";", header=None, **read_kwargs)
        frame.columns = column_names
        return frame

    usuarios_historico = _read_headerless("puntuaciones_usuario_base.csv",
                                          ['id_user', 'id_item', 'valoracion'])
    items_names = _read_headerless("items.csv",
                                   ['id_item', 'name_item', 'visitas'],
                                   encoding="latin-1")
    preferencias = _read_headerless("usuarios_preferencias.csv",
                                    ['id_user', 'id_preferencia', 'score'])

    padres = pd.read_csv("preferencias.csv")
    if 'id' not in padres.columns:
        first_col, second_col = padres.columns[0], padres.columns[1]
        padres = padres.rename(columns={first_col: 'id', second_col: 'name'})

    items_clasificacion = _read_headerless("clasificacion_items.csv",
                                           ['id_item', 'id_preferencia', 'score'])

    return usuarios_historico, items_names, preferencias, padres, items_clasificacion

# =============================================================================
# COLLABORATIVE RECOMMENDATION
# =============================================================================
def compute_user_similarity(user_item_matrix, min_intersection=3):
    """
    Build a symmetric user-by-user Pearson correlation matrix.

    For each pair of users the correlation is computed over the items both have
    rated. When fewer than ``min_intersection`` items are shared, it is computed
    over the items rated by either user instead, with missing ratings set to 0.
    Any failure inside ``pearsonr`` is stored as NaN.

    Returns a float DataFrame indexed and labelled by the rows of ``user_item_matrix``.
    """
    user_ids = user_item_matrix.index.tolist()
    similarities = pd.DataFrame(index=user_ids, columns=user_ids, dtype=float)

    for position, user_a in enumerate(user_ids):
        ratings_a = user_item_matrix.loc[user_a]
        rated_a = ratings_a.notna()

        # Only the upper triangle (diagonal included) is computed; it is mirrored below.
        for user_b in user_ids[position:]:
            ratings_b = user_item_matrix.loc[user_b]
            rated_b = ratings_b.notna()

            rated_by_both = rated_a & rated_b
            if rated_by_both.sum() >= min_intersection:
                left = ratings_a[rated_by_both]
                right = ratings_b[rated_by_both]
            else:
                rated_by_either = rated_a | rated_b
                left = ratings_a[rated_by_either].fillna(0)
                right = ratings_b[rated_by_either].fillna(0)

            try:
                value = pearsonr(left, right)[0]
            except Exception:
                value = np.nan

            similarities.loc[user_a, user_b] = value
            similarities.loc[user_b, user_a] = value

    return similarities

def get_collaborative_recommendations(user_item_matrix, sim_matrix, target_user, n_neighbors=20, min_threshold=0.7):
    """
    Score unrated items for ``target_user`` from the ratings of similar users.

    Neighbours are the users whose similarity to ``target_user`` is at least
    ``min_threshold``; at most the ``n_neighbors`` most similar ones are used.
    Each neighbour contributes only the items it rated above its own mean rating,
    and items the target user already rated are skipped.

    Parameters
    ----------
    user_item_matrix : DataFrame, users as rows and items as columns, NaN = not rated.
    sim_matrix : DataFrame of user-to-user similarities (same user labels on both axes).
    target_user : label of the user to recommend for.
    n_neighbors : maximum number of neighbours to use.
    min_threshold : minimum similarity for a user to count as a neighbour.

    Returns
    -------
    (dict, float)
        ``{id_item: score}`` plus a coverage figure equal to
        ``len(scores) * (neighbours_used / n_neighbors)``. Note that this second
        value is NOT a plain neighbour count. A user missing from either matrix
        yields ``({}, 0.0)``.
    """
    # Unknown users have neither neighbours nor history; mirror the empty result
    # the content-based recommender returns instead of raising KeyError.
    if target_user not in sim_matrix.index or target_user not in user_item_matrix.index:
        return {}, 0.0

    neighbors = sim_matrix.loc[target_user].drop(target_user).dropna()
    valid_neighbors = neighbors[neighbors >= min_threshold].sort_values(ascending=False)
    top_neighbors = valid_neighbors.iloc[:n_neighbors]

    # A set gives O(1) membership tests inside the double loop below.
    rated_by_target = set(user_item_matrix.loc[target_user].dropna().index)
    rec_scores = {}
    rec_weights = {}
    neighbor_count = {}

    # Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
    for neighbor, sim in top_neighbors.items():
        neighbor_ratings = user_item_matrix.loc[neighbor]
        # Per-neighbour threshold: only items rated above that neighbour's own mean count.
        dynamic_threshold = neighbor_ratings.mean()
        favorable_items = neighbor_ratings[neighbor_ratings > dynamic_threshold].dropna()

        for item, rating in favorable_items.items():
            if item in rated_by_target:
                continue
            rec_scores[item] = rec_scores.get(item, 0) + sim * rating
            rec_weights[item] = rec_weights.get(item, 0) + abs(sim)
            neighbor_count[item] = neighbor_count.get(item, 0) + 1

    # Rescale "similarity-weighted mean rating + log(1 + supporting neighbours)"
    # with fixed bounds. NOTE(review): the 7 presumably is the top of the rating
    # scale -- confirm against the data.
    min_ratio = 1
    max_ratio = 7 + np.log(n_neighbors + 1)
    final_scores = {
        item: ((rec_scores[item] / rec_weights[item]) + np.log(1 + neighbor_count[item]) - min_ratio)
        / (max_ratio - min_ratio)
        for item in rec_scores
        if rec_weights[item] != 0
    }

    # Fraction of the requested neighbour slots that were actually filled.
    slots_filled = len(top_neighbors) / n_neighbors if n_neighbors else 0.0
    return final_scores, len(final_scores) * slots_filled

# =============================================================================
# CONTENT-BASED RECOMMENDATION (WITH DYNAMIC SCORE)
# =============================================================================
def get_content_recommendations(usuarios_historico, items_names, preferencias, padres, items_clasificacion,
                                target_user, N=10, alpha=0.33, beta=0.33, gamma=0.34, dynamic_threshold_factor=0.9):
    """
    Score the items ``target_user`` has not rated with an additive content-based model.

    Three per-item signals are min-max normalised and blended linearly:
      - ``alpha`` * cosine similarity between the item's category profile and the
        user's declared preferences (only the strongest preferences inside each
        ``padres`` name group are kept);
      - ``beta`` * cosine similarity between the item's category profile and a
        per-category vector derived from the user's rating history
        (ratings below 4 enter with a negative sign);
      - ``gamma`` * popularity, measured as ``log(1 + visitas)``.

    Items present in the user's history are removed. The remaining items whose
    blended score reaches the ``dynamic_threshold_factor`` quantile (0.9 by default)
    are returned; when fewer than ``N`` qualify, the ``N`` best items are returned.

    Parameters
    ----------
    usuarios_historico : DataFrame with ``id_user``, ``id_item``, ``valoracion``.
    items_names : DataFrame with at least ``id_item`` and ``visitas``.
    preferencias : DataFrame with ``id_user``, ``id_preferencia``, ``score``.
    padres : DataFrame with ``id`` and ``name``; must contain every id in 1..115.
    items_clasificacion : three-column DataFrame (``id_item``, ``id_preferencia``, ``score``).
    target_user : id of the user to recommend for.
    N : minimum number of recommendations to fall back to.
    alpha, beta, gamma : blending weights of the three signals.
    dynamic_threshold_factor : quantile (0..1) used as the score cut-off.

    Returns
    -------
    (dict, int)
        ``{id_item: score}`` and its length; ``({}, 0)`` when the user is missing
        from the preference data or from the history-based vectors.
    """
    # Category ids are hard-wired to 1..115. NOTE(review): presumably the full
    # taxonomy size -- confirm before reusing with other data.
    columnas_deseadas = list(range(1, 116))

    # Declared preferences, transposed so rows = id_preferencia and columns = id_user.
    matriz_preferencias = preferencias.pivot(index='id_user', columns='id_preferencia', values='score')
    matriz_preferencias = matriz_preferencias.reindex(columns=columnas_deseadas, fill_value=0).fillna(0)
    matriz_preferencias = matriz_preferencias.T
    matriz_preferencias['padre'] = padres.set_index('id').loc[matriz_preferencias.index, 'name']

    def filtrar_top_porcentaje(df, porcentaje=0.2):
        """Zero out, per 'padre' group and per user, all but the top share of scores."""
        df_filtrado = df.copy()
        columnas_usuario = [col for col in df.columns if col != 'padre']
        for padre, grupo in df.groupby('padre'):
            for col in columnas_usuario:
                conteo = (grupo[col] > 0).sum()
                # At least one entry per group is always kept.
                n_items = max(1, int(conteo * porcentaje))
                indices_top = grupo[col].nlargest(n_items).index
                indices_no_top = grupo.index.difference(indices_top)
                df_filtrado.loc[indices_no_top, col] = 0
        return df_filtrado

    matriz_filtrada = filtrar_top_porcentaje(matriz_preferencias, porcentaje=0.2)

    # Item profiles: rows = id_item, columns = category id (duplicates averaged).
    items_group = items_clasificacion.groupby(['id_item', 'id_preferencia'], as_index=False).mean()
    matriz_items = items_group.pivot(index='id_item', columns='id_preferencia', values='score')
    matriz_items = matriz_items.reindex(columns=columnas_deseadas, fill_value=0).fillna(0)

    # Signal 1: item profile vs. filtered preferences -> rows = items, columns = users.
    matriz_filtradaT = matriz_filtrada.drop(columns=['padre']).T
    similitud_items = cosine_similarity(matriz_items.values, matriz_filtradaT.values)
    matriz_similitud_items_df = pd.DataFrame(similitud_items, index=matriz_items.index,
                                             columns=matriz_filtradaT.index)

    # Rating history; ratings below 4 are treated as negative evidence.
    interacciones = pd.merge(usuarios_historico, items_names[['id_item', 'visitas']],
                             on='id_item', how='left')
    interacciones['visitas'] = interacciones['visitas'].fillna(0)
    valoracion = interacciones['valoracion']
    interacciones['weighted_rating'] = np.where(valoracion >= 4, valoracion, -valoracion)

    # Per-user, per-category vector: each rating is spread over the item's
    # categories in proportion to the category scores.
    items_clasificacion_cp = items_clasificacion.copy()
    items_clasificacion_cp.columns = ['id_item', 'id_padre', 'score']
    interacciones_con_padre = pd.merge(interacciones, items_clasificacion_cp[['id_item', 'id_padre', 'score']],
                                       on='id_item', how='left')
    # NOTE(review): this sum runs over the merged table, so an item rated by k
    # users gets a total k times its own category-score sum. Looks unintended,
    # but left unchanged because it affects rankings -- confirm the intent.
    interacciones_con_padre['total_score'] = interacciones_con_padre.groupby('id_item')['score'].transform('sum')
    interacciones_con_padre['weighted_rating_final'] = interacciones_con_padre['weighted_rating'] * \
        (interacciones_con_padre['score'] / interacciones_con_padre['total_score'])
    agrupado = interacciones_con_padre.groupby(['id_user', 'id_padre'])['weighted_rating_final'].sum().reset_index()
    vector_por_usuario = agrupado.pivot(index='id_user', columns='id_padre', values='weighted_rating_final').fillna(0)
    vector_por_usuario = vector_por_usuario.reindex(columns=columnas_deseadas, fill_value=0)

    # Signal 2: history vector vs. item profile -> rows = users, columns = items.
    matriz_similitud_interacciones = cosine_similarity(vector_por_usuario, matriz_items)
    df_similitud_interacciones = pd.DataFrame(matriz_similitud_interacciones,
                                              index=vector_por_usuario.index,
                                              columns=matriz_items.index)

    if target_user not in matriz_similitud_items_df.columns or target_user not in df_similitud_interacciones.index:
        return {}, 0
    score_pref = matriz_similitud_items_df[target_user]
    score_hist = df_similitud_interacciones.loc[target_user]
    # Signal 3: popularity, log-damped.
    score_vis = np.log(1 + items_names.set_index('id_item')['visitas'])

    # Only items known to all three signals can be scored.
    common_items = score_pref.index.intersection(score_hist.index).intersection(score_vis.index)
    score_pref = score_pref.loc[common_items]
    score_hist = score_hist.loc[common_items]
    score_vis = score_vis.loc[common_items]

    def normalize_series(s):
        """Min-max scale to [0, 1]; a constant series is returned unchanged."""
        if s.max() == s.min():
            return s
        return (s - s.min()) / (s.max() - s.min())

    score_pref_norm = normalize_series(score_pref)
    score_hist_norm = normalize_series(score_hist)
    score_vis_norm = normalize_series(score_vis)

    final_score = alpha * score_pref_norm + beta * score_hist_norm + gamma * score_vis_norm

    # Drop everything already present in the target user's history.
    items_visitados = usuarios_historico.loc[usuarios_historico['id_user'] == target_user, 'id_item'].tolist()
    final_score = final_score.drop(labels=items_visitados, errors='ignore')

    # Keep the items at or above the requested quantile of the blended score.
    dynamic_threshold = final_score.quantile(dynamic_threshold_factor)
    dynamic_recs = final_score[final_score >= dynamic_threshold].sort_values(ascending=False)

    # Too few survivors: fall back to the plain top N.
    if len(dynamic_recs) < N:
        dynamic_recs = final_score.nlargest(N)

    num_recs = len(dynamic_recs)
    return dynamic_recs.to_dict(), num_recs

# =============================================================================
# DEMOGRAPHIC RECOMMENDATION (STUB)
# =============================================================================
def get_demographic_recommendations(target_user, N=10):
    """
    Placeholder for a demographic recommender.

    No demographic logic exists yet: both arguments are ignored and an empty
    ``{id_item: score}`` dict is returned together with its length (0).
    """
    placeholder_scores = {}
    return placeholder_scores, len(placeholder_scores)

# =============================================================================
# DYNAMIC WEIGHT COMPUTATION
# =============================================================================
def compute_dynamic_weights(collab_neighbors, content_count, demo_count, base_weights):
    """
    Rescale the base weights of the three recommenders by how much each one produced.

    The collaborative and content factors are each system's share of
    ``collab_neighbors + content_count``; the demographic factor is
    ``demo_count / 5``. Every factor is capped at 1 and defaults to 1 when its
    denominator is not positive. The scaled weights are normalised to sum to 1,
    or set to 1/3 each when they all vanish.

    Prints the resulting weights and returns them as a dict keyed by
    ``'collaborative'``, ``'content'`` and ``'demographic'``.
    """
    def _availability(count, expected):
        return min(1, count / expected) if expected > 0 else 1

    combined_evidence = collab_neighbors + content_count
    factors = {
        'collaborative': _availability(collab_neighbors, combined_evidence),
        'content': _availability(content_count, combined_evidence),
        # 5 is the expected amount of demographic evidence.
        'demographic': _availability(demo_count, 5),
    }

    dynamic_weights = {
        system: base_weights.get(system, 0) * factor
        for system, factor in factors.items()
    }

    total = sum(dynamic_weights.values())
    if total > 0:
        dynamic_weights = {system: value / total for system, value in dynamic_weights.items()}
    else:
        dynamic_weights = {system: 1/3 for system in ('collaborative', 'content', 'demographic')}

    print("Pesos din√°micos:", dynamic_weights)
    return dynamic_weights

# =============================================================================
# HYBRID FUNCTION FOR COMBINING RECOMMENDATIONS
# =============================================================================
def hybrid_recommender(target_user,
                       user_item_matrix, sim_matrix,
                       usuarios_historico, items_names,
                       preferencias, padres, items_clasificacion,
                       base_weights=None,
                       bonus_factor=0.1,
                       top_n=10):
    """
    Blend the collaborative, content-based and demographic recommenders for one user.

    Each recommender is run for ``target_user``; the base weights are then
    rescaled by ``compute_dynamic_weights`` according to how much each system
    returned, and every candidate item receives the weighted sum of the scores
    from the systems that proposed it.

    Parameters
    ----------
    target_user : id of the user to recommend for.
    user_item_matrix, sim_matrix : inputs of the collaborative recommender.
    usuarios_historico, items_names, preferencias, padres, items_clasificacion :
        inputs of the content-based recommender.
    base_weights : dict with keys ``'collaborative'``, ``'content'``, ``'demographic'``;
        defaults to 0.33 / 0.33 / 0.34 when omitted.
    bonus_factor : accepted for backward compatibility; currently NOT applied.
    top_n : number of recommendations to return.

    Returns
    -------
    list of ``(id_item, hybrid_score)`` tuples, best first, at most ``top_n`` long.
    """
    # A None sentinel avoids sharing one mutable dict between calls.
    if base_weights is None:
        base_weights = {'collaborative': 0.33, 'content': 0.33, 'demographic': 0.34}

    # Individual recommenders.
    rec_collab, collab_count = get_collaborative_recommendations(user_item_matrix, sim_matrix, target_user)
    rec_content, content_count = get_content_recommendations(usuarios_historico, items_names,
                                                             preferencias, padres, items_clasificacion,
                                                             target_user, N=top_n)

    print(rec_collab, rec_content)
    rec_demo, demo_count = get_demographic_recommendations(target_user, N=top_n)

    # Availability-adjusted weights.
    dynamic_w = compute_dynamic_weights(collab_count, content_count, demo_count, base_weights)
    print("Vecinos colaborativos:", collab_count, "Recs contenido:", content_count, "Recs demogr√°ficas:", demo_count)

    # Weighted sum over whichever systems proposed each item.
    # NOTE(review): the original counted contributing sources but never used the
    # count or bonus_factor; a multi-source bonus looks intended but was never
    # implemented, so none is applied here.
    weighted_sources = (
        (dynamic_w['collaborative'], rec_collab),
        (dynamic_w['content'], rec_content),
        (dynamic_w['demographic'], rec_demo),
    )
    all_items = set(rec_collab.keys()) | set(rec_content.keys()) | set(rec_demo.keys())
    hybrid_scores = {
        item: sum(weight * recs[item] for weight, recs in weighted_sources if item in recs)
        for item in all_items
    }

    # Best first, truncated to top_n.
    hybrid_sorted = sorted(hybrid_scores.items(), key=lambda x: x[1], reverse=True)
    return hybrid_sorted[:top_n]

# =============================================================================
# BLOQUE PRINCIPAL
# =============================================================================
def main():
    """
    Interactive entry point for single-user hybrid recommendations.

    Loads the data, builds the user-item and user-similarity matrices, asks for a
    user id on stdin and prints the top 10 hybrid recommendations with their scores.
    Returns early (after printing a message) when the id cannot be parsed.
    """
    usuarios_historico, items_names, preferencias, padres, items_clasificacion = load_data()

    # User-item matrix for the collaborative part, widened to every catalogued item.
    catalog_ids = items_names['id_item'].unique()
    user_item_matrix = usuarios_historico.pivot_table(
        index='id_user', columns='id_item', values='valoracion'
    ).reindex(columns=catalog_ids)

    sim_matrix = compute_user_similarity(user_item_matrix)

    try:
        raw_id = input("Ingrese su ID de usuario: ")
        target_user = int(raw_id.strip())
    except Exception as exc:
        print("ID inv√°lido:", exc)
        return

    weights = {'collaborative': 0.33, 'content': 0.33, 'demographic': 0.33}
    recommendations = hybrid_recommender(
        target_user, user_item_matrix, sim_matrix,
        usuarios_historico, items_names,
        preferencias, padres, items_clasificacion,
        base_weights=weights,
        bonus_factor=0.1,
        top_n=10,
    )

    print(f"\nRecomendaciones h√≠bridas para el usuario {target_user}:")
    for item, score in recommendations:
        # Resolve the display name of the item from its id.
        matching_names = items_names.loc[items_names['id_item'] == item, 'name_item']
        item_name = matching_names.values[0]
        print(f"{item_name} (Score: {score:.4f})")


if __name__ == "__main__":
    main()


FileNotFoundError: [Errno 2] No such file or directory: 'puntuaciones_usuario_base.csv'

In [26]:
def aggregate_recommendations_by_type(user_recs_dict, group_seen_items, top_n=10):
    """
    Merge per-user recommendations into one group ranking per recommender type.

    ``user_recs_dict`` maps each user id to ``{rec_type: [(item, score), ...]}``.
    For every type (``'collaborative'``, ``'content'``, ``'demographic'``) an item's
    group score is the mean of its scores over ALL users, counting 0.0 for users
    who did not receive it. Items found in ``group_seen_items`` are skipped.

    Returns ``{rec_type: {item: mean_score}}`` holding at most ``top_n`` items per
    type. The intermediate score tables are printed while aggregating.
    """
    combined = {'collaborative': {}, 'content': {}, 'demographic': {}}
    user_ids = list(user_recs_dict.keys())

    for rec_type in list(combined):
        # item -> {user id -> score}, restricted to items nobody in the group has seen.
        all_scores = {}
        for uid in user_ids:
            per_user = dict(user_recs_dict[uid].get(rec_type, []))
            for item, value in per_user.items():
                if item not in group_seen_items:
                    all_scores.setdefault(item, {})[uid] = value

        print(all_scores)

        # Mean over every group member; a missing recommendation counts as 0.0.
        scores_combined = {
            item: np.mean([user_scores.get(uid, 0.0) for uid in user_ids])
            for item, user_scores in all_scores.items()
        }

        ranked = sorted(scores_combined.items(), key=lambda pair: pair[1], reverse=True)
        combined[rec_type] = dict(ranked[:top_n])
        print(combined)

    return combined



In [27]:
def group_hybrid_recommender_with_aggregation(user_ids,
                                              user_item_matrix, sim_matrix,
                                              usuarios_historico, items_names,
                                              preferencias, padres, items_clasificacion,
                                              base_weights=None,
                                              bonus_factor=0.1,
                                              top_n=50):
    """
    Recommend items to a group of users by aggregating per-type scores, then blending.

    For every user the three recommenders are run and each result is cut to its
    ``2 * top_n`` best items. The per-user lists are merged per recommender type by
    ``aggregate_recommendations_by_type`` (items any group member has already rated
    are excluded), and the per-type group scores are combined as a weighted sum.

    Parameters
    ----------
    user_ids : iterable of user ids forming the group.
    user_item_matrix, sim_matrix : inputs of the collaborative recommender.
    usuarios_historico, items_names, preferencias, padres, items_clasificacion :
        inputs of the content-based recommender.
    base_weights : per-type weights; defaults to 0.33 / 0.33 / 0.34 when omitted.
        A type missing from the dict is weighted 0.33. Unlike the single-user
        recommender, these weights are used as given (no dynamic rescaling).
    bonus_factor : accepted for backward compatibility; currently NOT applied.
    top_n : number of group recommendations to return.

    Returns
    -------
    list of ``(id_item, score)`` tuples, best first, at most ``top_n`` long.
    """
    # A None sentinel avoids sharing one mutable dict between calls.
    if base_weights is None:
        base_weights = {'collaborative': 0.33, 'content': 0.33, 'demographic': 0.34}

    user_recs_by_type = {}

    for uid in user_ids:
        # Individual recommendations
        rec_collab, _ = get_collaborative_recommendations(user_item_matrix, sim_matrix, uid)
        rec_content, _ = get_content_recommendations(usuarios_historico, items_names,
                                                     preferencias, padres, items_clasificacion,
                                                     uid, N=top_n * 2)
        rec_demo, _ = get_demographic_recommendations(uid, N=top_n * 2)

        # Convert to lists of (item, score) tuples, best first, and truncate.
        user_recs_by_type[uid] = {
            'collaborative': sorted(rec_collab.items(), key=lambda x: x[1], reverse=True)[:top_n * 2],
            'content': sorted(rec_content.items(), key=lambda x: x[1], reverse=True)[:top_n * 2],
            'demographic': sorted(rec_demo.items(), key=lambda x: x[1], reverse=True)[:top_n * 2]
        }

    # Merge each recommender type across the group, skipping already-rated items.
    group_seen_items = get_group_seen_items(usuarios_historico, user_ids)

    aggregated_by_type = aggregate_recommendations_by_type(user_recs_by_type,
                                                           group_seen_items,
                                                           top_n=top_n)

    # Final hybrid fusion: weighted sum of the per-type group scores.
    hybrid_scores = {}
    for rec_type, item_scores in aggregated_by_type.items():
        weight = base_weights.get(rec_type, 0.33)
        for item, score in item_scores.items():
            hybrid_scores[item] = hybrid_scores.get(item, 0) + weight * score

    # Hybrid top-N
    hybrid_top = sorted(hybrid_scores.items(), key=lambda x: x[1], reverse=True)[:top_n]

    return hybrid_top


def get_group_seen_items(usuarios_historico, user_ids):
    """
    Collect the ids of all items that at least one user in ``user_ids`` has in
    ``usuarios_historico``, returned as a set.
    """
    belongs_to_group = usuarios_historico['id_user'].isin(user_ids)
    group_item_ids = usuarios_historico.loc[belongs_to_group, 'id_item']
    return set(group_item_ids.unique())



def main():
    """
    Interactive entry point for group recommendations.

    Loads the data, builds the user-item and user-similarity matrices, reads a
    comma-separated list of user ids from stdin (tokens that are not all digits
    are silently dropped) and prints the top 50 group recommendations.
    Returns early with a message when parsing fails or no valid id remains.
    """
    usuarios_historico, items_names, preferencias, padres, items_clasificacion = load_data()

    catalog_ids = items_names['id_item'].unique()
    user_item_matrix = usuarios_historico.pivot_table(
        index='id_user', columns='id_item', values='valoracion'
    ).reindex(columns=catalog_ids)

    sim_matrix = compute_user_similarity(user_item_matrix)

    try:
        raw_ids = input("Ingrese los IDs de usuario separados por coma: ").strip()
        user_ids = []
        for token in raw_ids.split(','):
            token = token.strip()
            if token.isdigit():
                user_ids.append(int(token))
    except Exception as exc:
        print("Error con los IDs:", exc)
        return

    if not user_ids:
        print("No se ingresaron IDs v√°lidos.")
        return

    weights = {'collaborative': 0.33, 'content': 0.33, 'demographic': 0.34}
    final_recs = group_hybrid_recommender_with_aggregation(
        user_ids,
        user_item_matrix, sim_matrix,
        usuarios_historico, items_names,
        preferencias, padres, items_clasificacion,
        base_weights=weights,
        bonus_factor=0.1,
        top_n=50,
    )

    print(f"\nüéØ Recomendaciones GRUPALES para usuarios {user_ids}:")
    for item, score in final_recs:
        # Resolve the display name of the item from its id.
        matching_names = items_names.loc[items_names['id_item'] == item, 'name_item']
        item_name = matching_names.values[0]
        print(f"‚úÖ {item_name} (Score: {score:.4f})")

# Run the group-recommendation entry point defined in this cell; the call is
# unguarded, so it executes whenever this code is run or imported.
main()



Ingrese los IDs de usuario separados por coma: 123,170
{17415: {123: 0.7060952532422895, 170: 0.2977655480545009}, 17536: {123: 0.6294580194543645}, 17374: {123: 0.6294580194543645}, 17402: {123: 0.6294580194543645}, 17403: {123: 0.6294580194543645}, 17429: {123: 0.6294580194543645}, 17502: {123: 0.6294580194543645}, 17393: {123: 0.6294580194543644}, 17431: {123: 0.6294580194543644}, 17550: {123: 0.6294580194543644}, 17381: {123: 0.6294580194543644}, 17410: {123: 0.6294580194543644}, 17418: {123: 0.6294580194543644}, 17422: {123: 0.6294580194543644}, 17436: {123: 0.6294580194543644}, 17444: {123: 0.6294580194543644}, 17516: {123: 0.6294580194543644}, 17540: {123: 0.6294580194543644}, 17371: {123: 0.6294580194543644}, 17434: {123: 0.6294580194543644}, 17547: {123: 0.6294580194543644}, 17369: {123: 0.620825699264695}, 17513: {123: 0.6194899964270894}, 17427: {123: 0.6103466231944787}, 17545: {123: 0.5637237702460132}, 17500: {123: 0.5637237702460132}, 17483: {123: 0.5188938623210766}, 17