In [None]:
import os
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from metrics import get_metrics

In [None]:
# Name of the dataset to evaluate.
# Listed options: coveo, diginetica, rees46, retailrocket, yoochoose
dataset = "coveo"
# Relative path of the directory that holds the selected dataset's files.
dataset_path = f"../datasets/{dataset}"

In [None]:
# Tab-separated train / test splits of the selected dataset.
train_path = os.path.join(
    dataset_path,
    f"{dataset}_processed_view_train_full.tsv",
)
test_path = os.path.join(
    dataset_path,
    f"{dataset}_processed_view_test.tsv",
)

In [None]:
def load_and_process_data(path):
    """Read a tab-separated interaction log and integer-encode its ids.

    The file must contain the columns ``SessionId`` and ``ItemId``.

    Args:
        path: Location of the TSV file, passed straight to ``pd.read_csv``.

    Returns:
        A tuple ``(frame, n_sessions, n_items)`` where ``frame`` is the loaded
        DataFrame with a new ``UserId`` column (category codes of
        ``SessionId``) and with ``ItemId`` overwritten by its own category
        codes, and the two counts are the number of distinct codes in each
        of those columns.
    """
    frame = pd.read_csv(path, sep='\t')

    def _codes(column):
        # Integer category codes for one column of the loaded frame.
        return frame[column].astype('category').cat.codes.values

    # Both encodings are computed from the raw columns before anything is
    # written back, so the original ItemId values are still intact here.
    session_codes = _codes('SessionId')
    item_codes = _codes('ItemId')

    frame['UserId'] = session_codes
    frame['ItemId'] = item_codes

    n_sessions = len(np.unique(session_codes))
    n_items = len(np.unique(item_codes))
    return frame, n_sessions, n_items


def create_interaction_matrix(data, num_users, num_items):
    """Build a dense binary user-by-item interaction matrix.

    Args:
        data: DataFrame with integer ``UserId`` and ``ItemId`` columns that
            are used directly as row / column indices.
        num_users: Number of rows of the resulting matrix.
        num_items: Number of columns of the resulting matrix.

    Returns:
        A ``(num_users, num_items)`` float64 array holding 1 at every
        ``(UserId, ItemId)`` pair present in ``data`` and 0 elsewhere.

    Raises:
        IndexError: If an id falls outside the requested matrix shape.
    """
    interactions = np.zeros((num_users, num_items))
    if len(data) == 0:
        # An empty frame may carry non-integer (object) columns that cannot
        # be used as indices; there is nothing to mark anyway.
        return interactions

    # One vectorized assignment instead of a Python-level loop over rows;
    # duplicate pairs simply write the same 1 again.
    interactions[data['UserId'].to_numpy(), data['ItemId'].to_numpy()] = 1
    return interactions


def _majority_vote(neighbor_labels):
    """Return the most frequent label along axis 1 (the neighbor axis).

    ``neighbor_labels`` has shape ``(n_queries, k, ...)``; the result has
    shape ``(n_queries, ...)``. Ties go to the smallest label value.
    """
    if neighbor_labels.size == 0:
        reduced_shape = neighbor_labels.shape[:1] + neighbor_labels.shape[2:]
        return np.empty(reduced_shape, dtype=neighbor_labels.dtype)

    candidates = np.unique(neighbor_labels)  # sorted distinct label values
    # votes[c] holds, per query (and per label column), how many of the k
    # neighbors carry the label candidates[c].
    votes = np.stack([(neighbor_labels == value).sum(axis=1) for value in candidates])
    # argmax keeps the first maximum, i.e. the smallest label on a tie.
    return candidates[np.argmax(votes, axis=0)]


def knn_model(X_train, y_train, X_test, k=20):
    """Predict labels for ``X_test`` by majority vote of the k nearest training rows.

    Neighbors are ranked by Euclidean distance between feature rows.

    Args:
        X_train: 2-D array-like of training features, ``(n_train, n_features)``.
        y_train: Array-like of integer-valued labels whose first axis matches
            ``X_train``; it may be 1-D or carry one label column per output.
        X_test: 2-D array-like of query features, ``(n_test, n_features)``.
        k: Number of neighbors that vote. Values larger than the training
            set are clamped to ``n_train``.

    Returns:
        A tuple ``(predictions, X_train, y_train)``. ``predictions`` is an
        int32 NumPy array of shape ``(n_test,) + y_train.shape[1:]``; the
        other two elements are the inputs, returned unchanged.

    Raises:
        ValueError: If the training set is empty or ``k`` is smaller than 1.
    """
    train_data = tf.constant(X_train, dtype=tf.float32)
    test_data = tf.constant(X_test, dtype=tf.float32)
    # Cast in NumPy first so float-typed label arrays convert reliably.
    train_labels = tf.constant(np.asarray(y_train, dtype=np.int32))

    n_train = int(train_data.shape[0])
    if n_train == 0:
        raise ValueError("knn_model requires at least one training sample")
    if k < 1:
        raise ValueError("k must be a positive integer")
    k = min(k, n_train)  # top_k fails when k exceeds the last dimension

    # Squared Euclidean distances, shape (n_test, n_train), computed as
    # ||a||^2 - 2 a.b + ||b||^2. This avoids materializing the
    # (n_test, n_train, n_features) difference tensor, and squared distances
    # rank neighbors exactly like the distances themselves.
    test_sq = tf.reduce_sum(tf.square(test_data), axis=1, keepdims=True)
    train_sq = tf.reduce_sum(tf.square(train_data), axis=1)
    cross = tf.matmul(test_data, train_data, transpose_b=True)
    sq_distances = test_sq - 2.0 * cross + train_sq[tf.newaxis, :]

    # top_k works on the last axis, so each test row selects training rows.
    _, indices = tf.nn.top_k(-sq_distances, k=k)
    nearest_labels = tf.gather(train_labels, indices)  # (n_test, k, ...)

    # TensorFlow has no built-in mode reduction; vote with NumPy instead.
    predictions = _majority_vote(nearest_labels.numpy())

    return predictions, X_train, y_train

In [None]:
# Load and encode the training data
train_data, num_users, num_items = load_and_process_data(train_path)

# Build the interaction matrix for the training data
X_train = create_interaction_matrix(train_data, num_users, num_items)
y_train = X_train.copy()  # the labels are the interactions themselves

# Load and encode the test data; keep its own session count
test_data, num_test_users, _ = load_and_process_data(test_path)

# Build the interaction matrix for the test data. Rows follow the test
# sessions (not the training session count), so there is exactly one query
# row per test session. Columns keep the training width so that train and
# test features have the same dimensionality.
# NOTE(review): load_and_process_data encodes ItemId independently per file,
# so a given column may refer to different items in train and test — confirm
# and share one item vocabulary across both splits.
X_test = create_interaction_matrix(test_data, num_test_users, num_items)

# Standardize features using statistics fitted on the training data only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Run the KNN model on the test data
predictions, X_train, y_train = knn_model(X_train, y_train, X_test)

# Compute the metrics
metrics = get_metrics(f"knn_tensorflow_{dataset}", test_data, predictions)