In [178]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.layers import Dropout
import pandas as pd

In [179]:
user_df = pd.read_json("../data/MOOCCube/entities/user.json",lines=True)

user_df = user_df[["id","course_order"]]

# user_df = user_df.head(5)

data = user_df.to_dict(orient='records')[:1000]
print(data)

[{'id': 'U_7001215', 'course_order': ['C_course-v1:TsinghuaX+00740043_2x_2015_T2+sp', 'C_course-v1:TsinghuaX+30240184+sp', 'C_course-v1:TsinghuaX+00740043X_2015_T2+sp', 'C_course-v1:TsinghuaX+10421094X_2015_2+sp', 'C_course-v1:TsinghuaX+30240184_2X+sp']}, {'id': 'U_10402446', 'course_order': ['C_course-v1:TsinghuaX+00510888X+2019_T1', 'C_course-v1:TsinghuaX+30640014X+sp']}, {'id': 'U_10359065', 'course_order': ['C_course-v1:TsinghuaX+00612642X+sp', 'C_course-v1:TsinghuaX+80000271X+sp']}, {'id': 'U_7423998', 'course_order': ['C_course-v1:TsinghuaX+30240184_2X+sp', 'C_course-v1:SCUT+145033+sp', 'C_course-v1:TsinghuaX+00740043X_2015_T2+sp', 'C_course-v1:TsinghuaX+30240184+sp', 'C_course-v1:TsinghuaX+30640014X+sp', 'C_course-v1:TsinghuaX+00690092X+sp', 'C_course-v1:TsinghuaX+30240243X+sp']}, {'id': 'U_545306', 'course_order': ['C_course-v1:TsinghuaX+20430064_2X+sp', 'C_course-v1:TsinghuaX+02070251X+2019_T1', 'C_course-v1:TsinghuaX+0350161X_2015_T2+sp', 'C_course-v1:TsinghuaX+60240013X+sp',

In [180]:



book_to_id = {book: idx for idx, book in enumerate(set(book for d in data for book in d["course_order"]))}
num_books = len(book_to_id)

user_book_matrix = np.zeros((len(data), num_books), dtype=np.float32)
for i, d in enumerate(data):
    for book in d["course_order"]:
        book_id = book_to_id[book]
        user_book_matrix[i, book_id] = 1.0

model = Sequential([
    Embedding(input_dim=num_books, output_dim=64, input_length=num_books),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(num_books, activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=0.001), loss=CategoricalCrossentropy(), metrics=[CategoricalAccuracy()])

num_epochs = 1000
model.fit(user_book_matrix, user_book_matrix, epochs=num_epochs, verbose=1)




Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
 6/32 [====>.........................] - ETA: 1s - loss: 13505.3857 - categorical_accuracy: 0.0260

KeyboardInterrupt: 

In [None]:
def recommend_books_for_user(user_id, model, data, book_to_id):

    user_index = None
    for i, d in enumerate(data):
        if d["id"] == user_id:
            user_index = i
            break

    user_book_probabilities = model.predict(user_book_matrix[user_index:user_index+1])[0]

    recommendations = {}
    for book, book_id in book_to_id.items():
        recommendations[book] = user_book_probabilities[book_id]

    user_books = data[user_index]["course_order"]
    for book in user_books:
        recommendations.pop(book, None)

    sorted_recommendations = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)

    return sorted_recommendations


In [None]:
user_id_to_recommend = 'U_10359065'
recommendations = recommend_books_for_user(user_id_to_recommend, model, data, book_to_id)[:10]

print(f"Recomendaciones para el usuario {user_id_to_recommend}:")
recomendations = []
for book, probability in recommendations:
    print(f"Libro: {book}, Probabilidad: {probability:.4f}")
    dic = {"id":book,"probability":probability} 
    recomendations.append(dic)
print(recomendations)
    

Recomendaciones para el usuario U_10359065:
Libro: C_course-v1:TsinghuaX+00740123_X+sp, Probabilidad: 0.1263
Libro: C_course-v1:TsinghuaX+30240184+sp, Probabilidad: 0.1252
Libro: C_course-v1:TsinghuaX+00740043X_2015_T2+sp, Probabilidad: 0.1246
Libro: C_course-v1:TsinghuaX+30240243X+sp, Probabilidad: 0.0753
Libro: C_course-v1:TsinghuaX+20250103X+sp, Probabilidad: 0.0744
Libro: C_course-v1:MITx+6_00_1x+sp, Probabilidad: 0.0725
Libro: C_course-v1:TsinghuaX+20250064+sp, Probabilidad: 0.0370
Libro: C_course-v1:TsinghuaX+30640014X+sp, Probabilidad: 0.0356
Libro: C_course-v1:TsinghuaX+10610224X+sp, Probabilidad: 0.0309
Libro: C_course-v1:MicrosoftX+Microsoft101+sp, Probabilidad: 0.0307
[{'id': 'C_course-v1:TsinghuaX+00740123_X+sp', 'probability': 0.12626937}, {'id': 'C_course-v1:TsinghuaX+30240184+sp', 'probability': 0.12518379}, {'id': 'C_course-v1:TsinghuaX+00740043X_2015_T2+sp', 'probability': 0.12463273}, {'id': 'C_course-v1:TsinghuaX+30240243X+sp', 'probability': 0.075292826}, {'id': 'C_