In [278]:
import pandas as pd
from collections import defaultdict

users, movies, timestamp = [], [], []
user_len, movie_len = 0, 0
movie_hist_dict = defaultdict(list)

with open("../../data/ratings.dat", "r") as f:
    for line in f:
        u, m, r, t = line.split("::")
        user_len = max(user_len, int(u))
        movie_len = max(movie_len, int(m))
        movie_hist_dict[u].append([m,t])


for u in movie_hist_dict:
    movie_hist_dict[u].sort(key=lambda x: int(x[1]))
    movie_hist_dict[u] = [int(movie) for movie, _ in movie_hist_dict[u]]





In [279]:
import torch
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")


In [280]:
import torch
from collections import deque

def build_session_parallel_batches(movie_hist_dict, batch_size, max_steps=20):
    sessions = list(movie_hist_dict.values())
    session_queue = deque(sessions)  # 전체 세션을 큐로 관리

    active_sessions = []
    session_positions = []

    # 초기 active session 채우기
    for _ in range(batch_size):
        if session_queue:
            s = session_queue.popleft()
            active_sessions.append(s)
            session_positions.append(0)

    batch_inputs = []
    batch_targets = []
    reset_masks = []

    for step in range(max_steps):
        input_batch = []
        target_batch = []
        reset_batch = []

        for i in range(batch_size):
            session = active_sessions[i]
            pos = session_positions[i]

            if pos + 1 < len(session):
                input_batch.append(session[pos])
                target_batch.append(session[pos + 1])
                reset_batch.append(0 if pos > 0 else 1)
                session_positions[i] += 1
            else:
                # 세션이 끝났으면 새로운 세션으로 교체
                if session_queue:
                    new_session = session_queue.popleft()
                    active_sessions[i] = new_session
                    session_positions[i] = 0

                    input_batch.append(new_session[0])
                    if len(new_session) > 1:
                        target_batch.append(new_session[1])
                    else:
                        target_batch.append(-1)  # 단일 아이템 세션
                    reset_batch.append(1)
                    session_positions[i] += 1
                else:
                    # 더 이상 세션이 없다면 padding
                    input_batch.append(0)
                    target_batch.append(-1)
                    reset_batch.append(1)

        batch_inputs.append(input_batch)
        batch_targets.append(target_batch)
        reset_masks.append(reset_batch)

    return (
        torch.tensor(batch_inputs),      # (timesteps, batch_size)
        torch.tensor(batch_targets),
        torch.tensor(reset_masks)
    )


In [281]:
max_steps = max(len(session) for session in movie_hist_dict.values()) - 1

batch_inputs, batch_targets, reset_masks = build_session_parallel_batches(movie_hist_dict, batch_size=16, max_steps=max_steps)

print("🟢 Inputs:\n", batch_inputs)
print("🎯 Targets:\n", batch_targets)
print("🔄 Reset Masks:\n", reset_masks)


🟢 Inputs:
 tensor([[3186, 1198,  593,  ..., 2243,  748, 2643],
        [1270, 1210, 2858,  ..., 1968, 1198,  266],
        [1721, 1217, 3534,  ..., 2976, 3421, 1269],
        ...,
        [ 317, 2092,  150,  ...,  199, 3061, 1100],
        [2872, 2371,  162,  ..., 2971, 2941, 3667],
        [ 653,  587,  457,  ..., 1035, 1288, 1431]])
🎯 Targets:
 tensor([[1270, 1210, 2858,  ..., 1968, 1198,  266],
        [1721, 1217, 3534,  ..., 2976, 3421, 1269],
        [1022, 2717, 1968,  ..., 3033, 1968, 2555],
        ...,
        [2872, 2083,  162,  ..., 2971, 2941, 3667],
        [ 653,  587,  457,  ..., 1035, 1288, 1431],
        [   2, 3004, 2336,  ..., 1081,  900, 1544]])
🔄 Reset Masks:
 tensor([[1, 1, 1,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 1, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]])


In [282]:
import torch
import torch.nn as nn

class Gru4Rec(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=2, device="cpu"):
        super(Gru4Rec, self).__init__()
        self.device = device
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, hidden_size).to(device)
        self.gru = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True).to(device)
        self.linear = nn.Linear(hidden_size, output_size).to(device)

    def forward(self, input_seq, hidden=None, reset_mask=None):
        input_seq = input_seq.to(self.device) 
        input_seq = input_seq.unsqueeze(1)

        batch_size = input_seq.size(0)

        if hidden is None:
            hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=self.device)

        if reset_mask is not None:
            reset_mask = reset_mask.to(self.device).float().view(1, batch_size, 1)
            zero_hidden = torch.zeros_like(hidden)
            hidden = hidden * (1.0 - reset_mask) + zero_hidden * reset_mask

        embedded = self.embedding(input_seq)
        output, hidden = self.gru(embedded, hidden)
        predictions = self.linear(output[:, -1, :])
        return predictions, hidden



In [284]:
def recall_at_k(predictions, targets, k=10):

    _, top_k_idx = torch.topk(predictions, k, dim=-1, largest=True, sorted=True)

    recall = 0
    for idx in range(predictions.size(0)):
        if targets[idx].item() in top_k_idx[idx]:
            recall += 1
    return recall / predictions.size(0)

# MRR@k 계산 함수
def mrr_at_k(predictions, targets, k=20):
    _, top_k_idx = torch.topk(predictions, k, dim=-1, largest=True, sorted=True)
    mrr = 0
    for idx in range(predictions.size(0)):  
        rank = (top_k_idx[idx] == targets[idx].item()).nonzero()
        if rank.size(0) > 0:
            mrr += 1 / (rank.item() + 1) 
    return mrr / predictions.size(0)


In [298]:
input_size = movie_len + 1  
hidden_size = 128
output_size = input_size
num_layers = 2
learning_rate = 0.0001
num_epochs = 60
device = device
r_k = 10
m_k = 20
max_steps= max_steps




In [299]:
import torch.optim as optim

model = Gru4Rec(input_size, hidden_size, output_size, num_layers,device=device).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [300]:
for epoch in range(num_epochs):
    epoch_loss = 0.0  # 에폭당 평균 손실값 계산을 위한 변수
    epoch_recall = 0.0  # Recall 평균 계산 변수
    epoch_mrr = 0.0     # MRR 평균 계산 변수

    for step in range(max_steps):
        input_seq = batch_inputs[step].to(device)         # (batch_size,)
        target_seq = batch_targets[step].to(device)       # (batch_size,)
        reset_mask = reset_masks[step].to(device)   # (batch_size,)

        predictions,_ = model(input_seq,reset_mask=reset_mask)

        loss = criterion(predictions, target_seq)

        epoch_loss += loss.item() 

        recall = recall_at_k(predictions, target_seq, r_k)
        epoch_recall += recall
        

        mrr = mrr_at_k(predictions, target_seq, m_k)
        epoch_mrr += mrr

        optimizer.zero_grad()
        loss.backward()

        optimizer.step()
        predictions = torch.argmax(predictions, dim=-1)  # (batch_size,)



            
    # 한 에폭이 끝날 때마다 평균 손실과 Recall@20, MRR@20 출력
    if(epoch%10==0):
      print(f"Epoch {epoch+1} complete. Average Loss: {epoch_loss / max_steps}, "
        f"Recall@10: {epoch_recall / max_steps}, MRR@20: {epoch_mrr / max_steps}")


Epoch 1 complete. Average Loss: 7.768088232527757, Recall@10: 0.022616731517509727, MRR@20: 0.007958387272345525
Epoch 11 complete. Average Loss: 6.58779737457691, Recall@10: 0.12848573281452658, MRR@20: 0.0629947032106301
Epoch 21 complete. Average Loss: 5.714881359319691, Recall@10: 0.2689688715953307, MRR@20: 0.13213215154977456


In [272]:
def predict_next_movie(model, user_history, top_k=10):
    model.eval()
    with torch.no_grad():
        hidden = None
        reset_mask = torch.tensor([1], dtype=torch.float32)  # 첫 입력은 리셋 필요

        for i, movie_id in enumerate(user_history):
            input_seq = torch.tensor([movie_id])  # (batch=1)
            reset = reset_mask if i == 0 else torch.tensor([0], dtype=torch.float32)
            output, hidden = model(input_seq, hidden=hidden, reset_mask=reset)

        # 마지막 output으로부터 top-k 영화 예측
        topk_values, topk_indices = torch.topk(output, top_k)
        return topk_indices.squeeze().tolist()  # 예측된 top-k 영화 ID 리스트


In [273]:
user_history = [1, 5, 100, 4, 20, 50, 30]

# 예측
recommended_movies = predict_next_movie(model, user_history, top_k=5)

print("Top-5 추천 영화 ID:", recommended_movies)


Top-5 추천 영화 ID: [720, 318, 2396, 3753, 2858]
