In [2]:
import numpy as np
import torch
import torch.nn as nn

# 1. 먼저 전체 데이터를 하나의 리스트로 읽기
users, movies, ratings = [], [], []
user_len, movie_len = 0, 0

with open("../data/ratings.dat", "r") as f:
    for line in f:
        u, m, r, t = line.split("::")
        user_len = max(user_len, int(u))
        movie_len = max(movie_len, int(m))
        users.append(int(u))
        movies.append(int(m))
        ratings.append(float(r))

indices = np.arange(len(ratings))
np.random.shuffle(indices)

users = np.array(users)[indices]
movies = np.array(movies)[indices]
ratings = np.array(ratings)[indices]

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")


users = torch.tensor(users, dtype=torch.long, device="mps")
movies = torch.tensor(movies, dtype=torch.long, device = "mps")
ratings = torch.tensor(ratings, dtype=torch.float, device = "mps")

# 3. 데이터 분할 (예: 60% 학습, 20% 검증, 20% 테스트)
n_samples = len(ratings)
train_size = int(0.6 * n_samples)
valid_size = int(0.2 * n_samples)

# 학습 데이터
users_train = users[:train_size]
movies_train = movies[:train_size]
ratings_train = ratings[:train_size]


# 검증 데이터
users_valid = users[train_size:train_size+valid_size]
movies_valid = movies[train_size:train_size+valid_size]
ratings_valid = ratings[train_size:train_size+valid_size]


# 테스트 데이터
users_test = users[train_size+valid_size:]
movies_test = movies[train_size+valid_size:]
ratings_test = ratings[train_size+valid_size:]






In [4]:
class DL_MF(nn.Module):
    def __init__(self, user_len, movie_len,rank):
        super(DL_MF,self).__init__()
        self.user_len = user_len
        self.movie_len = movie_len
        self.rank = rank
        
        self.users_embs = nn.Embedding(user_len+1, rank)
        self.items_embs = nn.Embedding(movie_len+1, rank)
        
        self.fc1 = nn.Linear(rank*2, rank)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(rank, 5)
        self.fc3 = nn.Linear(5, 1)
        
    def forward(self, user_idx, movie_idx):
        user_emb = self.users_embs(user_idx)
        item_emb = self.items_embs(movie_idx)
        user_item_concat_emb = torch.concat([user_emb, item_emb], dim=1)
        x = self.fc1(user_item_concat_emb)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)  # 최종 출력 (batch_size, 1)
        
        return x.squeeze()  # (batch_size,)

In [5]:
from torch.utils.data import DataLoader, TensorDataset

train_loader = DataLoader(TensorDataset(users_train, movies_train, ratings_train), batch_size=64, shuffle=True)
valid_loader = DataLoader(TensorDataset(users_valid, movies_valid, ratings_valid), batch_size=64, shuffle=False)
test_loader = DataLoader(TensorDataset(users_test, movies_test, ratings_test), batch_size=64, shuffle=False)

In [6]:
import torch.optim as optim




device = torch.device("mps" if torch.mps.is_available() else "cpu")

epochs = 10
rank = 10
model = DL_MF(user_len,movie_len,rank).to(device)
optimizer = optim.Adam(model.parameters(), lr = 0.001)

criterion = nn.MSELoss()



epoch_train_losses, epoch_val_losses = [], []
for epoch in range(epochs):
    model.train()
    train_losses = []
    for user, movie, rating in train_loader:
        user, movie, rating = user.to(device), movie.to(device), rating.to(device)

        pred = model(user, movie)
        loss = criterion(pred, rating)
        train_losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # 검증 루프
    model.eval()
    valid_losses = []
    with torch.no_grad():
        for user, movie, rating in valid_loader:
            user, movie, rating = user.to(device), movie.to(device), rating.to(device)
            pred = model(user, movie)
            loss = criterion(pred, rating)
            valid_losses.append(loss.item())
            

    # 로그 출력
    print(f'Epoch: {epoch+1}, Train Loss: {np.mean(train_losses):.4f}, Val Loss: {np.mean(valid_losses):.4f}')





Epoch: 1, Train Loss: 1.1848, Val Loss: 0.9321
Epoch: 2, Train Loss: 0.8774, Val Loss: 0.8558
Epoch: 3, Train Loss: 0.8353, Val Loss: 0.8438
Epoch: 4, Train Loss: 0.8240, Val Loss: 0.8370
Epoch: 5, Train Loss: 0.8181, Val Loss: 0.8353
Epoch: 6, Train Loss: 0.8143, Val Loss: 0.8360
Epoch: 7, Train Loss: 0.8112, Val Loss: 0.8327
Epoch: 8, Train Loss: 0.8079, Val Loss: 0.8319
Epoch: 9, Train Loss: 0.8047, Val Loss: 0.8297
Epoch: 10, Train Loss: 0.8015, Val Loss: 0.8284


In [7]:
model.eval()
with torch.no_grad():
    user = users_test[50].unsqueeze(0)
    movie = movies_test[50].unsqueeze(0)
    ratings = ratings_test[50].unsqueeze(0)
    pred = model(user, movie)
    print(pred)

tensor(3.7693, device='mps:0')


In [8]:
print(ratings)

tensor([4.], device='mps:0')
