In [1]:
print("helloworld")

helloworld


In [2]:
import numpy as np
import torch
import torch.nn as nn

# 1. 먼저 전체 데이터를 하나의 리스트로 읽기
users, movies, ratings = [], [], []
user_len, movie_len = 0, 0

with open("../data/ratings.dat", "r") as f:
    for line in f:
        u, m, r, t = line.split("::")
        user_len = max(user_len, int(u))
        movie_len = max(movie_len, int(m))
        users.append(int(u))
        movies.append(int(m))
        ratings.append(float(r))

indices = np.arange(len(ratings))
np.random.shuffle(indices)

users = np.array(users)[indices]
movies = np.array(movies)[indices]
ratings = np.array(ratings)[indices]

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")


users = torch.tensor(users, dtype=torch.long, device="mps")
movies = torch.tensor(movies, dtype=torch.long, device = "mps")
ratings = torch.tensor(ratings, dtype=torch.float, device = "mps")

# 3. 데이터 분할 (예: 60% 학습, 20% 검증, 20% 테스트)
n_samples = len(ratings)
train_size = int(0.6 * n_samples)
valid_size = int(0.2 * n_samples)

# 학습 데이터
users_train = users[:train_size]
movies_train = movies[:train_size]
ratings_train = ratings[:train_size]


# 검증 데이터
users_valid = users[train_size:train_size+valid_size]
movies_valid = movies[train_size:train_size+valid_size]
ratings_valid = ratings[train_size:train_size+valid_size]


# 테스트 데이터
users_test = users[train_size+valid_size:]
movies_test = movies[train_size+valid_size:]
ratings_test = ratings[train_size+valid_size:]






In [4]:
from torch.utils.data import DataLoader, TensorDataset

train_loader = DataLoader(TensorDataset(users_train, movies_train, ratings_train), batch_size=64, shuffle=True)
valid_loader = DataLoader(TensorDataset(users_valid, movies_valid, ratings_valid), batch_size=64, shuffle=False)
test_loader = DataLoader(TensorDataset(users_test, movies_test, ratings_test), batch_size=64, shuffle=False)



# a + b_i + b_u + r_u*r_i

In [32]:
import torch.nn as nn
import torch.optim as optim
import torch

class MF(nn.Module):
    def __init__(self, user_len, movie_len, ratings, rank):
        super(MF, self).__init__()
        self.user_len = user_len
        self.movie_len = movie_len
        self.rank = rank
        self.alpha = ratings.float().mean()
        self.user_bias = nn.Embedding(user_len+1, 1)
        self.item_bias = nn.Embedding(movie_len+1, 1)
        self.user_reps = nn.Embedding(user_len+1, rank)
        self.item_reps = nn.Embedding(movie_len+1, rank)
        
        nn.init.xavier_uniform_(self.user_reps.weight)
        nn.init.xavier_uniform_(self.item_reps.weight)
        nn.init.zeros_(self.user_bias.weight)
        nn.init.zeros_(self.item_bias.weight)

    def forward(self, user,item):
        user_bias = self.user_bias(user).squeeze()
        item_bias = self.item_bias(item).squeeze()
        user_emb = self.user_reps(user)
        item_emb = self.item_reps(item)
        return self.alpha + user_bias + item_bias + torch.sum(user_emb*item_emb,dim=1)
    

    
    


In [37]:
print(user_len)

6040


In [34]:


device = torch.device("mps" if torch.mps.is_available() else "cpu")

rank = 200

epochs = 2

model = MF(user_len, movie_len, ratings, rank).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()



epoch_train_losses, epoch_val_losses = [], []
for epoch in range(epochs):
    model.train()
    train_losses = []
    for user, movie, rating in train_loader:
        user, movie, rating = user.to(device), movie.to(device), rating.to(device)

        pred = model(user, movie)
        loss = criterion(pred, rating)
        train_losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # 검증 루프
    model.eval()
    valid_losses = []
    with torch.no_grad():
        for user, movie, rating in valid_loader:
            user, movie, rating = user.to(device), movie.to(device), rating.to(device)
            pred = model(user, movie)
            loss = criterion(pred, rating)
            valid_losses.append(loss.item())
            

    # 로그 출력
    print(f'Epoch: {epoch+1}, Train Loss: {np.mean(train_losses):.4f}, Val Loss: {np.mean(valid_losses):.4f}')





Epoch: 1, Train Loss: 0.9376, Val Loss: 0.7802
Epoch: 2, Train Loss: 0.5384, Val Loss: 0.8024


In [35]:
model.eval()
with torch.no_grad():
    user = users_test[50].unsqueeze(0)
    movie = movies_test[50].unsqueeze(0)
    ratings = ratings_test[50].unsqueeze(0)
    pred = model(user, movie)
    print(pred)

tensor([4.6763], device='mps:0')


In [36]:
print(ratings)

tensor([5.], device='mps:0')


In [26]:
print(users_test.shape)
print(movies_test.shape)
print(ratings_test.shape)
ratings_test

torch.Size([200043])
torch.Size([200043])
torch.Size([200043])


tensor([5., 4., 3.,  ..., 4., 3., 5.], device='mps:0')

In [26]:
print(f"{users_test[4]}유저가 {movies_test[4]}에 {ratings_test[4]}점을 줬다")

4060유저가 2046에 4.0점을 줬다


In [27]:
model.eval()
with torch.no_grad():
    user = users_test[4].unsqueeze(0)
    movie = movies_test[4].unsqueeze(0)
    ratings = ratings_test[4].unsqueeze(0)
    pred = model(user, movie)
    print(f"{user}유저가 {movie}에 {pred}점을 줄 것이라 예측했다")

tensor([4060], device='mps:0')유저가 tensor([2046], device='mps:0')에 tensor([3.7759], device='mps:0')점을 줄 것이라 예측했다


In [28]:
model.eval()
with torch.no_grad():
    user = users_test[50].unsqueeze(0)
    movie = movies_test[50].unsqueeze(0)
    ratings = ratings_test[50].unsqueeze(0)
    pred = model(user, movie)
    print(pred)
print(ratings)

tensor([4.2033], device='mps:0')
tensor([5.], device='mps:0')


# b_i + b_u + r_u*r_i

In [29]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


# 모델 정의
class MF(nn.Module):
    def __init__(self, user_len, movie_len, rank):
        super(MF, self).__init__()
        self.user_bias = nn.Embedding(user_len+1, 1)
        self.item_bias = nn.Embedding(movie_len+1, 1)
        self.user_reps = nn.Embedding(user_len+1, rank)
        self.item_reps = nn.Embedding(movie_len+1, rank)

    def forward(self, user, item):
        user_bias = self.user_bias(user).squeeze()
        item_bias = self.item_bias(item).squeeze()
        user_emb = self.user_reps(user)
        item_emb = self.item_reps(item)
        return user_bias + item_bias + torch.sum(user_emb * item_emb, dim=1)

# 학습 설정
rank = 100
epochs = 10

model = MF(user_len, movie_len, rank).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# 학습 루프
for epoch in range(epochs):
    model.train()
    train_losses = []
    for user, movie, rating in train_loader:
        user, movie, rating = user.to(device), movie.to(device), rating.to(device)

        pred = model(user, movie)
        loss = criterion(pred, rating)
        train_losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # 검증 루프
    model.eval()
    valid_losses = []
    with torch.no_grad():
        for user, movie, rating in valid_loader:
            user, movie, rating = user.to(device), movie.to(device), rating.to(device)
            pred = model(user, movie)
            loss = criterion(pred, rating)
            valid_losses.append(loss.item())

    # 로그 출력
    print(f'Epoch: {epoch+1}, Train Loss: {np.mean(train_losses):.4f}, Val Loss: {np.mean(valid_losses):.4f}')


Epoch: 1, Train Loss: 89.5009, Val Loss: 66.7852
Epoch: 2, Train Loss: 40.1633, Val Loss: 38.9625
Epoch: 3, Train Loss: 17.3637, Val Loss: 24.0509
Epoch: 4, Train Loss: 7.6747, Val Loss: 16.3321
Epoch: 5, Train Loss: 3.7028, Val Loss: 12.2241
Epoch: 6, Train Loss: 2.0342, Val Loss: 9.9081
Epoch: 7, Train Loss: 1.3073, Val Loss: 8.5302
Epoch: 8, Train Loss: 0.9767, Val Loss: 7.6854
Epoch: 9, Train Loss: 0.8157, Val Loss: 7.1312
Epoch: 10, Train Loss: 0.7233, Val Loss: 6.7450


In [30]:
model.eval()
with torch.no_grad():
    user = users_test[3].unsqueeze(0)
    movie = movies_test[3].unsqueeze(0)
    ratings = ratings_test[3].unsqueeze(0)
    pred = model(user, movie)
    print(pred)

tensor([3.4446], device='mps:0')


In [31]:
print(ratings)

tensor([3.], device='mps:0')
