In [146]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torch.nn.parameter import Parameter

def local_kernel(u, v):
    dist = torch.norm(u - v, p=2, dim=2)
    hat = torch.clamp(1. - dist**2, min=0.)
    return hat

# 데이터 로딩
import numpy as np
users, movies, ratings = [], [], []
user_len, movie_len = 0, 0

with open("../data/ratings.dat", "r") as f:
    for line in f:
        u, m, r, t = line.split("::")
        user_len = max(user_len, int(u))
        movie_len = max(movie_len, int(m))
        users.append(int(u))
        movies.append(int(m))
        ratings.append(float(r))

indices = np.arange(len(ratings))
np.random.shuffle(indices)

users = np.array(users)[indices]
movies = np.array(movies)[indices]
ratings = np.array(ratings)[indices]

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

users = torch.tensor(users, dtype=torch.long, device=device)
movies = torch.tensor(movies, dtype=torch.long, device=device)
ratings = torch.tensor(ratings, dtype=torch.float, device=device)

# 데이터 분할
n_samples = len(ratings)
train_size = int(0.6 * n_samples)
valid_size = int(0.2 * n_samples)

users_train = users[:train_size]
movies_train = movies[:train_size]
ratings_train = ratings[:train_size]

users_valid = users[train_size:train_size+valid_size]
movies_valid = movies[train_size:train_size+valid_size]
ratings_valid = ratings[train_size:train_size+valid_size]

users_test = users[train_size+valid_size:]
movies_test = movies[train_size+valid_size:]
ratings_test = ratings[train_size+valid_size:]

In [147]:
def make_rating_matrix(users, movies, ratings, num_users, num_items):
    # 행렬 크기를 정확히 맞춤
    rating_matrix = torch.zeros((num_users, num_items), dtype=torch.float32)
    mask = torch.zeros_like(rating_matrix)

    for u, m, r in zip(users, movies, ratings):
        rating_matrix[u-1, m-1] = r  # 인덱스를 0부터 시작하도록 조정
        mask[u-1, m-1] = 1.0
    return rating_matrix.to(device), mask.to(device)

n_users = user_len
n_items = movie_len

train_r, train_m = make_rating_matrix(users_train, movies_train, ratings_train, n_users, n_items)
valid_r, valid_m = make_rating_matrix(users_valid, movies_valid, ratings_valid, n_users, n_items)
test_r, test_m = make_rating_matrix(users_test, movies_test, ratings_test, n_users, n_items)

In [148]:
class KernelLayer(nn.Module):
    def __init__(self, n_in, n_hid, n_dim, lambda_s, lambda_2, activation=nn.Sigmoid()):
        super().__init__()
        # 입력 차원과 출력 차원을 정확히 맞춤
        self.W = nn.Parameter(torch.randn(n_in, n_hid))
        self.u = nn.Parameter(torch.randn(n_in, 1, n_dim))
        self.v = nn.Parameter(torch.randn(1, n_hid, n_dim))
        self.b = nn.Parameter(torch.randn(n_hid))

        self.lambda_s = lambda_s
        self.lambda_2 = lambda_2

        nn.init.xavier_uniform_(self.W, gain=torch.nn.init.calculate_gain("relu"))
        nn.init.xavier_uniform_(self.u, gain=torch.nn.init.calculate_gain("relu"))
        nn.init.xavier_uniform_(self.v, gain=torch.nn.init.calculate_gain("relu"))
        nn.init.zeros_(self.b)
        self.activation = activation

    def forward(self, x):
        # 입력 차원 확인
        if x.size(1) != self.W.size(0):
            # 차원이 맞지 않으면 조정
            x = x[:, :self.W.size(0)]
        
        w_hat = local_kernel(self.u, self.v)
        sparse_reg = F.mse_loss(w_hat, torch.zeros_like(w_hat))
        sparse_reg_term = self.lambda_s * sparse_reg
        l2_reg = F.mse_loss(self.W, torch.zeros_like(self.W))
        l2_reg_term = self.lambda_2 * l2_reg
        W_eff = self.W * w_hat
        y = torch.matmul(x, W_eff) + self.b
        y = self.activation(y)
        return y, sparse_reg_term + l2_reg_term

class KernelNet(nn.Module):
    def __init__(self, n_u, n_hid, n_dim, n_layers, lambda_s, lambda_2):
        super().__init__()
        layers = []
        for i in range(n_layers):
            if i == 0:
                layers.append(KernelLayer(n_u, n_hid, n_dim, lambda_s, lambda_2))
            else:
                layers.append(KernelLayer(n_hid, n_hid, n_dim, lambda_s, lambda_2))
        # 마지막 레이어의 출력 차원을 입력 차원과 동일하게 설정
        layers.append(KernelLayer(n_hid, n_u, n_dim, lambda_s, lambda_2, activation=nn.Identity()))
        self.layers = nn.ModuleList(layers)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        total_reg = None
        for i, layer in enumerate(self.layers):
            x, reg = layer(x)
            if i < len(self.layers)-1:
                x = self.dropout(x)
            if total_reg is None:
                total_reg = reg
            else:
                total_reg += reg
        return x, total_reg
class CompleteNet(nn.Module):
    def __init__(self, kernel_net, n_u, n_m, n_hid, n_dim, n_layers, lambda_s, lambda_2, gk_size, dot_scale):
        super().__init__()
        self.gk_size = gk_size
        self.dot_scale = dot_scale
        self.local_kernel_net = kernel_net
        # n_m+1 대신 n_m을 사용
        self.global_kernel_net = KernelNet(n_m, n_hid, n_dim, n_layers, lambda_s, lambda_2)
        self.conv_kernel = nn.Parameter(torch.randn(n_m, gk_size**2) * 0.1)
        nn.init.xavier_uniform_(self.conv_kernel, gain=torch.nn.init.calculate_gain("relu"))

    def forward(self, train_r):
        x, _ = self.local_kernel_net(train_r)
        gk = self.global_kernel(x, self.gk_size, self.dot_scale)
        x = self.global_conv(train_r, gk)
        x, global_reg_loss = self.global_kernel_net(x)
        # 출력 차원을 입력 차원과 동일하게 맞춤
        if x.size(1) != train_r.size(1):
            x = x[:, :train_r.size(1)]
        return x, global_reg_loss

    def global_kernel(self, input, gk_size, dot_scale):
        avg_pooling = torch.mean(input, dim=0)
        avg_pooling = avg_pooling.view(1, -1)
        gk = torch.matmul(avg_pooling, self.conv_kernel) * dot_scale
        gk = gk.view(1, 1, gk_size, gk_size)
        return gk

    def global_conv(self, input, W):
        input = input.unsqueeze(0).unsqueeze(0)
        conv2d = nn.LeakyReLU()(F.conv2d(input, W, stride=1, padding=1))
        return conv2d.squeeze(0).squeeze(0)

class Loss(nn.Module):
    def forward(self, pred_p, reg_loss, train_m, train_r):
        # 차원이 맞지 않으면 조정
        if pred_p.size(1) != train_r.size(1):
            pred_p = pred_p[:, :train_r.size(1)]
        if train_m.size(1) != train_r.size(1):
            train_m = train_m[:, :train_r.size(1)]
            
        diff = train_m * (train_r - pred_p)
        sqE = F.mse_loss(diff, torch.zeros_like(diff))
        loss_p = sqE + reg_loss
        return loss_p

In [149]:
# 모델 파라미터 설정
n_hid = 500
n_dim = 5
n_layers = 2
lambda_s = 1e-3
lambda_2 = 1e-4
num_epochs = 50

# Pretrain 모델 초기화
pretrain_model = KernelNet(n_items, n_hid, n_dim, n_layers, lambda_s, lambda_2).to(device)
criterion = Loss()
optimizer_kernel = optim.Adam(pretrain_model.parameters(), lr=1e-3)

# Pretrain 학습
for epoch in range(num_epochs):
    pretrain_model.train()
    pred_train, reg_loss = pretrain_model(train_r)
    loss = criterion(pred_train, reg_loss, train_m, train_r)

    optimizer_kernel.zero_grad()
    loss.backward()
    optimizer_kernel.step()

    # Validation
    pretrain_model.eval()
    with torch.no_grad():
        pred_valid, _ = pretrain_model(valid_r)
        val_loss = criterion(pred_valid, 0, valid_m, valid_r)

    print(f"[Epoch {epoch+1}] Train Loss: {loss.item():.4f} | Valid Loss: {val_loss.item():.4f}")

# CompleteNet 초기화 및 학습
pretrained_model = CompleteNet(pretrain_model, n_u=n_users, n_m=n_items, n_hid=128, n_dim=10, n_layers=3, lambda_s=0.1, lambda_2=0.01, gk_size=4, dot_scale=1.0).to(device)
optimizer_finetune = torch.optim.Adam(pretrained_model.parameters(), lr=1e-3)

# Fine-tuning
for epoch in range(10):
    pretrained_model.train()
    optimizer_finetune.zero_grad()

    pred_ratings, reg_loss = pretrained_model(train_r)
    loss = criterion(pred_ratings, reg_loss, train_m, train_r)
    
    loss.backward()
    optimizer_finetune.step()

    print(f"Epoch [{epoch+1}/10], Loss: {loss.item()}")

[Epoch 1] Train Loss: 0.3620 | Valid Loss: 0.1030
[Epoch 2] Train Loss: 0.3151 | Valid Loss: 0.0884
[Epoch 3] Train Loss: 0.2711 | Valid Loss: 0.0737
[Epoch 4] Train Loss: 0.2270 | Valid Loss: 0.0590
[Epoch 5] Train Loss: 0.1831 | Valid Loss: 0.0450
[Epoch 6] Train Loss: 0.1421 | Valid Loss: 0.0325
[Epoch 7] Train Loss: 0.1057 | Valid Loss: 0.0224
[Epoch 8] Train Loss: 0.0767 | Valid Loss: 0.0155
[Epoch 9] Train Loss: 0.0568 | Valid Loss: 0.0118
[Epoch 10] Train Loss: 0.0463 | Valid Loss: 0.0113
[Epoch 11] Train Loss: 0.0448 | Valid Loss: 0.0127
[Epoch 12] Train Loss: 0.0494 | Valid Loss: 0.0150
[Epoch 13] Train Loss: 0.0552 | Valid Loss: 0.0169
[Epoch 14] Train Loss: 0.0608 | Valid Loss: 0.0179
[Epoch 15] Train Loss: 0.0642 | Valid Loss: 0.0179
[Epoch 16] Train Loss: 0.0635 | Valid Loss: 0.0171
[Epoch 17] Train Loss: 0.0601 | Valid Loss: 0.0156
[Epoch 18] Train Loss: 0.0557 | Valid Loss: 0.0139
[Epoch 19] Train Loss: 0.0510 | Valid Loss: 0.0123
[Epoch 20] Train Loss: 0.0459 | Valid Lo

RuntimeError: mat1 and mat2 shapes cannot be multiplied (6039x3951 and 3952x128)