<a href="https://colab.research.google.com/github/01star01ek/01star01ek/blob/main/%EC%9D%BC%EB%8B%A8%EC%B5%9C%EC%A2%85%EC%BD%94%EB%93%9C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 데이터 로딩

In [99]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

def load_data():
    train_path = '/content/u.base'
    test_path = '/content/u.test'

    # Load training and testing data
    train_data = pd.read_csv(train_path)
    test_data = pd.read_csv(test_path)

    # Determine the number of users and movies
    num_users = max(train_data['user_id'].max(), test_data['user_id'].max())
    num_movies = max(train_data['item_id'].max(), test_data['item_id'].max())

    # Convert to zero-based index
    train_data[['user_id', 'item_id']] -= 1
    test_data[['user_id', 'item_id']] -= 1
    test_data[['rating']] = 1 # NaN을 모두 1로 바꿔줌

    train, valid = train_test_split(train_data, test_size=0.1, random_state = 1234)

    # Create matrices
    train_ratings_matrix = np.zeros((num_users, num_movies))
    valid_ratings_matrix = np.zeros((num_users, num_movies))


    for row in train.itertuples():
        train_ratings_matrix[row.user_id, row.item_id] = (row.rating - 1) / 4.0
    for row in valid.itertuples():
        valid_ratings_matrix[row.user_id, row.item_id] = (row.rating - 1) / 4.0

    return num_users, num_movies, train_ratings_matrix, valid_ratings_matrix, test_data


### validation 평가

In [100]:
# validation 평가
def eval(model, train_data, valid_data):
    pred_u_score = model.predict(train_data)
    target_item = np.where(valid_data > 0)

    # 예측값을 원래 스케일로 복원 (0-1 -> 1-5)
    pred_original_scale = pred_u_score * 4.0 + 1.0
    valid_original_scale = valid_data * 4.0 + 1.0

    # RMSE 계산
    rmse = np.sqrt(mean_squared_error(valid_original_scale[target_item], pred_original_scale[target_item]))
    return rmse

### 모델

In [101]:
import numpy as np


class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout

        return dx


def sigmoid(x):
    return 1 / (1 + np.exp(-x))

class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        out = sigmoid(x)
        self.out = out
        return out

    def backward(self, dout):
        dx = dout * (1.0 - self.out) * self.out

        return dx


class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b

        self.x = None
        self.original_x_shape = None
        # 가중치와 편향 매개변수의 미분
        self.dW = None
        self.db = None

    def forward(self, x):
        # 텐서 대응
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x

        out = np.dot(self.x, self.W) + self.b

        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        dx = dx.reshape(*self.original_x_shape)  # 입력 데이터 모양 변경(텐서 대응)
        return dx



class MSELoss:
    def __init__(self, weight_decay_lambda=0.005):
        self.loss = None
        self.y = None
        self.t = None
        self.t_mask = None
        self.weight_decay_lambda = weight_decay_lambda

    def forward(self, x, t, params=None):
        self.t = t
        self.y = x
        self.loss = (self.y - self.t)**2
        loss = np.mean(self.loss) * 0.5

        if params is not None and self.weight_decay_lambda > 0:
            weight_decay = 0
            for idx in range(1, len(params)//2 + 1):
                weight_key = 'W' + str(idx)
                if weight_key in params:
                    W = params[weight_key]
                    weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)
            loss += weight_decay

        return loss

    def backward(self, dout=1):
        dx = ( self.y - self.t) / len(self.y[self.t_mask])
        return dx


In [102]:
import numpy as np
from collections import OrderedDict

class MultiLayerNet:
    def __init__(self, input_size, hidden_size_list, output_size):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.params = {}

        # 가중치 초기화
        self.__init_weight()

        # 계층 생성
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num+1):
            self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                                      self.params['b' + str(idx)])
            self.layers['Activation_function' + str(idx)] =Relu()

        idx = self.hidden_layer_num + 1
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
            self.params['b' + str(idx)])

        self.last_layer = MSELoss()

    def __init_weight(self):
      layer_sizes = [self.input_size]
      for hidden_size in self.hidden_size_list:
          layer_sizes.append(hidden_size)
      layer_sizes.append(self.output_size)


      for i in range(1, len(layer_sizes)):
          prev_size = layer_sizes[i-1]
          current_size = layer_sizes[i]

          xavier_scale = np.sqrt(2.0 / (prev_size + current_size))

          weight_key = 'W' + str(i)
          self.params[weight_key] = xavier_scale * np.random.randn(prev_size, current_size)

          bias_key = 'b' + str(i)
          self.params[bias_key] = np.zeros(current_size)

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)

        return self.last_layer.forward(y, t, self.params)


    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # 결과 저장
        grads = {}
        for idx in range(1, self.hidden_layer_num+2):
            grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW
            grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db

        return grads

### Optimizer

In [103]:
import numpy as np

class Adam:
    def __init__(self, lr=0.001, beta1=0.8, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None
        self.epsilon = 1e-8

    def update(self, params, grads):

        if self.m is None or self.v is None:
            self.m, self.v = {}, {}
            for key in params.keys():
                self.m[key] = np.zeros_like(params[key])
                self.v[key] = np.zeros_like(params[key])

        self.iter += 1

        for key in params.keys():
            self.m[key] = self.beta1 * self.m[key] + (1.0 - self.beta1) * grads[key]
            self.v[key] = self.beta2 * self.v[key] + (1.0 - self.beta2) * (grads[key] ** 2)
            m_corrected = self.m[key] / (1.0 - self.beta1 ** self.iter)
            v_corrected = self.v[key] / (1.0 - self.beta2 ** self.iter)
            params[key] -= self.lr * m_corrected / (np.sqrt(v_corrected) + self.epsilon)

In [104]:
# submission 만들기
from sklearn.metrics import mean_squared_error
from pandas import DataFrame
def make_submission(model, train_data, test_data):
    pred_u_score = model.predict(train_data)
    target_item = (test_data['user_id'].to_numpy(), test_data['item_id'].to_numpy())

    pred_original_scale = pred_u_score[target_item] * 4.0 + 1.0
    pred_original_scale = np.clip(pred_original_scale, 1.0, 5.0)

    results = pd.DataFrame()
    results["target_id"] = test_data["target_id"]
    results["rating"] = pred_original_scale
    print(results.head())  # 결과 확인용
    results.to_csv('/content/submission.csv', index=False)


### main

In [105]:
import warnings
import random
import time
import numpy as np
import matplotlib.pyplot as plt

# colab에서 나오는 warning들을 무시
warnings.filterwarnings('ignore')

# 결과 재현을 위해 Seed를 고정
def seed_everything(random_seed):
    np.random.seed(random_seed)
    random.seed(random_seed)

seed = 1
seed_everything(seed)


# 0. 데이터 읽기==========
num_users, num_movies, train_ratings_matrix, valid_ratings_matrix, test_data = load_data()


# 1. 실험용 설정==========
train_size = num_users
max_epochs = 30
batch_size = 60
initial_lr = 0.001
decay_rate = 0.97
patience = 10

optimizers = Adam(lr=initial_lr, beta1=0.9, beta2=0.999)
train_loss = []
Model = MultiLayerNet(
        input_size=num_movies, hidden_size_list=[40, 30],
        output_size=num_movies)


# 2. 훈련 시작==========
best_rmse = float('inf')
best_epoch = 0
best_params = {}
counter = 0

for i in range(max_epochs):
    if i > 10:
      optimizers.lr = initial_lr * (decay_rate ** (i-10))

    shuffled_user_index = np.asarray(range(num_users))
    np.random.shuffle(shuffled_user_index)

    batch_num = int(num_users / batch_size) + 1
    epoch_loss = 0

    for b_idx in range(batch_num):
        batch_idx = shuffled_user_index[b_idx*batch_size : (b_idx+1)*batch_size]
        if len(batch_idx) == 0:
            continue

        x_batch = train_ratings_matrix[batch_idx]

        grads = Model.gradient(x_batch, x_batch)
        optimizers.update(Model.params, grads)

        loss = Model.loss(x_batch, x_batch)
        train_loss.append(loss)
        epoch_loss += loss

    avg_loss = epoch_loss / batch_num if batch_num > 0 else 0
    print("epoch:" + str(i+1) + "  loss:" + str(loss))

    current_rmse = eval(Model, train_ratings_matrix, valid_ratings_matrix)
    print("validation rmse:", current_rmse)

    if current_rmse < best_rmse:
        print(f"성능 good RMSE: {best_rmse:.4f} -> {current_rmse:.4f}")
        best_rmse = current_rmse
        best_epoch = i

        best_params = {}
        for key, val in Model.params.items():
            best_params[key] = val.copy()

        counter = 0
    else:
        counter += 1
        print(f"성능 bad {counter}/{patience}")

    if counter >= patience:
        print(f"Early stopping at epoch {i+1}!")
        print(f"Best epoch was {best_epoch+1} with RMSE {best_rmse:.4f}")
        break

if best_params:
    print(f"최고 성능 모델 복원(에폭 {best_epoch+1})")
    for key in Model.params:
        Model.params[key] = best_params[key]

# 3. 평가==========
rmse = eval(Model, train_ratings_matrix, valid_ratings_matrix)
print( "============================")
print( "=========== rmse ===========")
print(str(rmse))


# # 4. submission만들기==========
make_submission(Model, train_ratings_matrix, test_data)

epoch:1  loss:0.4301287025853614
validation rmse: 2.6540067322295395
성능 good RMSE: inf -> 2.6540
epoch:2  loss:0.4291073300098328
validation rmse: 2.346386606497952
성능 good RMSE: 2.6540 -> 2.3464
epoch:3  loss:0.4263394901455469
validation rmse: 2.317354370036533
성능 good RMSE: 2.3464 -> 2.3174
epoch:4  loss:0.4279028266016043
validation rmse: 2.2887015820932874
성능 good RMSE: 2.3174 -> 2.2887
epoch:5  loss:0.43033170687551947
validation rmse: 2.248699341921355
성능 good RMSE: 2.2887 -> 2.2487
epoch:6  loss:0.43097915638035317
validation rmse: 2.213207256748558
성능 good RMSE: 2.2487 -> 2.2132
epoch:7  loss:0.43334836550122224
validation rmse: 2.1849303784668077
성능 good RMSE: 2.2132 -> 2.1849
epoch:8  loss:0.43460664509341457
validation rmse: 2.167203154319526
성능 good RMSE: 2.1849 -> 2.1672
epoch:9  loss:0.4381294161918005
validation rmse: 2.154807822436993
성능 good RMSE: 2.1672 -> 2.1548
epoch:10  loss:0.44227277355623107
validation rmse: 2.1453873582207614
성능 good RMSE: 2.1548 -> 2.1454
epo