## Data Download

In [1]:
# 다운로드 받는 코드
import kagglehub
import os
import pandas as pd

# Download latest version
path = kagglehub.dataset_download("andrezaza/clapper-massive-rotten-tomatoes-movies-and-reviews")
path = path.replace('\\', '/')

review_df = pd.read_csv(os.path.join(path, os.listdir(path)[0]))
movie_df = pd.read_csv(os.path.join(path, os.listdir(path)[1]))

print("Path to dataset files:", path)
print(movie_df.shape)
print(review_df.shape)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: /home/shin/.cache/kagglehub/datasets/andrezaza/clapper-massive-rotten-tomatoes-movies-and-reviews/versions/4
(143258, 16)
(1444963, 11)


In [2]:
movie_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 143258 entries, 0 to 143257
Data columns (total 16 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   id                    143258 non-null  object 
 1   title                 142891 non-null  object 
 2   audienceScore         73248 non-null   float64
 3   tomatoMeter           33877 non-null   float64
 4   rating                13991 non-null   object 
 5   ratingContents        13991 non-null   object 
 6   releaseDateTheaters   30773 non-null   object 
 7   releaseDateStreaming  79420 non-null   object 
 8   runtimeMinutes        129431 non-null  float64
 9   genre                 132175 non-null  object 
 10  originalLanguage      129400 non-null  object 
 11  director              139041 non-null  object 
 12  writer                90116 non-null   object 
 13  boxOffice             14743 non-null   object 
 14  distributor           23001 non-null   object 
 15  

In [3]:
movie_df["genre"].unique().shape

(2913,)

In [4]:
review_df.head()

Unnamed: 0,id,reviewId,creationDate,criticName,isTopCritic,originalScore,reviewState,publicatioName,reviewText,scoreSentiment,reviewUrl
0,beavers,1145982,2003-05-23,Ivan M. Lincoln,False,3.5/4,fresh,Deseret News (Salt Lake City),Timed to be just long enough for most youngste...,POSITIVE,http://www.deseretnews.com/article/700003233/B...
1,blood_mask,1636744,2007-06-02,The Foywonder,False,1/5,rotten,Dread Central,It doesn't matter if a movie costs 300 million...,NEGATIVE,http://www.dreadcentral.com/index.php?name=Rev...
2,city_hunter_shinjuku_private_eyes,2590987,2019-05-28,Reuben Baron,False,,fresh,CBR,The choreography is so precise and lifelike at...,POSITIVE,https://www.cbr.com/city-hunter-shinjuku-priva...
3,city_hunter_shinjuku_private_eyes,2558908,2019-02-14,Matt Schley,False,2.5/5,rotten,Japan Times,The film's out-of-touch attempts at humor may ...,NEGATIVE,https://www.japantimes.co.jp/culture/2019/02/0...
4,dangerous_men_2015,2504681,2018-08-29,Pat Padua,False,,fresh,DCist,Its clumsy determination is endearing and some...,POSITIVE,http://dcist.com/2015/11/out_of_frame_dangerou...


## Data train & test split

In [5]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [6]:
review_df.drop_duplicates(inplace=True)
review_df.reset_index(drop=True, inplace=True)

In [7]:
review_df["id"].value_counts().quantile(0.9)

np.float64(56.0)

In [8]:
selected_movie_ids = review_df['id'].value_counts()[review_df['id'].value_counts()>=56].index

In [9]:
selected_movie_ids

Index(['joker_2019', 'once_upon_a_time_in_hollywood', 'avengers_endgame',
       'captain_marvel', 'a_star_is_born_2018', 'black_panther_2018',
       'star_wars_the_rise_of_skywalker', 'the_batman', 'dune_2021',
       'avengers_infinity_war',
       ...
       'i_am_trying_to_break_your_heart', 'stolen_summer',
       'friends_the_reunion', 'clara_sola', 'stand_by_me_1986', 'devils_due',
       '1090089-jack_frost', 'cookies_fortune', 'welcome_to_new_york_2015',
       'lux_aeterna'],
      dtype='object', name='id', length=6963)

In [10]:
selected_review_df = review_df[review_df['id'].isin(selected_movie_ids)]

In [11]:
selected_review_df.reset_index(drop=True, inplace=True)

In [12]:
selected_review_df.head()

Unnamed: 0,id,reviewId,creationDate,criticName,isTopCritic,originalScore,reviewState,publicatioName,reviewText,scoreSentiment,reviewUrl
0,the_duff,2763233,2021-02-02,Richard Crouse,False,3/5,fresh,Richard Crouse,A school comedy so predictable the screenwrite...,POSITIVE,http://www.richardcrouse.ca/the-duff-3-stars-a...
1,the_duff,2692661,2020-05-27,Andrew Galdi,False,,fresh,Movie Bitches,It was good acting and good writing.,POSITIVE,https://www.youtube.com/watch?v=8KeoWwUtXVQ&li...
2,the_duff,2679627,2020-03-26,Avaryl Halley,False,,fresh,Movie Bitches,"Oh dear, am I the Duff?",POSITIVE,https://www.youtube.com/watch?v=8KeoWwUtXVQ&li...
3,the_duff,2615809,2019-08-15,Udita Jhunjhunwala,True,,fresh,Livemint,While director Ari Sandel's adaptation is devo...,POSITIVE,https://www.livemint.com/Leisure/3wDs58QVt0Mtm...
4,the_duff,2586271,2019-05-14,Olivia Luder,True,3/5,fresh,One Room With A View,Not clever enough to be genre-defining in its ...,POSITIVE,https://oneroomwithaview.com/2015/03/06/duff-r...


In [13]:
selected_reviewer_names = selected_review_df['criticName'].value_counts()[selected_review_df['criticName'].value_counts()>=4].index

In [14]:
selected_reviewer_names

Index(['Frank Swietek', 'Roger Moore', 'Brian Orndorf', 'Jeffrey M. Anderson',
       'Rich Cline', 'Nell Minow', 'Laura Clifford', 'James Berardinelli',
       'Dennis Schwartz', 'David Nusair',
       ...
       'Rob Mackie', 'Erick Massoto', 'Sarah Raskin', 'Brian Grubb',
       'Sierra Bilton', 'D.W. Mault', 'Russell Holly', 'Adam Bernstein',
       'Florence Epstein', 'Jason Rhode'],
      dtype='object', name='criticName', length=6093)

In [15]:
selected_review_df = selected_review_df[selected_review_df['criticName'].isin(selected_reviewer_names)]
selected_review_df.head()

Unnamed: 0,id,reviewId,creationDate,criticName,isTopCritic,originalScore,reviewState,publicatioName,reviewText,scoreSentiment,reviewUrl
0,the_duff,2763233,2021-02-02,Richard Crouse,False,3/5,fresh,Richard Crouse,A school comedy so predictable the screenwrite...,POSITIVE,http://www.richardcrouse.ca/the-duff-3-stars-a...
1,the_duff,2692661,2020-05-27,Andrew Galdi,False,,fresh,Movie Bitches,It was good acting and good writing.,POSITIVE,https://www.youtube.com/watch?v=8KeoWwUtXVQ&li...
2,the_duff,2679627,2020-03-26,Avaryl Halley,False,,fresh,Movie Bitches,"Oh dear, am I the Duff?",POSITIVE,https://www.youtube.com/watch?v=8KeoWwUtXVQ&li...
3,the_duff,2615809,2019-08-15,Udita Jhunjhunwala,True,,fresh,Livemint,While director Ari Sandel's adaptation is devo...,POSITIVE,https://www.livemint.com/Leisure/3wDs58QVt0Mtm...
4,the_duff,2586271,2019-05-14,Olivia Luder,True,3/5,fresh,One Room With A View,Not clever enough to be genre-defining in its ...,POSITIVE,https://oneroomwithaview.com/2015/03/06/duff-r...


In [16]:
selected_review_df.shape

(917415, 11)

In [17]:
selected_review_df.reset_index(drop=True, inplace=True)

In [18]:
selected_review_df.columns

Index(['id', 'reviewId', 'creationDate', 'criticName', 'isTopCritic',
       'originalScore', 'reviewState', 'publicatioName', 'reviewText',
       'scoreSentiment', 'reviewUrl'],
      dtype='object')

In [19]:
X_train, X_test, Y_train, Y_test = train_test_split(selected_review_df.drop('reviewState', axis=1), selected_review_df['reviewState'], stratify=selected_review_df.criticName, test_size=0.1)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, stratify=X_train.criticName, test_size=0.25)

## Modeling

### NeuMF-NCF

#### Dataset

In [20]:
from sklearn.preprocessing import LabelEncoder

user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

user_encoder.fit(X_train["criticName"])
item_encoder.fit(X_train["id"])

In [21]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder

class ReviewDataset(Dataset):
    def __init__(self, X, Y, user_encoder, item_encoder):
        self.user = torch.LongTensor(user_encoder.transform(X["criticName"]))
        self.item = torch.LongTensor(item_encoder.transform(X["id"]))
        
        self.Y = torch.FloatTensor([1 if y == "fresh" else 0 for y in Y.to_list()])
        
    def __len__(self):
        return len(self.Y)
    
    def __getitem__(self, idx):
        return self.user[idx], self.item[idx], self.Y[idx]

#### Model

In [20]:
import torch
import torch.nn as nn
import torch.nn.functional as F 
 
 
class NCF(nn.Module):
    def __init__(self, user_num, item_num, embedding_dim=32, hidden_units=[256, 128, 64]):
        super(NCF, self).__init__()
        self.embed_user = nn.Embedding(user_num, embedding_dim)
        self.embed_item = nn.Embedding(item_num, embedding_dim)

        MLP_layers = []
        input_size = embedding_dim * 2
        for hidden_unit in hidden_units:
            MLP_layers.append(nn.Linear(input_size, hidden_unit))
            MLP_layers.append(nn.ReLU())
            input_size = hidden_unit
        self.MLP_layers = nn.Sequential(*MLP_layers)

        self.predict_layer = nn.Linear(input_size, 1)
 
    def forward(self, user, item):
        embed_user = self.embed_user(user)
        embed_item = self.embed_item(item)
        interaction = torch.cat((embed_user, embed_item), -1)
        output_MLP = self.MLP_layers(interaction)

        prediction = self.predict_layer(output_MLP)
        return prediction.view(-1)

#### train

In [21]:
from tqdm.auto import tqdm

BATCH_SIZE = 256
EPOCHS = 20
LEARNING_RATE = 0.001

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_dataset = ReviewDataset(X_train, Y_train, user_encoder, item_encoder)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

val_dataset = ReviewDataset(X_val, Y_val, user_encoder, item_encoder)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

model = NCF(user_num=len(user_encoder.classes_), item_num=len(item_encoder.classes_)).to(device)
fn_loss = nn.BCEWithLogitsLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

def fn_acc(y_pred, y_true):
    """
    시그모이드를 적용한 예측값과 실제값으로 정확도를 계산합니다.
    """
    predictions = (torch.sigmoid(y_pred) >= 0.5).float()
    correct = (predictions == y_true).float().sum()
    return correct / len(y_true)

for epoch in range(EPOCHS):
    model.train()
    train_loss = 0.0
    train_acc = 0.0
    total_train_samples = 0
    for user, item, y in tqdm(train_loader, desc=f"EPOCH: {epoch+1:02d}"):
        user, item, y = user.to(device), item.to(device), y.to(device)
        y_hat = model(user, item)
        loss = fn_loss(y_hat, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        batch_size = y.size(0)
        train_loss += loss.item() * batch_size
        train_acc += fn_acc(y_hat, y) * batch_size
        total_train_samples += batch_size
        
    train_loss /= total_train_samples
    train_acc = (train_acc / total_train_samples)
    
    model.eval()
    val_loss = 0.0
    val_acc = 0.0
    total_val_samples = 0
    with torch.no_grad():
        for user, item, y in val_loader:
            user, item, y = user.to(device), item.to(device), y.to(device)
            y_hat = model(user, item)
            loss = fn_loss(y_hat, y)
            
            batch_size = y.size(0)
            val_loss += loss.item() * batch_size
            val_acc += fn_acc(y_hat, y) * batch_size
            total_val_samples += batch_size
        
    val_loss /= total_val_samples
    val_acc = (val_acc / total_val_samples)
    
    print(f"EPOCH: {epoch+1:02d}, "
          f"TRAIN LOSS: {train_loss:.4f}, TRAIN ACC: {train_acc:.2f}%, "
          f"VAL LOSS: {val_loss:.4f}, VAL ACC: {val_acc:.2f}%")

EPOCH: 01: 100%|██████████| 2419/2419 [00:03<00:00, 630.09it/s]


EPOCH: 01, TRAIN LOSS: 0.5572, TRAIN ACC: 0.72%, VAL LOSS: 0.4970, VAL ACC: 0.76%


EPOCH: 02: 100%|██████████| 2419/2419 [00:03<00:00, 669.43it/s]


EPOCH: 02, TRAIN LOSS: 0.4709, TRAIN ACC: 0.78%, VAL LOSS: 0.4716, VAL ACC: 0.78%


EPOCH: 03: 100%|██████████| 2419/2419 [00:03<00:00, 678.50it/s]


EPOCH: 03, TRAIN LOSS: 0.4523, TRAIN ACC: 0.78%, VAL LOSS: 0.4665, VAL ACC: 0.78%


EPOCH: 04: 100%|██████████| 2419/2419 [00:03<00:00, 633.65it/s]


EPOCH: 04, TRAIN LOSS: 0.4430, TRAIN ACC: 0.79%, VAL LOSS: 0.4641, VAL ACC: 0.78%


EPOCH: 05: 100%|██████████| 2419/2419 [00:03<00:00, 662.03it/s]


EPOCH: 05, TRAIN LOSS: 0.4361, TRAIN ACC: 0.79%, VAL LOSS: 0.4639, VAL ACC: 0.78%


EPOCH: 06: 100%|██████████| 2419/2419 [00:03<00:00, 634.82it/s]


EPOCH: 06, TRAIN LOSS: 0.4296, TRAIN ACC: 0.80%, VAL LOSS: 0.4653, VAL ACC: 0.78%


EPOCH: 07: 100%|██████████| 2419/2419 [00:03<00:00, 672.31it/s]


EPOCH: 07, TRAIN LOSS: 0.4228, TRAIN ACC: 0.80%, VAL LOSS: 0.4721, VAL ACC: 0.78%


EPOCH: 08: 100%|██████████| 2419/2419 [00:03<00:00, 674.53it/s]


EPOCH: 08, TRAIN LOSS: 0.4155, TRAIN ACC: 0.80%, VAL LOSS: 0.4759, VAL ACC: 0.77%


EPOCH: 09: 100%|██████████| 2419/2419 [00:03<00:00, 667.53it/s]


EPOCH: 09, TRAIN LOSS: 0.4072, TRAIN ACC: 0.81%, VAL LOSS: 0.4801, VAL ACC: 0.77%


EPOCH: 10: 100%|██████████| 2419/2419 [00:03<00:00, 678.12it/s]


EPOCH: 10, TRAIN LOSS: 0.3986, TRAIN ACC: 0.81%, VAL LOSS: 0.4885, VAL ACC: 0.77%


EPOCH: 11: 100%|██████████| 2419/2419 [00:03<00:00, 667.88it/s]


EPOCH: 11, TRAIN LOSS: 0.3890, TRAIN ACC: 0.82%, VAL LOSS: 0.5029, VAL ACC: 0.77%


EPOCH: 12: 100%|██████████| 2419/2419 [00:03<00:00, 673.13it/s]


EPOCH: 12, TRAIN LOSS: 0.3790, TRAIN ACC: 0.82%, VAL LOSS: 0.5088, VAL ACC: 0.77%


EPOCH: 13: 100%|██████████| 2419/2419 [00:03<00:00, 671.99it/s]


EPOCH: 13, TRAIN LOSS: 0.3684, TRAIN ACC: 0.83%, VAL LOSS: 0.5286, VAL ACC: 0.76%


EPOCH: 14: 100%|██████████| 2419/2419 [00:03<00:00, 671.13it/s]


EPOCH: 14, TRAIN LOSS: 0.3575, TRAIN ACC: 0.83%, VAL LOSS: 0.5447, VAL ACC: 0.76%


EPOCH: 15: 100%|██████████| 2419/2419 [00:03<00:00, 673.64it/s]


EPOCH: 15, TRAIN LOSS: 0.3469, TRAIN ACC: 0.84%, VAL LOSS: 0.5602, VAL ACC: 0.76%


EPOCH: 16: 100%|██████████| 2419/2419 [00:03<00:00, 680.43it/s]


EPOCH: 16, TRAIN LOSS: 0.3360, TRAIN ACC: 0.84%, VAL LOSS: 0.5882, VAL ACC: 0.76%


EPOCH: 17: 100%|██████████| 2419/2419 [00:03<00:00, 670.16it/s]


EPOCH: 17, TRAIN LOSS: 0.3250, TRAIN ACC: 0.85%, VAL LOSS: 0.6080, VAL ACC: 0.75%


EPOCH: 18: 100%|██████████| 2419/2419 [00:03<00:00, 672.55it/s]


EPOCH: 18, TRAIN LOSS: 0.3143, TRAIN ACC: 0.86%, VAL LOSS: 0.6299, VAL ACC: 0.75%


EPOCH: 19: 100%|██████████| 2419/2419 [00:03<00:00, 671.42it/s]


EPOCH: 19, TRAIN LOSS: 0.3038, TRAIN ACC: 0.86%, VAL LOSS: 0.6558, VAL ACC: 0.74%


EPOCH: 20: 100%|██████████| 2419/2419 [00:03<00:00, 671.67it/s]


EPOCH: 20, TRAIN LOSS: 0.2939, TRAIN ACC: 0.87%, VAL LOSS: 0.6788, VAL ACC: 0.74%


#### Test

In [22]:
import torch
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

def evaluate_model(model, test_loader, device):
    """
    모델을 평가하고 다양한 평가 지표를 계산합니다.
    
    Parameters:
    model: 학습된 모델
    test_loader: 테스트 데이터 로더
    device: 연산 장치 (CPU/GPU)
    
    Returns:
    dict: 다양한 평가 지표를 포함한 딕셔너리
    """
    model.eval()
    
    # 예측값과 실제값을 저장할 리스트
    all_predictions = []
    all_probabilities = []
    all_targets = []
    total_loss = 0
    fn_loss = nn.BCEWithLogitsLoss()
    
    # tqdm으로 진행상황 표시
    test_progress = tqdm(test_loader, desc="Evaluating", unit="batch")
    
    with torch.no_grad():
        for user, item, y in test_progress:
            # 데이터를 device로 이동
            user = user.to(device)
            item = item.to(device)
            y = y.to(device)
            
            # 모델 예측
            y_hat = model(user, item)
            
            # 손실 계산
            loss = fn_loss(y_hat, y)
            total_loss += loss.item() * y.size(0)
            
            # 시그모이드 함수를 통한 확률값 계산
            probabilities = torch.sigmoid(y_hat)
            
            # 예측 클래스 결정 (0.5를 임계값으로 사용)
            predictions = (probabilities >= 0.5).float()
            
            # CPU로 이동하고 numpy 배열로 변환
            all_predictions.extend(predictions.cpu().numpy())
            all_probabilities.extend(probabilities.cpu().numpy())
            all_targets.extend(y.cpu().numpy())
            
            # 진행바 업데이트
            test_progress.set_postfix({'loss': f'{loss.item():.4f}'})
    
    # numpy 배열로 변환
    all_predictions = np.array(all_predictions)
    all_probabilities = np.array(all_probabilities)
    all_targets = np.array(all_targets)
    
    # 평균 손실 계산
    avg_loss = total_loss / len(test_loader.dataset)
    
    # 각종 평가 지표 계산
    metrics = {
        'loss': avg_loss,
        'accuracy': accuracy_score(all_targets, all_predictions),
        'precision': precision_score(all_targets, all_predictions),
        'recall': recall_score(all_targets, all_predictions),
        'f1': f1_score(all_targets, all_predictions),
        'auc_roc': roc_auc_score(all_targets, all_probabilities)
    }
    
    return metrics

# 평가 실행
def print_evaluation_results(metrics):
    """
    평가 결과를 보기 좋게 출력합니다.
    """
    print("\n" + "="*50)
    print("모델 평가 결과")
    print("="*50)
    print(f"Loss: {metrics['loss']:.4f}")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1 Score: {metrics['f1']:.4f}")
    print(f"AUC-ROC: {metrics['auc_roc']:.4f}")
    print("="*50)

# 테스트 데이터셋 생성 및 평가 실행
def evaluate_test_set(X_test, Y_test, model, user_encoder, item_encoder, batch_size=256):
    """
    테스트 데이터셋을 생성하고 모델을 평가합니다.
    """
    print("테스트 데이터셋 생성 중...")
    test_dataset = ReviewDataset(X_test, Y_test, user_encoder, item_encoder)
    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        drop_last=False  # 테스트 시에는 모든 샘플을 평가
    )
    
    print(f"테스트 데이터 수: {len(test_dataset)}")
    print("모델 평가 중...")
    
    # 평가 실행
    device = next(model.parameters()).device  # 모델이 있는 디바이스 확인
    metrics = evaluate_model(model, test_loader, device)
    
    # 결과 출력
    print_evaluation_results(metrics)
    
    return metrics

metrics = evaluate_test_set(
    X_test=X_test,
    Y_test=Y_test,
    model=model,
    user_encoder=user_encoder,
    item_encoder=item_encoder,
    batch_size=BATCH_SIZE
)

테스트 데이터셋 생성 중...
테스트 데이터 수: 91742
모델 평가 중...


Evaluating: 100%|██████████| 359/359 [00:00<00:00, 771.12batch/s, loss=0.4739]



모델 평가 결과
Loss: 0.4868
Accuracy: 0.7699
Precision: 0.7972
Recall: 0.8770
F1 Score: 0.8352
AUC-ROC: 0.8274


### NeuMF-NMF

In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F 
 
 
class NCF(nn.Module):
    def __init__(self, user_num, item_num, factor_num=32, num_layers=3):
        super(NCF, self).__init__()
        self.embed_user_GMF = nn.Embedding(user_num, factor_num)
        self.embed_item_GMF = nn.Embedding(item_num, factor_num)
        self.embed_user_MLP = nn.Embedding(
				user_num, factor_num * (2 ** (num_layers - 1)))
        self.embed_item_MLP = nn.Embedding(
                item_num, factor_num * (2 ** (num_layers - 1)))

        MLP_modules = []
        for i in range(num_layers):
            input_size = factor_num * (2 ** (num_layers - i))
            MLP_modules.append(nn.Linear(input_size, input_size//2))
            MLP_modules.append(nn.ReLU())
        self.MLP_layers = nn.Sequential(*MLP_modules)
 
        predict_size = factor_num * 2
 
        self.predict_layer = nn.Linear(predict_size, 1)
 
    def forward(self, user, item):
        embed_user_GMF = self.embed_user_GMF(user)
        embed_item_GMF = self.embed_item_GMF(item)
        output_GMF = embed_user_GMF * embed_item_GMF

        embed_user_MLP = self.embed_user_MLP(user)
        embed_item_MLP = self.embed_item_MLP(item)
        interaction = torch.cat((embed_user_MLP, embed_item_MLP), -1)
        output_MLP = self.MLP_layers(interaction)

        concat = torch.cat((output_GMF, output_MLP), -1)

        prediction = self.predict_layer(concat)
        return prediction.view(-1)

In [23]:
from tqdm.auto import tqdm

BATCH_SIZE = 256
EPOCHS = 20
LEARNING_RATE = 0.001

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_dataset = ReviewDataset(X_train, Y_train, user_encoder, item_encoder)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

val_dataset = ReviewDataset(X_val, Y_val, user_encoder, item_encoder)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

model = NCF(user_num=len(user_encoder.classes_), item_num=len(item_encoder.classes_)).to(device)
fn_loss = nn.BCEWithLogitsLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

def fn_acc(y_pred, y_true):
    """
    시그모이드를 적용한 예측값과 실제값으로 정확도를 계산합니다.
    """
    predictions = (torch.sigmoid(y_pred) >= 0.5).float()
    correct = (predictions == y_true).float().sum()
    return correct / len(y_true)

for epoch in range(EPOCHS):
    model.train()
    train_loss = 0.0
    train_acc = 0.0
    total_train_samples = 0
    for user, item, y in tqdm(train_loader, desc=f"EPOCH: {epoch+1:02d}"):
        user, item, y = user.to(device), item.to(device), y.to(device)
        y_hat = model(user, item)
        loss = fn_loss(y_hat, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        batch_size = y.size(0)
        train_loss += loss.item() * batch_size
        train_acc += fn_acc(y_hat, y) * batch_size
        total_train_samples += batch_size
        
    train_loss /= total_train_samples
    train_acc = (train_acc / total_train_samples)
    
    model.eval()
    val_loss = 0.0
    val_acc = 0.0
    total_val_samples = 0
    with torch.no_grad():
        for user, item, y in val_loader:
            user, item, y = user.to(device), item.to(device), y.to(device)
            y_hat = model(user, item)
            loss = fn_loss(y_hat, y)
            
            batch_size = y.size(0)
            val_loss += loss.item() * batch_size
            val_acc += fn_acc(y_hat, y) * batch_size
            total_val_samples += batch_size
        
    val_loss /= total_val_samples
    val_acc = (val_acc / total_val_samples)
    
    print(f"EPOCH: {epoch+1:02d}, "
          f"TRAIN LOSS: {train_loss:.4f}, TRAIN ACC: {train_acc:.2f}, "
          f"VAL LOSS: {val_loss:.4f}, VAL ACC: {val_acc:.2f}")

EPOCH: 01: 100%|██████████| 2419/2419 [00:03<00:00, 606.94it/s]


EPOCH: 01, TRAIN LOSS: 0.5254, TRAIN ACC: 0.74%, VAL LOSS: 0.4720, VAL ACC: 0.78%


EPOCH: 02: 100%|██████████| 2419/2419 [00:03<00:00, 644.84it/s]


EPOCH: 02, TRAIN LOSS: 0.4564, TRAIN ACC: 0.78%, VAL LOSS: 0.4608, VAL ACC: 0.78%


EPOCH: 03: 100%|██████████| 2419/2419 [00:03<00:00, 634.36it/s]


EPOCH: 03, TRAIN LOSS: 0.4432, TRAIN ACC: 0.79%, VAL LOSS: 0.4589, VAL ACC: 0.78%


EPOCH: 04: 100%|██████████| 2419/2419 [00:03<00:00, 624.01it/s]


EPOCH: 04, TRAIN LOSS: 0.4334, TRAIN ACC: 0.79%, VAL LOSS: 0.4630, VAL ACC: 0.78%


EPOCH: 05: 100%|██████████| 2419/2419 [00:03<00:00, 627.19it/s]


EPOCH: 05, TRAIN LOSS: 0.4213, TRAIN ACC: 0.80%, VAL LOSS: 0.4704, VAL ACC: 0.78%


EPOCH: 06: 100%|██████████| 2419/2419 [00:03<00:00, 645.27it/s]


EPOCH: 06, TRAIN LOSS: 0.4055, TRAIN ACC: 0.81%, VAL LOSS: 0.4811, VAL ACC: 0.77%


EPOCH: 07: 100%|██████████| 2419/2419 [00:03<00:00, 648.10it/s]


EPOCH: 07, TRAIN LOSS: 0.3858, TRAIN ACC: 0.82%, VAL LOSS: 0.5008, VAL ACC: 0.76%


EPOCH: 08: 100%|██████████| 2419/2419 [00:03<00:00, 646.08it/s]


EPOCH: 08, TRAIN LOSS: 0.3630, TRAIN ACC: 0.83%, VAL LOSS: 0.5322, VAL ACC: 0.76%


EPOCH: 09: 100%|██████████| 2419/2419 [00:03<00:00, 645.98it/s]


EPOCH: 09, TRAIN LOSS: 0.3376, TRAIN ACC: 0.85%, VAL LOSS: 0.5649, VAL ACC: 0.75%


EPOCH: 10: 100%|██████████| 2419/2419 [00:03<00:00, 647.77it/s]


EPOCH: 10, TRAIN LOSS: 0.3105, TRAIN ACC: 0.86%, VAL LOSS: 0.6147, VAL ACC: 0.75%


EPOCH: 11: 100%|██████████| 2419/2419 [00:03<00:00, 626.86it/s]


EPOCH: 11, TRAIN LOSS: 0.2828, TRAIN ACC: 0.88%, VAL LOSS: 0.6776, VAL ACC: 0.74%


EPOCH: 12: 100%|██████████| 2419/2419 [00:03<00:00, 648.01it/s]


EPOCH: 12, TRAIN LOSS: 0.2555, TRAIN ACC: 0.89%, VAL LOSS: 0.7275, VAL ACC: 0.74%


EPOCH: 13: 100%|██████████| 2419/2419 [00:03<00:00, 642.28it/s]


EPOCH: 13, TRAIN LOSS: 0.2286, TRAIN ACC: 0.90%, VAL LOSS: 0.8059, VAL ACC: 0.73%


EPOCH: 14: 100%|██████████| 2419/2419 [00:03<00:00, 642.36it/s]


EPOCH: 14, TRAIN LOSS: 0.2028, TRAIN ACC: 0.92%, VAL LOSS: 0.8786, VAL ACC: 0.73%


EPOCH: 15: 100%|██████████| 2419/2419 [00:03<00:00, 641.02it/s]


EPOCH: 15, TRAIN LOSS: 0.1786, TRAIN ACC: 0.93%, VAL LOSS: 0.9851, VAL ACC: 0.73%


EPOCH: 16: 100%|██████████| 2419/2419 [00:03<00:00, 644.94it/s]


EPOCH: 16, TRAIN LOSS: 0.1564, TRAIN ACC: 0.94%, VAL LOSS: 1.1036, VAL ACC: 0.73%


EPOCH: 17: 100%|██████████| 2419/2419 [00:03<00:00, 647.94it/s]


EPOCH: 17, TRAIN LOSS: 0.1363, TRAIN ACC: 0.95%, VAL LOSS: 1.2281, VAL ACC: 0.72%


EPOCH: 18: 100%|██████████| 2419/2419 [00:03<00:00, 609.38it/s]


EPOCH: 18, TRAIN LOSS: 0.1180, TRAIN ACC: 0.95%, VAL LOSS: 1.3646, VAL ACC: 0.72%


EPOCH: 19: 100%|██████████| 2419/2419 [00:03<00:00, 650.57it/s]


EPOCH: 19, TRAIN LOSS: 0.1016, TRAIN ACC: 0.96%, VAL LOSS: 1.5085, VAL ACC: 0.72%


EPOCH: 20: 100%|██████████| 2419/2419 [00:03<00:00, 649.62it/s]


EPOCH: 20, TRAIN LOSS: 0.0876, TRAIN ACC: 0.97%, VAL LOSS: 1.6436, VAL ACC: 0.72%


In [24]:
import torch
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

def evaluate_model(model, test_loader, device):
    """
    모델을 평가하고 다양한 평가 지표를 계산합니다.
    
    Parameters:
    model: 학습된 모델
    test_loader: 테스트 데이터 로더
    device: 연산 장치 (CPU/GPU)
    
    Returns:
    dict: 다양한 평가 지표를 포함한 딕셔너리
    """
    model.eval()
    
    # 예측값과 실제값을 저장할 리스트
    all_predictions = []
    all_probabilities = []
    all_targets = []
    total_loss = 0
    fn_loss = nn.BCEWithLogitsLoss()
    
    # tqdm으로 진행상황 표시
    test_progress = tqdm(test_loader, desc="Evaluating", unit="batch")
    
    with torch.no_grad():
        for user, item, y in test_progress:
            # 데이터를 device로 이동
            user = user.to(device)
            item = item.to(device)
            y = y.to(device)
            
            # 모델 예측
            y_hat = model(user, item)
            
            # 손실 계산
            loss = fn_loss(y_hat, y)
            total_loss += loss.item() * y.size(0)
            
            # 시그모이드 함수를 통한 확률값 계산
            probabilities = torch.sigmoid(y_hat)
            
            # 예측 클래스 결정 (0.5를 임계값으로 사용)
            predictions = (probabilities >= 0.5).float()
            
            # CPU로 이동하고 numpy 배열로 변환
            all_predictions.extend(predictions.cpu().numpy())
            all_probabilities.extend(probabilities.cpu().numpy())
            all_targets.extend(y.cpu().numpy())
            
            # 진행바 업데이트
            test_progress.set_postfix({'loss': f'{loss.item():.4f}'})
    
    # numpy 배열로 변환
    all_predictions = np.array(all_predictions)
    all_probabilities = np.array(all_probabilities)
    all_targets = np.array(all_targets)
    
    # 평균 손실 계산
    avg_loss = total_loss / len(test_loader.dataset)
    
    # 각종 평가 지표 계산
    metrics = {
        'loss': avg_loss,
        'accuracy': accuracy_score(all_targets, all_predictions),
        'precision': precision_score(all_targets, all_predictions),
        'recall': recall_score(all_targets, all_predictions),
        'f1': f1_score(all_targets, all_predictions),
        'auc_roc': roc_auc_score(all_targets, all_probabilities)
    }
    
    return metrics

# 평가 실행
def print_evaluation_results(metrics):
    """
    평가 결과를 보기 좋게 출력합니다.
    """
    print("\n" + "="*50)
    print("모델 평가 결과")
    print("="*50)
    print(f"Loss: {metrics['loss']:.4f}")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1 Score: {metrics['f1']:.4f}")
    print(f"AUC-ROC: {metrics['auc_roc']:.4f}")
    print("="*50)

# 테스트 데이터셋 생성 및 평가 실행
def evaluate_test_set(X_test, Y_test, model, user_encoder, item_encoder, batch_size=256):
    """
    테스트 데이터셋을 생성하고 모델을 평가합니다.
    """
    print("테스트 데이터셋 생성 중...")
    test_dataset = ReviewDataset(X_test, Y_test, user_encoder, item_encoder)
    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        drop_last=False  # 테스트 시에는 모든 샘플을 평가
    )
    
    print(f"테스트 데이터 수: {len(test_dataset)}")
    print("모델 평가 중...")
    
    # 평가 실행
    device = next(model.parameters()).device  # 모델이 있는 디바이스 확인
    metrics = evaluate_model(model, test_loader, device)
    
    # 결과 출력
    print_evaluation_results(metrics)
    
    return metrics

metrics = evaluate_test_set(
    X_test=X_test,
    Y_test=Y_test,
    model=model,
    user_encoder=user_encoder,
    item_encoder=item_encoder,
    batch_size=BATCH_SIZE
)

테스트 데이터셋 생성 중...
테스트 데이터 수: 91742
모델 평가 중...


Evaluating: 100%|██████████| 359/359 [00:00<00:00, 808.38batch/s, loss=2.7553]



모델 평가 결과
Loss: 1.6484
Accuracy: 0.7224
Precision: 0.7938
Recall: 0.7919
F1 Score: 0.7929
AUC-ROC: 0.7671
