<a href="https://colab.research.google.com/github/Afzalkhm12/Kelompok-3-MovieLens-Recommender-System-NCF-vs-MF/blob/main/Untitled5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:

import os, sys, math, random, glob, gc, json, time
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_squared_error, mean_absolute_error

from torch.utils.tensorboard import SummaryWriter
import matplotlib.pyplot as plt

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7cb07add68b0>

In [7]:
from google.colab import drive

drive.mount('/content/drive')

BASE_CANDIDATES = [
    '/content/drive/MyDrive/Penelitian/uas',
]

REQUIRED_FILES = [
    'genome-scores.csv', 'genome-tags.csv', 'links.csv',
    'movies.csv', 'ratings.csv', 'tags.csv'
]

def find_file(bases, fname):
    for b in bases:
        if os.path.exists(b):
            hits = glob.glob(os.path.join(b, '**', fname), recursive=True)
            if len(hits) > 0:
                hits = sorted(hits, key=len)
                return hits[0]
    return None

paths = {}
for f in REQUIRED_FILES:
    p = find_file(BASE_CANDIDATES, f)
    assert p is not None, f"File '{f}' tidak ditemukan di Penelitian/uas/. Pastikan nama file sesuai."
    paths[f] = p

print("== Dataset Paths ==")
for k,v in paths.items():
    print(f"{k}: {v}")

BASE_DIR = os.path.dirname(list(paths.values())[0])
MODEL_DIR = os.path.join(BASE_DIR, 'models')
FIG_DIR = os.path.join(MODEL_DIR, 'figs')
RUNS_DIR = os.path.join(MODEL_DIR, 'runs')
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(FIG_DIR, exist_ok=True)
os.makedirs(RUNS_DIR, exist_ok=True)
print("Model dir:", MODEL_DIR)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
== Dataset Paths ==
genome-scores.csv: /content/drive/MyDrive/Penelitian/uas/genome-scores.csv
genome-tags.csv: /content/drive/MyDrive/Penelitian/uas/genome-tags.csv
links.csv: /content/drive/MyDrive/Penelitian/uas/links.csv
movies.csv: /content/drive/MyDrive/Penelitian/uas/movies.csv
ratings.csv: /content/drive/MyDrive/Penelitian/uas/ratings.csv
tags.csv: /content/drive/MyDrive/Penelitian/uas/tags.csv
Model dir: /content/drive/MyDrive/Penelitian/uas/models


In [8]:
def read_csv_lower(path, **kwargs):
    df = pd.read_csv(path, **kwargs)
    df.columns = [c.lower() for c in df.columns]
    return df

ratings_dtypes = {'userid': np.int32, 'movieid': np.int32, 'rating': np.float32, 'timestamp': np.int64}
movies_dtypes  = {'movieid': np.int32, 'title': 'string', 'genres': 'string'}
links_dtypes   = {'movieid': np.int32, 'imdbid': 'Int64', 'tmdbid': 'Int64'}

gscores_dtypes = {'movieid': np.int32, 'tagid': np.int32, 'relevance': np.float32}
gtags_dtypes   = {'tagid': np.int32, 'tag': 'string'}

tags_dtypes    = {'userid': np.int32, 'movieid': np.int32, 'tag': 'string', 'timestamp': np.int64}

USE_SUBSET = False
SUBSET_N = 2_000_000

ratings = read_csv_lower(paths['ratings.csv'], dtype=ratings_dtypes)
if USE_SUBSET and len(ratings) > SUBSET_N:
    ratings = ratings.sample(SUBSET_N, random_state=SEED).sort_values('timestamp').reset_index(drop=True)

movies  = read_csv_lower(paths['movies.csv'], dtype=movies_dtypes)
links   = read_csv_lower(paths['links.csv'], dtype=links_dtypes)

gscores = read_csv_lower(paths['genome-scores.csv'], dtype=gscores_dtypes)
gtags   = read_csv_lower(paths['genome-tags.csv'], dtype=gtags_dtypes)
user_tags = read_csv_lower(paths['tags.csv'], dtype=tags_dtypes)

print("Ratings head:", ratings.head())
print("Movies head:", movies.head())
print("N ratings:", len(ratings), "| N movies:", movies['movieid'].nunique(), "| N users:", ratings['userid'].nunique())

# Range rating aktual
R_MIN, R_MAX = float(ratings['rating'].min()), float(ratings['rating'].max())
print('Rating range:', R_MIN, 'to', R_MAX)

# ---------------------------
# EDA Ringkas (disimpan png)
# ---------------------------
plt.figure()
ratings['rating'].hist(bins=20)
plt.title('Distribusi Rating')
plt.xlabel('Rating'); plt.ylabel('Frekuensi')
plt.tight_layout(); plt.savefig(os.path.join(FIG_DIR, 'rating_distribution.png')); plt.close()

# Interaksi per user & item
ui_per_user = ratings.groupby('userid').size()
ui_per_item = ratings.groupby('movieid').size()

plt.figure(); ui_per_user.hist(bins=50)
plt.title('Distribusi Interaksi per User')
plt.xlabel('# Interaksi'); plt.ylabel('Frekuensi')
plt.tight_layout(); plt.savefig(os.path.join(FIG_DIR, 'interaksi_per_user.png')); plt.close()


plt.figure(); ui_per_item.hist(bins=50)
plt.title('Distribusi Interaksi per Item')
plt.xlabel('# Interaksi'); plt.ylabel('Frekuensi')
plt.tight_layout(); plt.savefig(os.path.join(FIG_DIR, 'interaksi_per_item.png')); plt.close()

movies['genres_list'] = movies['genres'].fillna('(no genres listed)').apply(lambda s: s.split('|'))
all_genres = pd.Series([g for lst in movies['genres_list'] for g in lst])
all_genres.value_counts().head(15).to_csv(os.path.join(MODEL_DIR, 'top_genres.csv'))
print('Top genres:', all_genres.value_counts().head(15))

user_tags['tag'] = user_tags['tag'].fillna('').str.lower()
user_tags['tag'].value_counts().head(50).to_csv(os.path.join(MODEL_DIR, 'top_user_tags.csv'))
print('Top user tags:', user_tags['tag'].value_counts().head(50))

Ratings head:    userid  movieid  rating   timestamp
0       1        2     3.5  1112486027
1       1       29     3.5  1112484676
2       1       32     3.5  1112484819
3       1       47     3.5  1112484727
4       1       50     3.5  1112484580
Movies head:    movieid                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
N ratings: 20000263 | N movies: 27278 | N users: 138493
Rating range: 0.5 to 5.0
Top genres: Drama          13344
Comedy          8374
Thriller 

In [4]:
uid_unique = ratings['userid'].unique()
mid_unique = ratings['movieid'].unique()
user2idx = {u:i for i,u in enumerate(uid_unique)}
item2idx = {m:i for i,m in enumerate(mid_unique)}

ratings['u'] = ratings['userid'].map(user2idx).astype(np.int32)
ratings['i'] = ratings['movieid'].map(item2idx).astype(np.int32)

n_users = len(user2idx)
n_items = len(item2idx)
print('n_users:', n_users, '| n_items:', n_items)


n_users: 138493 | n_items: 26744


In [5]:
ratings = ratings.sort_values(['userid','timestamp']).reset_index(drop=True)

cnt = ratings.groupby('userid')['timestamp'].transform('count')
ord = ratings.groupby('userid')['timestamp'].rank(method='first', ascending=True)

is_test = (ord == cnt)
is_val  = (cnt >= 3) & (ord == (cnt - 1))
is_train = ~(is_test | is_val)

train_df = ratings[is_train][['u','i','rating']].reset_index(drop=True)
val_df   = ratings[is_val][['u','i','rating']].reset_index(drop=True)
test_df  = ratings[is_test][['u','i','rating']].reset_index(drop=True)

print('Split sizes -> Train:', len(train_df), '| Val:', len(val_df), '| Test:', len(test_df))

Split sizes -> Train: 19723277 | Val: 138493 | Test: 138493


In [6]:
class ExplicitDataset(Dataset):
    def __init__(self, df):
        self.u = torch.tensor(df['u'].values, dtype=torch.long)
        self.i = torch.tensor(df['i'].values, dtype=torch.long)
        self.r = torch.tensor(df['rating'].values, dtype=torch.float32)
    def __len__(self):
        return len(self.r)
    def __getitem__(self, idx):
        return self.u[idx], self.i[idx], self.r[idx]

BATCH_SIZE = 4096
NUM_WORKERS = 2
PERSISTENT = False

train_ds = ExplicitDataset(train_df)
val_ds   = ExplicitDataset(val_df) if len(val_df) > 0 else None
test_ds  = ExplicitDataset(test_df)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=NUM_WORKERS, pin_memory=True, persistent_workers=PERSISTENT)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True, persistent_workers=PERSISTENT) if val_ds is not None else None
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True, persistent_workers=PERSISTENT)

In [7]:
class MF(nn.Module):
    def __init__(self, n_users, n_items, emb_dim=64, bias=True):
        super().__init__()
        self.user_emb = nn.Embedding(n_users, emb_dim)
        self.item_emb = nn.Embedding(n_items, emb_dim)
        nn.init.normal_(self.user_emb.weight, std=0.01)
        nn.init.normal_(self.item_emb.weight, std=0.01)
        self.use_bias = bias
        if bias:
            self.user_bias = nn.Embedding(n_users, 1)
            self.item_bias = nn.Embedding(n_items, 1)
            nn.init.zeros_(self.user_bias.weight)
            nn.init.zeros_(self.item_bias.weight)
        else:
            self.user_bias = None
            self.item_bias = None
    def forward(self, u, i):
        ue = self.user_emb(u)
        ie = self.item_emb(i)
        dot = (ue * ie).sum(dim=1)
        if self.use_bias:
            dot = dot + self.user_bias(u).squeeze(1) + self.item_bias(i).squeeze(1)
        return dot

class NCF(nn.Module):
    def __init__(self, n_users, n_items, emb_dim=64, mlp_dims=(256,128,64), dropout=0.3):
        super().__init__()
        self.user_emb = nn.Embedding(n_users, emb_dim)
        self.item_emb = nn.Embedding(n_items, emb_dim)
        nn.init.normal_(self.user_emb.weight, std=0.01)
        nn.init.normal_(self.item_emb.weight, std=0.01)
        layers = []
        in_dim = emb_dim*2
        for h in mlp_dims:
            layers.append(nn.Linear(in_dim, h))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            in_dim = h
        layers.append(nn.Linear(in_dim, 1))
        self.mlp = nn.Sequential(*layers)
    def forward(self, u, i):
        ue = self.user_emb(u)
        ie = self.item_emb(i)
        x = torch.cat([ue, ie], dim=1)
        out = self.mlp(x).squeeze(1)
        return out

In [8]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', DEVICE)

RUN_NAME = time.strftime('run_%Y%m%d_%H%M%S')
WRITER = SummaryWriter(log_dir=os.path.join(RUNS_DIR, RUN_NAME))

@torch.no_grad()
def evaluate_explicit(model, loader, clip_range=None):
    model.eval(); model.to(DEVICE)
    preds, reals = [], []
    for u,i,r in loader:
        u = u.to(DEVICE); i = i.to(DEVICE); r = r.to(DEVICE)
        y = model(u,i)
        y = y.detach().cpu().numpy()
        if clip_range is not None:
            y = np.clip(y, clip_range[0], clip_range[1])
        preds.append(y)
        reals.append(r.detach().cpu().numpy())
    preds = np.concatenate(preds); reals = np.concatenate(reals)
    rmse = math.sqrt(mean_squared_error(reals, preds))
    mae = mean_absolute_error(reals, preds)
    return rmse, mae


def train_explicit(model, train_loader, val_loader=None, epochs=5, lr=5e-3, wd=1e-5, clip=5.0, patience=2, clip_range=None, tag='model'):
    model.to(DEVICE)
    opt = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    best_val = float('inf')
    best_path = os.path.join(MODEL_DIR, f"best_{model.__class__.__name__}.pt")
    wait = 0
    for ep in range(1, epochs+1):
        model.train()
        running = 0.0; steps = 0
        pbar = tqdm(train_loader, desc=f"[Train {model.__class__.__name__}] Epoch {ep}/{epochs}")
        for u,i,r in pbar:
            u = u.to(DEVICE); i = i.to(DEVICE); r = r.to(DEVICE)
            y = model(u,i)
            loss = nn.functional.mse_loss(y, r)
            opt.zero_grad(); loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), clip)
            opt.step()
            running += float(loss.detach().cpu()); steps += 1
            pbar.set_postfix({'mse': float(loss.detach().cpu())})
        train_mse = running / max(1, steps)
        WRITER.add_scalar(f'{tag}/train_mse', train_mse, ep)

        # --- Robust check untuk val_loader (mendukung LimitedLoader tanpa .dataset) ---
        has_val = val_loader is not None
        if has_val:
            try:
                has_val = len(val_loader) > 0
            except Exception:
                # jika tidak bisa di-len, tetap coba validasi
                has_val = True

        if has_val:
            val_rmse, val_mae = evaluate_explicit(model, val_loader, clip_range)
            WRITER.add_scalar(f'{tag}/val_rmse', val_rmse, ep)
            WRITER.add_scalar(f'{tag}/val_mae', val_mae, ep)
            print(f"Epoch {ep}: Val RMSE={val_rmse:.4f} | Val MAE={val_mae:.4f}")
            if val_rmse < best_val:
                best_val = val_rmse
                torch.save({'model_state_dict': model.state_dict(), 'val_rmse': val_rmse, 'val_mae': val_mae}, best_path)
                print('  Saved(best):', best_path)
                wait = 0
            else:
                wait += 1
                if wait >= patience:
                    print('  Early stopping!')
                    break
        else:
            torch.save({'model_state_dict': model.state_dict()}, best_path)
    return best_path

Device: cpu


In [9]:
HPO_USE_GRID = True
HPO_USE_OPTUNA = False  # set True jika ingin Optuna (lebih berat)
HPO_EPOCHS = 2          # epoch singkat untuk seleksi HP
HPO_SUBSET_STEPS = 400  # batasi batch/steps per epoch saat HPO (None untuk penuh)

# DataLoader terbatas langkah saat HPO
class LimitedLoader:
    """Wrapper DataLoader dengan batas langkah; kompatibel dengan len() dan .dataset.
    """
    def __init__(self, loader, max_steps=None):
        self.loader = loader
        self.max_steps = max_steps
    def __iter__(self):
        for s, batch in enumerate(self.loader):
            if self.max_steps is not None and s >= self.max_steps:
                break
            yield batch
    def __len__(self):
        if self.max_steps is None:
            try:
                return len(self.loader)
            except Exception:
                return 0
        return min(self.max_steps, len(self.loader))
    # agar kompatibel dengan kode yang memanggil val_loader.dataset
    @property
    def dataset(self):
        return self

MF_GRID = {
    'emb_dim': [32, 64, 128],
    'lr': [1e-3, 5e-3],
    'wd': [1e-6, 1e-5, 1e-4],
}

# ------- Grid untuk NCF -------
NCF_GRID = {
    'emb_dim': [32, 64],
    'mlp_dims': [(128,64), (256,128,64)],
    'dropout': [0.2, 0.3],
    'lr': [3e-3, 5e-3],
    'wd': [1e-6, 1e-5],
}

best_cfg = {}

if HPO_USE_GRID:
    print("== HPO Grid: MF ==")
    best_val = float('inf'); best = None
    for emb in MF_GRID['emb_dim']:
        for lr in MF_GRID['lr']:
            for wd in MF_GRID['wd']:
                model = MF(n_users, n_items, emb_dim=emb, bias=True)
                tag = f"HPO_MF/emb{emb}_lr{lr}_wd{wd}"
                tr_loader = LimitedLoader(train_loader, HPO_SUBSET_STEPS)
                vl_loader = LimitedLoader(val_loader, HPO_SUBSET_STEPS) if val_loader is not None else None
                path = train_explicit(model, tr_loader, vl_loader, epochs=HPO_EPOCHS, lr=lr, wd=wd, patience=1, clip_range=(R_MIN, R_MAX), tag=tag)
                ckpt = torch.load(path, map_location='cpu')
                model.load_state_dict(ckpt['model_state_dict'])
                if vl_loader is not None:
                    vrmse, _ = evaluate_explicit(model.to(DEVICE), val_loader, (R_MIN, R_MAX))
                else:
                    vrmse, _ = evaluate_explicit(model.to(DEVICE), test_loader, (R_MIN, R_MAX))
                print(f"MF emb={emb} lr={lr} wd={wd} -> Val RMSE={vrmse:.4f}")
                if vrmse < best_val:
                    best_val = vrmse; best = {'emb_dim': emb, 'lr': lr, 'wd': wd}
    best_cfg['MF'] = best
    print('Best MF cfg:', best_cfg['MF'])

    print("== HPO Grid: NCF ==")
    best_val = float('inf'); best = None
    for emb in NCF_GRID['emb_dim']:
        for dims in NCF_GRID['mlp_dims']:
            for dp in NCF_GRID['dropout']:
                for lr in NCF_GRID['lr']:
                    for wd in NCF_GRID['wd']:
                        model = NCF(n_users, n_items, emb_dim=emb, mlp_dims=dims, dropout=dp)
                        tag = f"HPO_NCF/emb{emb}_dims{'-'.join(map(str,dims))}_dp{dp}_lr{lr}_wd{wd}"
                        tr_loader = LimitedLoader(train_loader, HPO_SUBSET_STEPS)
                        vl_loader = LimitedLoader(val_loader, HPO_SUBSET_STEPS) if val_loader is not None else None
                        path = train_explicit(model, tr_loader, vl_loader, epochs=HPO_EPOCHS, lr=lr, wd=wd, patience=1, clip_range=(R_MIN, R_MAX), tag=tag)
                        ckpt = torch.load(path, map_location='cpu')
                        model.load_state_dict(ckpt['model_state_dict'])
                        if vl_loader is not None:
                            vrmse, _ = evaluate_explicit(model.to(DEVICE), val_loader, (R_MIN, R_MAX))
                        else:
                            vrmse, _ = evaluate_explicit(model.to(DEVICE), test_loader, (R_MIN, R_MAX))
                        print(f"NCF emb={emb} dims={dims} dp={dp} lr={lr} wd={wd} -> Val RMSE={vrmse:.4f}")
                        if vrmse < best_val:
                            best_val = vrmse; best = {'emb_dim': emb, 'mlp_dims': dims, 'dropout': dp, 'lr': lr, 'wd': wd}
    best_cfg['NCF'] = best
    print('Best NCF cfg:', best_cfg['NCF'])

== HPO Grid: MF ==


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [00:54<00:00,  7.37it/s, mse=6.61]


Epoch 1: Val RMSE=3.0739 | Val MAE=2.8718
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [00:51<00:00,  7.77it/s, mse=2.08]


Epoch 2: Val RMSE=2.1456 | Val MAE=1.8384
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=32 lr=0.001 wd=1e-06 -> Val RMSE=2.1456


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [00:55<00:00,  7.15it/s, mse=6.5]


Epoch 1: Val RMSE=3.0728 | Val MAE=2.8709
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [00:54<00:00,  7.29it/s, mse=2.13]


Epoch 2: Val RMSE=2.1445 | Val MAE=1.8372
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=32 lr=0.001 wd=1e-05 -> Val RMSE=2.1445


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [00:58<00:00,  6.79it/s, mse=6.62]


Epoch 1: Val RMSE=3.0779 | Val MAE=2.8765
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [00:52<00:00,  7.58it/s, mse=2.14]


Epoch 2: Val RMSE=2.1462 | Val MAE=1.8397
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=32 lr=0.001 wd=0.0001 -> Val RMSE=2.1462


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [00:57<00:00,  6.90it/s, mse=1.31]


Epoch 1: Val RMSE=1.5880 | Val MAE=1.2462
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [00:55<00:00,  7.25it/s, mse=0.91]


Epoch 2: Val RMSE=1.1634 | Val MAE=0.8955
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=32 lr=0.005 wd=1e-06 -> Val RMSE=1.1634


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [00:57<00:00,  7.02it/s, mse=1.31]


Epoch 1: Val RMSE=1.5854 | Val MAE=1.2427
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [00:53<00:00,  7.42it/s, mse=0.955]


Epoch 2: Val RMSE=1.1627 | Val MAE=0.8949
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=32 lr=0.005 wd=1e-05 -> Val RMSE=1.1627


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [00:56<00:00,  7.14it/s, mse=1.3]


Epoch 1: Val RMSE=1.5868 | Val MAE=1.2448
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [00:52<00:00,  7.55it/s, mse=0.975]


Epoch 2: Val RMSE=1.1631 | Val MAE=0.8947
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=32 lr=0.005 wd=0.0001 -> Val RMSE=1.1631


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [01:27<00:00,  4.60it/s, mse=4.53]


Epoch 1: Val RMSE=2.8482 | Val MAE=2.6087
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [01:28<00:00,  4.50it/s, mse=1.66]


Epoch 2: Val RMSE=1.8851 | Val MAE=1.5585
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=64 lr=0.001 wd=1e-06 -> Val RMSE=1.8851


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [01:33<00:00,  4.29it/s, mse=4.44]


Epoch 1: Val RMSE=2.8256 | Val MAE=2.5818
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [01:21<00:00,  4.92it/s, mse=1.56]


Epoch 2: Val RMSE=1.8770 | Val MAE=1.5511
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=64 lr=0.001 wd=1e-05 -> Val RMSE=1.8770


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [01:32<00:00,  4.35it/s, mse=4.7]


Epoch 1: Val RMSE=2.8371 | Val MAE=2.5956
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [01:21<00:00,  4.93it/s, mse=1.61]


Epoch 2: Val RMSE=1.8815 | Val MAE=1.5556
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=64 lr=0.001 wd=0.0001 -> Val RMSE=1.8815


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [01:30<00:00,  4.43it/s, mse=1.23]


Epoch 1: Val RMSE=1.4951 | Val MAE=1.1494
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [01:29<00:00,  4.46it/s, mse=0.908]


Epoch 2: Val RMSE=1.1497 | Val MAE=0.8815
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=64 lr=0.005 wd=1e-06 -> Val RMSE=1.1497


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [01:26<00:00,  4.60it/s, mse=1.31]


Epoch 1: Val RMSE=1.5026 | Val MAE=1.1548
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [01:24<00:00,  4.75it/s, mse=0.917]


Epoch 2: Val RMSE=1.1471 | Val MAE=0.8807
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=64 lr=0.005 wd=1e-05 -> Val RMSE=1.1471


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [01:26<00:00,  4.61it/s, mse=1.13]


Epoch 1: Val RMSE=1.4957 | Val MAE=1.1491
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [01:22<00:00,  4.86it/s, mse=0.945]


Epoch 2: Val RMSE=1.1464 | Val MAE=0.8802
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=64 lr=0.005 wd=0.0001 -> Val RMSE=1.1464


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [02:16<00:00,  2.92it/s, mse=3.26]


Epoch 1: Val RMSE=2.5557 | Val MAE=2.2608
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [02:19<00:00,  2.87it/s, mse=1.29]


Epoch 2: Val RMSE=1.6410 | Val MAE=1.3114
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=128 lr=0.001 wd=1e-06 -> Val RMSE=1.6410


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [02:13<00:00,  3.00it/s, mse=3.13]


Epoch 1: Val RMSE=2.5419 | Val MAE=2.2453
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [02:06<00:00,  3.15it/s, mse=1.39]


Epoch 2: Val RMSE=1.6391 | Val MAE=1.3108
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=128 lr=0.001 wd=1e-05 -> Val RMSE=1.6391


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [02:09<00:00,  3.09it/s, mse=3.23]


Epoch 1: Val RMSE=2.5305 | Val MAE=2.2318
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [01:56<00:00,  3.44it/s, mse=1.33]


Epoch 2: Val RMSE=1.6338 | Val MAE=1.3058
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=128 lr=0.001 wd=0.0001 -> Val RMSE=1.6338


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [02:27<00:00,  2.71it/s, mse=1.18]


Epoch 1: Val RMSE=1.4439 | Val MAE=1.0972
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [02:13<00:00,  3.00it/s, mse=0.953]


Epoch 2: Val RMSE=1.1511 | Val MAE=0.8839
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=128 lr=0.005 wd=1e-06 -> Val RMSE=1.1511


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [02:22<00:00,  2.81it/s, mse=1.22]


Epoch 1: Val RMSE=1.4452 | Val MAE=1.0971
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [02:13<00:00,  3.00it/s, mse=0.928]


Epoch 2: Val RMSE=1.1470 | Val MAE=0.8782
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=128 lr=0.005 wd=1e-05 -> Val RMSE=1.1470


[Train MF] Epoch 1/2: 100%|██████████| 400/400 [02:30<00:00,  2.66it/s, mse=1.17]


Epoch 1: Val RMSE=1.4553 | Val MAE=1.1047
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/2: 100%|██████████| 400/400 [02:06<00:00,  3.15it/s, mse=0.909]


Epoch 2: Val RMSE=1.1429 | Val MAE=0.8767
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt
MF emb=128 lr=0.005 wd=0.0001 -> Val RMSE=1.1429
Best MF cfg: {'emb_dim': 128, 'lr': 0.005, 'wd': 0.0001}
== HPO Grid: NCF ==


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:01<00:00,  6.48it/s, mse=0.9]


Epoch 1: Val RMSE=0.9670 | Val MAE=0.7569
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [00:57<00:00,  6.93it/s, mse=0.864]


Epoch 2: Val RMSE=0.9501 | Val MAE=0.7419
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(128, 64) dp=0.2 lr=0.003 wd=1e-06 -> Val RMSE=0.9501


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [00:59<00:00,  6.72it/s, mse=0.919]


Epoch 1: Val RMSE=0.9653 | Val MAE=0.7537
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [00:57<00:00,  6.94it/s, mse=0.837]


Epoch 2: Val RMSE=0.9423 | Val MAE=0.7282
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(128, 64) dp=0.2 lr=0.003 wd=1e-05 -> Val RMSE=0.9423


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:00<00:00,  6.62it/s, mse=0.888]


Epoch 1: Val RMSE=0.9592 | Val MAE=0.7478
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [00:57<00:00,  6.92it/s, mse=0.844]


Epoch 2: Val RMSE=0.9429 | Val MAE=0.7283
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(128, 64) dp=0.2 lr=0.005 wd=1e-06 -> Val RMSE=0.9429


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:01<00:00,  6.50it/s, mse=0.894]


Epoch 1: Val RMSE=0.9588 | Val MAE=0.7417
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [00:57<00:00,  6.92it/s, mse=0.855]


Epoch 2: Val RMSE=0.9419 | Val MAE=0.7290
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(128, 64) dp=0.2 lr=0.005 wd=1e-05 -> Val RMSE=0.9419


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:00<00:00,  6.59it/s, mse=0.925]


Epoch 1: Val RMSE=0.9669 | Val MAE=0.7558
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:04<00:00,  6.21it/s, mse=0.871]


Epoch 2: Val RMSE=0.9508 | Val MAE=0.7421
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(128, 64) dp=0.3 lr=0.003 wd=1e-06 -> Val RMSE=0.9508


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:02<00:00,  6.43it/s, mse=0.883]


Epoch 1: Val RMSE=0.9654 | Val MAE=0.7541
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [00:58<00:00,  6.85it/s, mse=0.851]


Epoch 2: Val RMSE=0.9438 | Val MAE=0.7332
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(128, 64) dp=0.3 lr=0.003 wd=1e-05 -> Val RMSE=0.9438


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [00:59<00:00,  6.70it/s, mse=0.983]


Epoch 1: Val RMSE=0.9711 | Val MAE=0.7622
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [00:58<00:00,  6.80it/s, mse=0.905]


Epoch 2: Val RMSE=0.9436 | Val MAE=0.7329
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(128, 64) dp=0.3 lr=0.005 wd=1e-06 -> Val RMSE=0.9436


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:09<00:00,  5.79it/s, mse=0.943]


Epoch 1: Val RMSE=0.9712 | Val MAE=0.7639
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:06<00:00,  6.01it/s, mse=0.913]


Epoch 2: Val RMSE=0.9508 | Val MAE=0.7426
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(128, 64) dp=0.3 lr=0.005 wd=1e-05 -> Val RMSE=0.9508


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:22<00:00,  4.84it/s, mse=0.88]


Epoch 1: Val RMSE=0.9671 | Val MAE=0.7550
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:17<00:00,  5.15it/s, mse=0.851]


Epoch 2: Val RMSE=0.9458 | Val MAE=0.7405
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(256, 128, 64) dp=0.2 lr=0.003 wd=1e-06 -> Val RMSE=0.9458


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:10<00:00,  5.68it/s, mse=0.94]


Epoch 1: Val RMSE=0.9630 | Val MAE=0.7516
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:07<00:00,  5.94it/s, mse=0.901]


Epoch 2: Val RMSE=0.9419 | Val MAE=0.7335
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(256, 128, 64) dp=0.2 lr=0.003 wd=1e-05 -> Val RMSE=0.9419


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:12<00:00,  5.49it/s, mse=0.9]


Epoch 1: Val RMSE=0.9632 | Val MAE=0.7537
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:07<00:00,  5.89it/s, mse=0.842]


Epoch 2: Val RMSE=0.9398 | Val MAE=0.7267
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(256, 128, 64) dp=0.2 lr=0.005 wd=1e-06 -> Val RMSE=0.9398


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:12<00:00,  5.50it/s, mse=0.88]


Epoch 1: Val RMSE=0.9656 | Val MAE=0.7582
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:08<00:00,  5.86it/s, mse=0.838]


Epoch 2: Val RMSE=0.9427 | Val MAE=0.7303
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(256, 128, 64) dp=0.2 lr=0.005 wd=1e-05 -> Val RMSE=0.9427


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:11<00:00,  5.57it/s, mse=0.973]


Epoch 1: Val RMSE=0.9706 | Val MAE=0.7644
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:09<00:00,  5.80it/s, mse=0.916]


Epoch 2: Val RMSE=0.9446 | Val MAE=0.7367
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(256, 128, 64) dp=0.3 lr=0.003 wd=1e-06 -> Val RMSE=0.9446


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:11<00:00,  5.58it/s, mse=0.988]


Epoch 1: Val RMSE=0.9691 | Val MAE=0.7601
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:08<00:00,  5.86it/s, mse=0.915]


Epoch 2: Val RMSE=0.9473 | Val MAE=0.7390
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(256, 128, 64) dp=0.3 lr=0.003 wd=1e-05 -> Val RMSE=0.9473


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:10<00:00,  5.69it/s, mse=0.968]


Epoch 1: Val RMSE=0.9637 | Val MAE=0.7547
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:07<00:00,  5.89it/s, mse=0.885]


Epoch 2: Val RMSE=0.9477 | Val MAE=0.7376
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(256, 128, 64) dp=0.3 lr=0.005 wd=1e-06 -> Val RMSE=0.9477


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:10<00:00,  5.67it/s, mse=0.953]


Epoch 1: Val RMSE=0.9655 | Val MAE=0.7539
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:08<00:00,  5.84it/s, mse=0.852]


Epoch 2: Val RMSE=0.9449 | Val MAE=0.7365
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=32 dims=(256, 128, 64) dp=0.3 lr=0.005 wd=1e-05 -> Val RMSE=0.9449


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:32<00:00,  4.32it/s, mse=0.912]


Epoch 1: Val RMSE=0.9653 | Val MAE=0.7543
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:25<00:00,  4.65it/s, mse=0.877]


Epoch 2: Val RMSE=0.9472 | Val MAE=0.7398
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(128, 64) dp=0.2 lr=0.003 wd=1e-06 -> Val RMSE=0.9472


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:31<00:00,  4.35it/s, mse=0.95]


Epoch 1: Val RMSE=0.9710 | Val MAE=0.7615
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:26<00:00,  4.63it/s, mse=0.889]


Epoch 2: Val RMSE=0.9480 | Val MAE=0.7374
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(128, 64) dp=0.2 lr=0.003 wd=1e-05 -> Val RMSE=0.9480


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:37<00:00,  4.11it/s, mse=0.903]


Epoch 1: Val RMSE=0.9702 | Val MAE=0.7598
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:32<00:00,  4.32it/s, mse=0.858]


Epoch 2: Val RMSE=0.9437 | Val MAE=0.7317
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(128, 64) dp=0.2 lr=0.005 wd=1e-06 -> Val RMSE=0.9437


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:39<00:00,  4.03it/s, mse=0.929]


Epoch 1: Val RMSE=0.9675 | Val MAE=0.7568
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:33<00:00,  4.26it/s, mse=0.849]


Epoch 2: Val RMSE=0.9407 | Val MAE=0.7314
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(128, 64) dp=0.2 lr=0.005 wd=1e-05 -> Val RMSE=0.9407


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:40<00:00,  3.98it/s, mse=0.992]


Epoch 1: Val RMSE=0.9733 | Val MAE=0.7656
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:33<00:00,  4.27it/s, mse=0.951]


Epoch 2: Val RMSE=0.9518 | Val MAE=0.7420
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(128, 64) dp=0.3 lr=0.003 wd=1e-06 -> Val RMSE=0.9518


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:33<00:00,  4.29it/s, mse=1.04]


Epoch 1: Val RMSE=0.9694 | Val MAE=0.7609
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:13<00:00,  5.45it/s, mse=0.926]


Epoch 2: Val RMSE=0.9463 | Val MAE=0.7339
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(128, 64) dp=0.3 lr=0.003 wd=1e-05 -> Val RMSE=0.9463


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:34<00:00,  4.25it/s, mse=0.956]


Epoch 1: Val RMSE=0.9685 | Val MAE=0.7587
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:27<00:00,  4.55it/s, mse=0.902]


Epoch 2: Val RMSE=0.9485 | Val MAE=0.7382
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(128, 64) dp=0.3 lr=0.005 wd=1e-06 -> Val RMSE=0.9485


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:34<00:00,  4.24it/s, mse=0.992]


Epoch 1: Val RMSE=0.9695 | Val MAE=0.7618
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:26<00:00,  4.61it/s, mse=0.894]


Epoch 2: Val RMSE=0.9524 | Val MAE=0.7479
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(128, 64) dp=0.3 lr=0.005 wd=1e-05 -> Val RMSE=0.9524


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:52<00:00,  3.56it/s, mse=0.948]


Epoch 1: Val RMSE=0.9768 | Val MAE=0.7688
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:43<00:00,  3.85it/s, mse=0.905]


Epoch 2: Val RMSE=0.9455 | Val MAE=0.7368
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(256, 128, 64) dp=0.2 lr=0.003 wd=1e-06 -> Val RMSE=0.9455


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:39<00:00,  4.03it/s, mse=0.894]


Epoch 1: Val RMSE=0.9645 | Val MAE=0.7520
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:32<00:00,  4.31it/s, mse=0.807]


Epoch 2: Val RMSE=0.9442 | Val MAE=0.7367
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(256, 128, 64) dp=0.2 lr=0.003 wd=1e-05 -> Val RMSE=0.9442


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:37<00:00,  4.08it/s, mse=0.885]


Epoch 1: Val RMSE=0.9580 | Val MAE=0.7413
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:33<00:00,  4.29it/s, mse=0.843]


Epoch 2: Val RMSE=0.9388 | Val MAE=0.7221
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(256, 128, 64) dp=0.2 lr=0.005 wd=1e-06 -> Val RMSE=0.9388


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:53<00:00,  3.53it/s, mse=0.962]


Epoch 1: Val RMSE=0.9659 | Val MAE=0.7587
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:45<00:00,  3.80it/s, mse=0.868]


Epoch 2: Val RMSE=0.9411 | Val MAE=0.7209
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(256, 128, 64) dp=0.2 lr=0.005 wd=1e-05 -> Val RMSE=0.9411


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:52<00:00,  3.57it/s, mse=1.01]


Epoch 1: Val RMSE=0.9743 | Val MAE=0.7666
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:46<00:00,  3.74it/s, mse=0.943]


Epoch 2: Val RMSE=0.9486 | Val MAE=0.7389
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(256, 128, 64) dp=0.3 lr=0.003 wd=1e-06 -> Val RMSE=0.9486


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:51<00:00,  3.59it/s, mse=0.971]


Epoch 1: Val RMSE=0.9683 | Val MAE=0.7578
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:45<00:00,  3.79it/s, mse=0.957]


Epoch 2: Val RMSE=0.9430 | Val MAE=0.7288
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(256, 128, 64) dp=0.3 lr=0.003 wd=1e-05 -> Val RMSE=0.9430


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:50<00:00,  3.63it/s, mse=0.957]


Epoch 1: Val RMSE=0.9573 | Val MAE=0.7428
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:38<00:00,  4.05it/s, mse=0.841]


Epoch 2: Val RMSE=0.9422 | Val MAE=0.7280
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(256, 128, 64) dp=0.3 lr=0.005 wd=1e-06 -> Val RMSE=0.9422


[Train NCF] Epoch 1/2: 100%|██████████| 400/400 [01:51<00:00,  3.58it/s, mse=0.943]


Epoch 1: Val RMSE=0.9698 | Val MAE=0.7584
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/2: 100%|██████████| 400/400 [01:45<00:00,  3.79it/s, mse=0.807]


Epoch 2: Val RMSE=0.9409 | Val MAE=0.7290
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
NCF emb=64 dims=(256, 128, 64) dp=0.3 lr=0.005 wd=1e-05 -> Val RMSE=0.9409
Best NCF cfg: {'emb_dim': 64, 'mlp_dims': (256, 128, 64), 'dropout': 0.2, 'lr': 0.005, 'wd': 1e-06}


In [10]:
ncf_params = best_cfg.get('NCF', {'emb_dim':64, 'mlp_dims':(256,128,64), 'dropout':0.3, 'lr':5e-3, 'wd':1e-5})

with open(os.path.join(MODEL_DIR, 'best_hparams.json'), 'w') as f:
    json.dump({'MF': mf_params, 'NCF': ncf_params}, f, indent=2)

MF_EPOCHS  = 4
NCF_EPOCHS = 4

mf = MF(n_users, n_items, emb_dim=mf_params['emb_dim'], bias=True)
print('Final MF:', mf_params)

best_mf_path = train_explicit(mf, train_loader, val_loader, epochs=MF_EPOCHS, lr=mf_params['lr'], wd=mf_params['wd'], clip=5.0, patience=2, clip_range=(R_MIN, R_MAX), tag='MF_final')
ckpt = torch.load(best_mf_path, map_location='cpu')
mf.load_state_dict(ckpt['model_state_dict'])
rmse_test_mf, mae_test_mf = evaluate_explicit(mf.to(DEVICE), test_loader, clip_range=(R_MIN, R_MAX))
print(f"[MF] Test RMSE={rmse_test_mf:.4f} | Test MAE={mae_test_mf:.4f}")
final_mf_path = os.path.join(MODEL_DIR, 'final_MF.pt')
torch.save({'model_state_dict': mf.state_dict(), 'test_rmse': rmse_test_mf, 'test_mae': mae_test_mf, 'hparams': mf_params}, final_mf_path)
print('Saved:', final_mf_path)

ncf = NCF(n_users, n_items, emb_dim=ncf_params['emb_dim'], mlp_dims=tuple(ncf_params['mlp_dims']), dropout=ncf_params['dropout'])
print('Final NCF:', ncf_params)

best_ncf_path = train_explicit(ncf, train_loader, val_loader, epochs=NCF_EPOCHS, lr=ncf_params['lr'], wd=ncf_params['wd'], clip=5.0, patience=2, clip_range=(R_MIN, R_MAX), tag='NCF_final')
ckpt = torch.load(best_ncf_path, map_location='cpu')
ncf.load_state_dict(ckpt['model_state_dict'])
rmse_test_ncf, mae_test_ncf = evaluate_explicit(ncf.to(DEVICE), test_loader, clip_range=(R_MIN, R_MAX))
print(f"[NCF] Test RMSE={rmse_test_ncf:.4f} | Test MAE={mae_test_ncf:.4f}")
final_ncf_path = os.path.join(MODEL_DIR, 'final_NCF.pt')
torch.save({'model_state_dict': ncf.state_dict(), 'test_rmse': rmse_test_ncf, 'test_mae': mae_test_ncf, 'hparams': ncf_params}, final_ncf_path)
print('Saved:', final_ncf_path)

WRITER.close()


Final MF: {'emb_dim': 128, 'lr': 0.005, 'wd': 0.0001}


[Train MF] Epoch 1/4: 100%|██████████| 4816/4816 [26:34<00:00,  3.02it/s, mse=0.768]


Epoch 1: Val RMSE=0.9619 | Val MAE=0.7384
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_MF.pt


[Train MF] Epoch 2/4: 100%|██████████| 4816/4816 [25:17<00:00,  3.17it/s, mse=0.73]


Epoch 2: Val RMSE=0.9671 | Val MAE=0.7390


[Train MF] Epoch 3/4: 100%|██████████| 4816/4816 [24:59<00:00,  3.21it/s, mse=0.642]


Epoch 3: Val RMSE=0.9834 | Val MAE=0.7500
  Early stopping!
[MF] Test RMSE=0.9870 | Test MAE=0.7582
Saved: /content/drive/MyDrive/Penelitian/uas/models/final_MF.pt
Final NCF: {'emb_dim': 64, 'mlp_dims': (256, 128, 64), 'dropout': 0.2, 'lr': 0.005, 'wd': 1e-06}


[Train NCF] Epoch 1/4: 100%|██████████| 4816/4816 [20:04<00:00,  4.00it/s, mse=0.698]


Epoch 1: Val RMSE=0.8897 | Val MAE=0.6860
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 2/4: 100%|██████████| 4816/4816 [19:36<00:00,  4.09it/s, mse=0.727]


Epoch 2: Val RMSE=0.8737 | Val MAE=0.6735
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 3/4: 100%|██████████| 4816/4816 [19:45<00:00,  4.06it/s, mse=0.649]


Epoch 3: Val RMSE=0.8650 | Val MAE=0.6630
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt


[Train NCF] Epoch 4/4: 100%|██████████| 4816/4816 [19:06<00:00,  4.20it/s, mse=0.563]


Epoch 4: Val RMSE=0.8583 | Val MAE=0.6529
  Saved(best): /content/drive/MyDrive/Penelitian/uas/models/best_NCF.pt
[NCF] Test RMSE=0.8759 | Test MAE=0.6682
Saved: /content/drive/MyDrive/Penelitian/uas/models/final_NCF.pt


In [11]:
USE_IMPLICIT = True
POS_THRESHOLD = 4.0
NEG_PER_POS = 4
K_METRIC = 10

class ImplicitDataset(Dataset):
    def __init__(self, df_train_all, n_items, pos_threshold=4.0, neg_per_pos=4):
        self.n_items = n_items
        pos = df_train_all[df_train_all['rating'] >= pos_threshold]
        self.user_pos = pos.groupby('u')['i'].apply(set).to_dict()
        rows_u, rows_i, rows_y = [], [], []
        for u, items in tqdm(self.user_pos.items(), desc='NegSampling'):
            for it in items:
                rows_u.append(u); rows_i.append(it); rows_y.append(1.0)
                for _ in range(neg_per_pos):
                    while True:
                        ni = np.random.randint(0, n_items)
                        if ni not in items:
                            rows_u.append(u); rows_i.append(ni); rows_y.append(0.0)
                            break
        self.u = torch.tensor(rows_u, dtype=torch.long)
        self.i = torch.tensor(rows_i, dtype=torch.long)
        self.y = torch.tensor(rows_y, dtype=torch.float32)
    def __len__(self):
        return len(self.y)
    def __getitem__(self, idx):
        return self.u[idx], self.i[idx], self.y[idx]

class NCFImplicit(nn.Module):
    def __init__(self, n_users, n_items, emb_dim=64, mlp_dims=(256,128), dropout=0.3):
        super().__init__()
        self.user_emb = nn.Embedding(n_users, emb_dim)
        self.item_emb = nn.Embedding(n_items, emb_dim)
        nn.init.normal_(self.user_emb.weight, std=0.01)
        nn.init.normal_(self.item_emb.weight, std=0.01)
        layers = []
        in_dim = emb_dim*2
        for h in mlp_dims:
            layers.append(nn.Linear(in_dim, h))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            in_dim = h
        layers.append(nn.Linear(in_dim, 1))
        self.mlp = nn.Sequential(*layers)
    def forward(self, u, i):
        ue = self.user_emb(u)
        ie = self.item_emb(i)
        x = torch.cat([ue, ie], dim=1)
        logit = self.mlp(x).squeeze(1)
        return logit

@torch.no_grad()
def recall_ndcg_at_k(model, user_pos_dict, all_items, k=10):
    model.eval(); model.to(DEVICE)
    recalls, ndcgs = [], []
    for u, pos_items in tqdm(user_pos_dict.items(), desc=f'Metrics@{k}'):
        if len(pos_items) == 0:
            continue
        u_tensor = torch.tensor([u]*len(all_items), dtype=torch.long, device=DEVICE)
        i_tensor = torch.tensor(all_items, dtype=torch.long, device=DEVICE)
        scores = model(u_tensor, i_tensor).detach().cpu().numpy()
        topk_idx = np.argpartition(scores, -k)[-k:]
        rec_list = set(all_items[topk_idx])
        hit = len(rec_list & pos_items)
        recalls.append(hit / min(k, len(pos_items)))
        # NDCG
        sorted_topk = topk_idx[np.argsort(scores[topk_idx])[::-1]]
        dcg = 0.0; idcg = 0.0
        gains = [1.0 if all_items[idx] in pos_items else 0.0 for idx in sorted_topk]
        for rank, g in enumerate(gains, start=1):
            dcg += g / math.log2(rank+1)
        sorted_true = [1.0]*min(len(pos_items), k) + [0.0]*max(0, k-len(pos_items))
        for rank, g in enumerate(sorted_true, start=1):
            idcg += g / math.log2(rank+1)
        ndcgs.append(dcg / idcg if idcg>0 else 0.0)
    return float(np.mean(recalls)) if recalls else 0.0, float(np.mean(ndcgs)) if ndcgs else 0.0

if USE_IMPLICIT:
    impl_ds = ImplicitDataset(train_df, n_items, pos_threshold=POS_THRESHOLD, neg_per_pos=NEG_PER_POS)
    impl_loader = DataLoader(impl_ds, batch_size=8192, shuffle=True, num_workers=2, pin_memory=True)
    model_impl = NCFImplicit(n_users, n_items, emb_dim=64, mlp_dims=(256,128), dropout=0.3).to(DEVICE)
    opt = torch.optim.AdamW(model_impl.parameters(), lr=3e-3, weight_decay=1e-5)
    bce = nn.BCEWithLogitsLoss()
    writer_imp = SummaryWriter(log_dir=os.path.join(RUNS_DIR, RUN_NAME + '_implicit'))
    EPOCHS_IMPL = 3
    for ep in range(EPOCHS_IMPL):
        model_impl.train(); running=0.0; steps=0
        pbar = tqdm(impl_loader, desc=f"[Implicit NCF] Epoch {ep+1}/{EPOCHS_IMPL}")
        for u,i,y in pbar:
            u=u.to(DEVICE); i=i.to(DEVICE); y=y.to(DEVICE)
            logit = model_impl(u,i)
            loss = bce(logit, y)
            opt.zero_grad(); loss.backward(); opt.step()
            running += float(loss.detach().cpu()); steps+=1
            pbar.set_postfix({'bce': float(loss.detach().cpu())})
        writer_imp.add_scalar('implicit/train_bce', running/max(1,steps), ep+1)
    pos = train_df[train_df['rating'] >= POS_THRESHOLD]
    user_pos = pos.groupby('u')['i'].apply(set).to_dict()
    all_items = np.arange(n_items, dtype=np.int32)
    rec, ndcg = recall_ndcg_at_k(model_impl, user_pos, all_items, k=K_METRIC)
    print(f"[Implicit NCF] Recall@{K_METRIC}={rec:.4f} | NDCG@{K_METRIC}={ndcg:.4f}")
    torch.save({'model_state_dict': model_impl.state_dict(), 'Recall@K': rec, 'NDCG@K': ndcg}, os.path.join(MODEL_DIR,'final_NCF_implicit.pt'))
    writer_imp.add_scalar(f'implicit/Recall@{K_METRIC}', rec, 0)
    writer_imp.add_scalar(f'implicit/NDCG@{K_METRIC}', ndcg, 0)
    writer_imp.close()


NegSampling: 100%|██████████| 138238/138238 [02:20<00:00, 982.68it/s] 
[Implicit NCF] Epoch 1/3: 100%|██████████| 6004/6004 [38:42<00:00,  2.59it/s, bce=0.145]
[Implicit NCF] Epoch 2/3: 100%|██████████| 6004/6004 [38:15<00:00,  2.62it/s, bce=0.121]
[Implicit NCF] Epoch 3/3: 100%|██████████| 6004/6004 [38:29<00:00,  2.60it/s, bce=0.115]
Metrics@10: 100%|██████████| 138238/138238 [4:25:24<00:00,  8.68it/s]


[Implicit NCF] Recall@10=0.4324 | NDCG@10=0.4482


In [14]:
metrics_path = os.path.join(MODEL_DIR, 'metrics.csv')
metrics_df = pd.DataFrame([
    {'model':'MF',  'test_rmse': rmse_test_mf,  'test_mae': mae_test_mf},
    {'model':'NCF', 'test_rmse': rmse_test_ncf, 'test_mae': mae_test_ncf},
])
metrics_df.to_csv(metrics_path, index=False)
print('Saved metrics ->', metrics_path)

best_model_name = 'MF' if rmse_test_mf <= rmse_test_ncf else 'NCF'
best_model = mf if best_model_name=='MF' else ncf
print('Best model by RMSE:', best_model_name)

watched_train = train_df.groupby('u')['i'].apply(set).to_dict()

@torch.no_grad()
def topn_for_user(u_raw, topk=10):
    u = user2idx.get(u_raw, None)
    if u is None:
        raise ValueError('User raw id tidak ditemukan dalam mapping!')
    best_model.eval(); best_model.to(DEVICE)
    seen = watched_train.get(u, set())
    candidates = np.array([it for it in range(n_items) if it not in seen], dtype=np.int64)
    if len(candidates) == 0:
        return pd.DataFrame(columns=['movieid','title','genres','score'])
    u_tensor = torch.tensor([u]*len(candidates), dtype=torch.long, device=DEVICE)
    i_tensor = torch.tensor(candidates, dtype=torch.long, device=DEVICE)
    scores = best_model(u_tensor, i_tensor).detach().cpu().numpy()
    scores = np.clip(scores, R_MIN, R_MAX)
    top_idx = np.argpartition(scores, -topk)[-topk:]
    top_items = candidates[top_idx]
    top_scores = scores[top_idx]
    inv_item = {v:k for k,v in item2idx.items()}
    top_movieids = [inv_item[int(i)] for i in top_items]
    rec_df = pd.DataFrame({'movieid': top_movieids, 'score': top_scores})
    rec_df = rec_df.merge(movies[['movieid','title','genres']], on='movieid', how='left').sort_values('score', ascending=False).reset_index(drop=True)
    return rec_df[['movieid','title','genres','score']]


sample_raw_user = int(ratings['userid'].sample(1, random_state=SEED).iloc[0])
print('Sample user raw id:', sample_raw_user)
display(topn_for_user(sample_raw_user, topk=10))


Saved metrics -> /content/drive/MyDrive/Penelitian/uas/models/metrics.csv
Best model by RMSE: NCF
Sample user raw id: 122270


Unnamed: 0,movieid,title,genres,score
0,86237,Connections (1978),Documentary,4.38904
1,7502,Band of Brothers (2001),Action|Drama|War,4.351603
2,858,"Godfather, The (1972)",Crime|Drama,4.329669
3,318,"Shawshank Redemption, The (1994)",Crime|Drama,4.318628
4,1221,"Godfather: Part II, The (1974)",Crime|Drama,4.211534
5,77658,Cosmos (1980),Documentary,4.207386
6,27050,Bang Boom Bang - Ein todsicheres Ding (1999),Action|Comedy,4.170518
7,110324,"Missing Picture, The (L'image manquante) (2013)",Documentary,4.131399
8,113315,Zero Motivation (Efes beyahasei enosh) (2014),Comedy|Drama,4.094485
9,260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,4.075636


In [13]:
print("==== RINGKASAN ====")
print(f"MF:   Test RMSE={rmse_test_mf:.4f} | Test MAE={mae_test_mf:.4f}")
print(f"NCF:  Test RMSE={rmse_test_ncf:.4f} | Test MAE={mae_test_ncf:.4f}")
print("Model & artefak tersimpan di:")
print(final_mf_path)
print(final_ncf_path)
print(os.path.join(MODEL_DIR, 'best_hparams.json'))
print(metrics_path)
print('EDA figs dir ->', FIG_DIR)
print("Selesai.")

==== RINGKASAN ====
MF:   Test RMSE=0.9870 | Test MAE=0.7582
NCF:  Test RMSE=0.8759 | Test MAE=0.6682
Model & artefak tersimpan di:
/content/drive/MyDrive/Penelitian/uas/models/final_MF.pt
/content/drive/MyDrive/Penelitian/uas/models/final_NCF.pt
/content/drive/MyDrive/Penelitian/uas/models/best_hparams.json
/content/drive/MyDrive/Penelitian/uas/models/metrics.csv
EDA figs dir -> /content/drive/MyDrive/Penelitian/uas/models/figs
Selesai.


In [3]:
# CELL 14 — Rebuild artefak untuk UI (tanpa training ulang)

import os, glob, json
import numpy as np
import pandas as pd
try:
    from google.colab import drive
    drive.mount('/content/drive')
except Exception:
    pass

BASE_CANDIDATES = [
    '/content/drive/MyDrive/Penelitian/uas',
]
def find_file(bases, fname):
    for b in bases:
        if os.path.exists(b):
            hits = glob.glob(os.path.join(b, '**', fname), recursive=True)
            if hits:
                return sorted(hits, key=len)[0]
    return None

paths = {}
for f in ['ratings.csv','movies.csv','metrics.csv','final_MF.pt','final_NCF.pt']:
    p = find_file(BASE_CANDIDATES, f)
    if p is None and f in ['final_MF.pt','final_NCF.pt']:
        # boleh tidak ada salah satu model
        print(f"Warning: {f} belum ditemukan (tidak masalah bila cuma punya satu model).")
    elif p is None:
        raise FileNotFoundError(f"Gagal menemukan {f} di {BASE_CANDIDATES}")
    paths[f] = p

print("== Paths ==")
for k,v in paths.items():
    print(f"{k}: {v}")

BASE_DIR  = os.path.dirname(paths['ratings.csv'])
MODEL_DIR = os.path.join(BASE_DIR, 'models')
FIG_DIR   = os.path.join(MODEL_DIR, 'figs')
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(FIG_DIR, exist_ok=True)
ratings = pd.read_csv(paths['ratings.csv'])
movies  = pd.read_csv(paths['movies.csv'])

ratings.columns = [c.lower() for c in ratings.columns]
movies.columns  = [c.lower() for c in movies.columns]
uid_unique = ratings['userid'].unique()
mid_unique = ratings['movieid'].unique()
user2idx = {int(u): int(i) for i,u in enumerate(uid_unique)}
item2idx = {int(m): int(i) for i,m in enumerate(mid_unique)}
R_MIN, R_MAX = float(ratings['rating'].min()), float(ratings['rating'].max())
ratings['_u'] = ratings['userid'].map(user2idx).astype('int32')
ratings['_i'] = ratings['movieid'].map(item2idx).astype('int32')
watched_train = ratings.groupby('_u')['_i'].apply(set).to_dict()
idx_mappings_path = os.path.join(MODEL_DIR, 'idx_mappings.json')
with open(idx_mappings_path, 'w') as f:
    json.dump({'user2idx': user2idx, 'item2idx': item2idx, 'R_MIN': R_MIN, 'R_MAX': R_MAX}, f, indent=2)
print("Saved:", idx_mappings_path)

movies_small = movies[['movieid','title','genres']].copy()
movies_small_path = os.path.join(MODEL_DIR, 'movies_small.csv')
movies_small.to_csv(movies_small_path, index=False)
print("Saved:", movies_small_path)

watched_train_path = os.path.join(MODEL_DIR, 'watched_train.json')
with open(watched_train_path, 'w') as f:
    json.dump({int(k): [int(x) for x in list(v)] for k,v in watched_train.items()}, f)
print("Saved:", watched_train_path)
metrics_path = paths.get('metrics.csv') or find_file(BASE_CANDIDATES, 'metrics.csv')
assert metrics_path is not None, "metrics.csv tidak ditemukan; pastikan cell 12 pernah menyimpannya."
metrics_df = pd.read_csv(metrics_path)

best_row = metrics_df.sort_values('test_rmse').iloc[0]
best_model_name = best_row['model']

best_model_info_path = os.path.join(MODEL_DIR, 'best_model.json')
with open(best_model_info_path, 'w') as f:
    json.dump({'best_model': best_model_name}, f, indent=2)
print("Saved:", best_model_info_path)
if os.path.dirname(metrics_path) != MODEL_DIR:
    target_metrics = os.path.join(MODEL_DIR, 'metrics.csv')
    metrics_df.to_csv(target_metrics, index=False)
    print("Copied metrics.csv ->", target_metrics)
else:
    print("metrics.csv sudah di MODEL_DIR:", metrics_path)

print("\nArtefak siap untuk UI Streamlit di:", MODEL_DIR)


Mounted at /content/drive
== Paths ==
ratings.csv: /content/drive/MyDrive/Penelitian/uas/ratings.csv
movies.csv: /content/drive/MyDrive/Penelitian/uas/movies.csv
metrics.csv: /content/drive/MyDrive/Penelitian/uas/models/metrics.csv
final_MF.pt: /content/drive/MyDrive/Penelitian/uas/models/final_MF.pt
final_NCF.pt: /content/drive/MyDrive/Penelitian/uas/models/final_NCF.pt
Saved: /content/drive/MyDrive/Penelitian/uas/models/idx_mappings.json
Saved: /content/drive/MyDrive/Penelitian/uas/models/movies_small.csv
Saved: /content/drive/MyDrive/Penelitian/uas/models/watched_train.json
Saved: /content/drive/MyDrive/Penelitian/uas/models/best_model.json
metrics.csv sudah di MODEL_DIR: /content/drive/MyDrive/Penelitian/uas/models/metrics.csv

Artefak siap untuk UI Streamlit di: /content/drive/MyDrive/Penelitian/uas/models


In [4]:
# CELL 15 — Generate streamlit_app.py (MF & NCF) di MODEL_DIR

import os

app_path = os.path.join(MODEL_DIR, 'streamlit_app.py')
app_code = r"""
import os, json, time
import numpy as np
import pandas as pd
import streamlit as st
import torch
import torch.nn as nn

HERE = os.path.dirname(os.path.abspath(__file__))
ART_DIR = HERE
FIG_DIR = os.path.join(ART_DIR, 'figs')
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ======= Model defs (sesuai training) =======
class MF(nn.Module):
    def __init__(self, n_users, n_items, emb_dim=64, bias=True):
        super().__init__()
        self.user_emb = nn.Embedding(n_users, emb_dim)
        self.item_emb = nn.Embedding(n_items, emb_dim)
        self.use_bias = bias
        if bias:
            self.user_bias = nn.Embedding(n_users, 1)
            self.item_bias = nn.Embedding(n_items, 1)
        else:
            self.user_bias = None
            self.item_bias = None
    def forward(self, u, i):
        ue = self.user_emb(u); ie = self.item_emb(i)
        dot = (ue * ie).sum(dim=1)
        if self.use_bias:
            dot = dot + self.user_bias(u).squeeze(1) + self.item_bias(i).squeeze(1)
        return dot

class NCF(nn.Module):
    def __init__(self, n_users, n_items, emb_dim=64, mlp_dims=(256,128,64), dropout=0.3):
        super().__init__()
        self.user_emb = nn.Embedding(n_users, emb_dim)
        self.item_emb = nn.Embedding(n_items, emb_dim)
        layers = []
        in_dim = emb_dim*2
        for h in mlp_dims:
            layers += [nn.Linear(in_dim, h), nn.ReLU(), nn.Dropout(dropout)]
            in_dim = h
        layers += [nn.Linear(in_dim, 1)]
        self.mlp = nn.Sequential(*layers)
    def forward(self, u, i):
        ue = self.user_emb(u); ie = self.item_emb(i)
        x = torch.cat([ue, ie], dim=1)
        return self.mlp(x).squeeze(1)

# ======= Load artefak =======
st.set_page_config(page_title="Rekomendasi Film — MovieLens", layout="wide")
st.title("🎬 Sistem Rekomendasi Film — MovieLens (MF vs NCF)")
st.caption("Evaluasi RMSE & MAE, plus rekomendasi Top-K")

idx_info = json.load(open(os.path.join(ART_DIR, 'idx_mappings.json')))
user2idx = {int(k): int(v) for k,v in idx_info['user2idx'].items()}
item2idx = {int(k): int(v) for k,v in idx_info['item2idx'].items()}
R_MIN, R_MAX = idx_info['R_MIN'], idx_info['R_MAX']

movies_small = pd.read_csv(os.path.join(ART_DIR, 'movies_small.csv'))
metrics = pd.read_csv(os.path.join(ART_DIR, 'metrics.csv'))
best_info = json.load(open(os.path.join(ART_DIR, 'best_model.json')))

n_users = len(user2idx); n_items = len(item2idx)

# sidebar — pilih model
models_avail = metrics['model'].dropna().unique().tolist()  # ex: ['MF','NCF']
default_model = best_info.get('best_model', models_avail[0] if models_avail else 'MF')
model_choice = st.sidebar.selectbox("Pilih Model", options=models_avail, index=models_avail.index(default_model) if default_model in models_avail else 0)

# map ke nama checkpoint
ckpt_map = {'MF': 'final_MF.pt', 'NCF': 'final_NCF.pt'}
ckpt_name = ckpt_map.get(model_choice)
ckpt_path = os.path.join(ART_DIR, ckpt_name)
if not os.path.exists(ckpt_path):
    st.error(f"Checkpoint {ckpt_name} tidak ditemukan di {ART_DIR}")
    st.stop()

# rekonstruksi model dari state dict
state = torch.load(ckpt_path, map_location='cpu')
sd = state['model_state_dict']

if model_choice == 'MF':
    emb_dim = sd['user_emb.weight'].shape[1]
    model = MF(n_users, n_items, emb_dim=emb_dim, bias=True)
else:
    emb_dim = sd['user_emb.weight'].shape[1]
    # infer mlp hidden dims (ambil semua 'mlp.*.weight' kecuali output)
    linear_keys = [k for k in sd.keys() if k.startswith('mlp.') and k.endswith('.weight')]
    linear_ids = sorted([int(k.split('.')[1]) for k in linear_keys if k.split('.')[2]=='weight'])
    outs = []
    for lid in linear_ids:
        W = sd[f'mlp.{lid}.weight']
        outs.append(W.shape[0])
    mlp_dims = tuple(outs[:-1]) if outs and outs[-1]==1 else tuple(outs) if outs else (256,128,64)
    model = NCF(n_users, n_items, emb_dim=emb_dim, mlp_dims=mlp_dims, dropout=0.0)

model.load_state_dict(sd); model.eval(); model.to(DEVICE)

# tampilkan metrics
st.subheader("📊 Evaluasi (Test Set)")
st.dataframe(metrics.style.highlight_min(['test_rmse','test_mae'], color='#b6e3ff'), use_container_width=True)

# EDA (jika ada)
st.subheader("🔎 EDA (opsional)")
cols = st.columns(3)
for i, (fn, cap) in enumerate([
    ('rating_distribution.png','Distribusi Rating'),
    ('interaksi_per_user.png','Interaksi per User'),
    ('interaksi_per_item.png','Interaksi per Item'),
]):
    p = os.path.join(FIG_DIR, fn)
    if os.path.exists(p):
        with cols[i%3]:
            st.image(p, caption=cap, use_container_width=True)

# rekomendasi
st.subheader("🎯 Rekomendasi")
all_user_ids = sorted(user2idx.keys())
default_user = all_user_ids[len(all_user_ids)//2] if all_user_ids else (next(iter(user2idx)) if user2idx else 1)
user_raw = st.number_input("Masukkan User ID (raw)", value=default_user, step=1)
topk = st.slider("Top-K", 5, 30, 10, 1)
genre_filter = st.text_input("Filter Genre (opsional, mis. Action|Comedy)", "")

# watched masking
watched_train = {}
wt_path = os.path.join(ART_DIR, 'watched_train.json')
if os.path.exists(wt_path):
    tmp = json.load(open(wt_path))
    watched_train = {int(k): set(v) for k,v in tmp.items()}

inv_item = {v:k for k,v in item2idx.items()}

@torch.no_grad()
def recommend(u_raw, topk=10, genre_filter=None):
    if u_raw not in user2idx:
        return pd.DataFrame(columns=['movieid','title','genres','score'])
    u = user2idx[u_raw]
    seen = watched_train.get(u, set())
    candidates = np.array([it for it in range(n_items) if it not in seen], dtype=np.int64)
    if len(candidates) == 0:
        return pd.DataFrame(columns=['movieid','title','genres','score'])
    u_tensor = torch.tensor([u]*len(candidates), dtype=torch.long, device=DEVICE)
    i_tensor = torch.tensor(candidates, dtype=torch.long, device=DEVICE)
    scores = model(u_tensor, i_tensor).detach().cpu().numpy()
    scores = np.clip(scores, R_MIN, R_MAX)
    cand_movieids = [inv_item[int(i)] for i in candidates]
    rec_df = pd.DataFrame({'movieid': cand_movieids, 'score': scores})
    rec_df = rec_df.merge(movies_small, on='movieid', how='left')
    if genre_filter:
        rec_df = rec_df[rec_df['genres'].fillna('').str.contains(genre_filter, case=False, na=False)]
    return rec_df.sort_values('score', ascending=False).head(topk).reset_index(drop=True)

if st.button("Dapatkan Rekomendasi"):
    t0 = time.time()
    recs = recommend(user_raw, topk=topk, genre_filter=genre_filter.strip() or None)
    st.write(f"Hasil untuk User **{user_raw}** (Top-{topk})")
    st.dataframe(recs, use_container_width=True)
    st.caption(f"Selesai dalam {time.time()-t0:.2f}s")
"""

with open(app_path, "w", encoding="utf-8") as f:
    f.write(app_code)

print("✅ streamlit_app.py ditulis ke:", app_path)
print("Jalankan lokal (opsional):")
print(f"!pip install streamlit && streamlit run {app_path}")


✅ streamlit_app.py ditulis ke: /content/drive/MyDrive/Penelitian/uas/models/streamlit_app.py
Jalankan lokal (opsional):
!pip install streamlit && streamlit run /content/drive/MyDrive/Penelitian/uas/models/streamlit_app.py


In [5]:
# CELL 16 — requirements.txt + cara run/deploy

req_path = os.path.join(MODEL_DIR, 'requirements.txt')
with open(req_path, 'w') as f:
    f.write("\n".join([
        "pandas",
        "numpy",
        "tqdm",
        "matplotlib",
        "scikit-learn",
        "torch",
        "tensorboard",
        "streamlit"
    ]))
print("✅ Saved:", req_path)

print("\n=== Cara jalanin lokal ===")
print(f"1) pip install -r {req_path}")
print(f"2) streamlit run {os.path.join(MODEL_DIR, 'streamlit_app.py')}")

print("\n=== Deploy ke Streamlit Community Cloud ===")
print("• Taruh folder 'models/' (isi: streamlit_app.py, requirements.txt, metrics.csv, idx_mappings.json, movies_small.csv, watched_train.json, final_MF.pt/NCF.pt, (opsional) figs/)")
print("• Push ke GitHub, lalu hubungkan ke https://streamlit.io/cloud")
print("• Set file utama: models/streamlit_app.py")

print("\n=== Deploy ke Hugging Face Spaces (Streamlit) ===")
print("• Buat Space tipe Streamlit")
print("• Upload folder 'models/' beserta file-file di atas")
print("• Pastikan requirements.txt satu folder dengan streamlit_app.py")


✅ Saved: /content/drive/MyDrive/Penelitian/uas/models/requirements.txt

=== Cara jalanin lokal ===
1) pip install -r /content/drive/MyDrive/Penelitian/uas/models/requirements.txt
2) streamlit run /content/drive/MyDrive/Penelitian/uas/models/streamlit_app.py

=== Deploy ke Streamlit Community Cloud ===
• Taruh folder 'models/' (isi: streamlit_app.py, requirements.txt, metrics.csv, idx_mappings.json, movies_small.csv, watched_train.json, final_MF.pt/NCF.pt, (opsional) figs/)
• Push ke GitHub, lalu hubungkan ke https://streamlit.io/cloud
• Set file utama: models/streamlit_app.py

=== Deploy ke Hugging Face Spaces (Streamlit) ===
• Buat Space tipe Streamlit
• Upload folder 'models/' beserta file-file di atas
• Pastikan requirements.txt satu folder dengan streamlit_app.py


In [11]:
# EDA-1 — Setup & Load Data (self-contained)
import os, glob, re, json, math, random, time, gc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
try:
    import seaborn as sns
except:
    !pip -q install seaborn
    import seaborn as sns

sns.set_theme(style="whitegrid")
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)
except:
    pass
BASE_CANDIDATES = ['/content/drive/MyDrive/Penelitian/uas', '/content/drive/MyDrive', '.']
REQUIRED = ['ratings.csv','movies.csv','tags.csv','genome-scores.csv','genome-tags.csv','links.csv']

def find_file(fname):
    for base in BASE_CANDIDATES:
        if os.path.exists(base):
            hits = glob.glob(os.path.join(base, '**', fname), recursive=True)
            if hits:
                return sorted(hits, key=len)[0]
    return None

paths = {f: find_file(f) for f in REQUIRED}
missing = [k for k,v in paths.items() if v is None]
assert not missing, f"File hilang: {missing}. Pastikan semua CSV ada di Penelitian/uas."

print("== Dataset Paths ==")
for k,v in paths.items(): print(f"{k}: {v}")

BASE_DIR  = os.path.dirname(paths['ratings.csv'])
MODEL_DIR = os.path.join(BASE_DIR, 'models')
FIG_DIR   = os.path.join(MODEL_DIR, 'figs')
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(FIG_DIR, exist_ok=True)

def read_csv_lower(p, **kw):
    df = pd.read_csv(p, **kw); df.columns = [c.lower() for c in df.columns]; return df

ratings = read_csv_lower(paths['ratings.csv'])
movies  = read_csv_lower(paths['movies.csv'])
tags    = read_csv_lower(paths['tags.csv'])
gscores = read_csv_lower(paths['genome-scores.csv'])
gtags   = read_csv_lower(paths['genome-tags.csv'])
links   = read_csv_lower(paths['links.csv'])

print(f"N ratings={len(ratings):,} | users={ratings['userid'].nunique():,} | movies={ratings['movieid'].nunique():,}")

R_MIN, R_MAX = float(ratings['rating'].min()), float(ratings['rating'].max())
ratings['timestamp'] = pd.to_datetime(ratings['timestamp'], unit='s', errors='coerce')
def extract_year(title:str):
    if not isinstance(title, str): return np.nan
    m = re.search(r'\((\d{4})\)\s*$', title)
    return int(m.group(1)) if m else np.nan

movies['year'] = movies['title'].apply(extract_year)
movies_small = movies[['movieid','title','genres','year']]
movies_small.to_csv(os.path.join(MODEL_DIR, 'movies_small.csv'), index=False)

with open(os.path.join(MODEL_DIR, 'eda_info.json'), 'w') as f:
    json.dump({'R_MIN':R_MIN,'R_MAX':R_MAX,'n_users':int(ratings['userid'].nunique()),
               'n_items':int(ratings['movieid'].nunique()),'n_interactions':int(len(ratings))}, f, indent=2)

print("Setup OK → FIG_DIR:", FIG_DIR)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
== Dataset Paths ==
ratings.csv: /content/drive/MyDrive/Penelitian/uas/ratings.csv
movies.csv: /content/drive/MyDrive/Penelitian/uas/movies.csv
tags.csv: /content/drive/MyDrive/Penelitian/uas/tags.csv
genome-scores.csv: /content/drive/MyDrive/Penelitian/uas/genome-scores.csv
genome-tags.csv: /content/drive/MyDrive/Penelitian/uas/genome-tags.csv
links.csv: /content/drive/MyDrive/Penelitian/uas/links.csv
N ratings=20,000,263 | users=138,493 | movies=26,744
Setup OK → FIG_DIR: /content/drive/MyDrive/Penelitian/uas/models/figs


In [12]:
plt.figure(figsize=(8,5))
ax = sns.histplot(ratings['rating'], bins=np.arange(R_MIN, R_MAX+0.5, 0.5), edgecolor='white')
plt.title("🎬 Distribusi Rating", fontsize=14)
plt.xlabel("Rating"); plt.ylabel("Frekuensi")
for p in ax.patches:
    height = p.get_height()
    if height>0:
        ax.annotate(f"{int(height):,}", (p.get_x()+p.get_width()/2, height),
                    ha='center', va='bottom', fontsize=8, rotation=90)
plt.tight_layout();
out = os.path.join(FIG_DIR, 'rating_distribution.png')
plt.savefig(out, dpi=150); plt.close()
print("Saved:", out)


  plt.tight_layout();
  plt.savefig(out, dpi=150); plt.close()


Saved: /content/drive/MyDrive/Penelitian/uas/models/figs/rating_distribution.png


In [13]:
# EDA-3 — Interaksi per User
user_cnt = ratings.groupby('userid').size()
user_cnt.to_csv(os.path.join(MODEL_DIR,'user_interaction_counts.csv'))

# linear
plt.figure(figsize=(8,5))
sns.histplot(user_cnt, bins=100, edgecolor='white')
plt.title("👤 Distribusi Interaksi per User (Linear)")
plt.xlabel("# Interaksi"); plt.ylabel("Frekuensi")
plt.tight_layout();
out1 = os.path.join(FIG_DIR, 'interaksi_per_user_linear.png')
plt.savefig(out1, dpi=150); plt.close()

# log-scale
plt.figure(figsize=(8,5))
sns.histplot(user_cnt, bins=100, edgecolor='white', log_scale=(False, True))
plt.title("👤 Distribusi Interaksi per User (Y Log-Scale)")
plt.xlabel("# Interaksi"); plt.ylabel("Frekuensi (log)")
plt.tight_layout();
out2 = os.path.join(FIG_DIR, 'interaksi_per_user_log.png')
plt.savefig(out2, dpi=150); plt.close()

print("Saved:", out1, "|", out2)


  plt.tight_layout();
  plt.savefig(out1, dpi=150); plt.close()
  plt.tight_layout();
  plt.savefig(out2, dpi=150); plt.close()


Saved: /content/drive/MyDrive/Penelitian/uas/models/figs/interaksi_per_user_linear.png | /content/drive/MyDrive/Penelitian/uas/models/figs/interaksi_per_user_log.png


In [14]:
# EDA-4 — Interaksi per Item
item_cnt = ratings.groupby('movieid').size()
item_cnt.to_csv(os.path.join(MODEL_DIR,'item_interaction_counts.csv'))

# linear
plt.figure(figsize=(8,5))
sns.histplot(item_cnt, bins=100, edgecolor='white')
plt.title("🍿 Distribusi Interaksi per Item (Linear)")
plt.xlabel("# Interaksi per Film"); plt.ylabel("Frekuensi")
plt.tight_layout();
out1 = os.path.join(FIG_DIR, 'interaksi_per_item_linear.png')
plt.savefig(out1, dpi=150); plt.close()

# log-scale
plt.figure(figsize=(8,5))
sns.histplot(item_cnt, bins=100, edgecolor='white', log_scale=(False, True))
plt.title("🍿 Distribusi Interaksi per Item (Y Log-Scale)")
plt.xlabel("# Interaksi per Film"); plt.ylabel("Frekuensi (log)")
plt.tight_layout();
out2 = os.path.join(FIG_DIR, 'interaksi_per_item_log.png')
plt.savefig(out2, dpi=150); plt.close()

# Top 20 film terpopuler
top_items = item_cnt.sort_values(ascending=False).head(20).rename('count').reset_index()
top_items = top_items.merge(movies[['movieid','title']], left_on='movieid', right_on='movieid', how='left')
plt.figure(figsize=(10,6))
sns.barplot(data=top_items, y='title', x='count', orient='h')
plt.title("🏆 Top 20 Film Terpopuler (berdasarkan jumlah interaksi)")
plt.xlabel("Jumlah Interaksi"); plt.ylabel("Film")
plt.tight_layout();
out3 = os.path.join(FIG_DIR, 'top20_popular_movies.png')
plt.savefig(out3, dpi=150); plt.close()

top_items.to_csv(os.path.join(MODEL_DIR,'top20_popular_movies.csv'), index=False)
print("Saved:", out1, "|", out2, "|", out3)


  plt.tight_layout();
  plt.savefig(out1, dpi=150); plt.close()
  plt.tight_layout();
  plt.savefig(out2, dpi=150); plt.close()
  plt.tight_layout();
  plt.savefig(out3, dpi=150); plt.close()


Saved: /content/drive/MyDrive/Penelitian/uas/models/figs/interaksi_per_item_linear.png | /content/drive/MyDrive/Penelitian/uas/models/figs/interaksi_per_item_log.png | /content/drive/MyDrive/Penelitian/uas/models/figs/top20_popular_movies.png


In [15]:
# EDA-5 — Sparsity & Heatmap (subset)
n_users = ratings['userid'].nunique()
n_items = ratings['movieid'].nunique()
density = len(ratings) / (n_users * n_items)
with open(os.path.join(MODEL_DIR,'sparsity.txt'), 'w') as f:
    f.write(f"Interactions: {len(ratings):,}\nUsers: {n_users:,}\nItems: {n_items:,}\nDensity: {density:.8f}\n")
print(f"Sparsity density = {density:.8f}")

# Heatmap subset (random 100 user x 100 item)
np.random.seed(42)
sample_users = np.random.choice(ratings['userid'].unique(), size=min(100, n_users), replace=False)
sample_items = np.random.choice(ratings['movieid'].unique(), size=min(100, n_items), replace=False)
sub = ratings[ratings['userid'].isin(sample_users) & ratings['movieid'].isin(sample_items)]

pivot = sub.pivot_table(index='userid', columns='movieid', values='rating')
plt.figure(figsize=(10,7))
sns.heatmap(pivot, cmap='viridis', cbar=True)
plt.title("🔥 Heatmap Interaksi User–Item (subset 100×100)")
plt.xlabel("Item (movieid)"); plt.ylabel("User (userid)")
plt.tight_layout();
out = os.path.join(FIG_DIR, 'heatmap_user_item_subset.png')
plt.savefig(out, dpi=150); plt.close()
print("Saved:", out)


Sparsity density = 0.00539985


  plt.tight_layout();
  plt.savefig(out, dpi=150); plt.close()


Saved: /content/drive/MyDrive/Penelitian/uas/models/figs/heatmap_user_item_subset.png


In [16]:
# EDA-6 — Genre & Tag
movies['genres'] = movies['genres'].fillna('(no genres listed)')
movies['genres_list'] = movies['genres'].apply(lambda s: s.split('|'))
all_genres = pd.Series([g for lst in movies['genres_list'] for g in lst if g and g!='(no genres listed)'])
top_genres = all_genres.value_counts().head(20)
top_genres.to_csv(os.path.join(MODEL_DIR,'top_genres.csv'))

plt.figure(figsize=(10,6))
sns.barplot(x=top_genres.values, y=top_genres.index, orient='h')
plt.title("🎭 Top 20 Genres")
plt.xlabel("Frekuensi"); plt.ylabel("Genre")
plt.tight_layout();
out1 = os.path.join(FIG_DIR, 'top_genres.png')
plt.savefig(out1, dpi=150); plt.close()

# Tag user (lowercase, strip)
tags['tag'] = tags['tag'].fillna('').str.lower().str.strip()
top_tags = tags['tag'].value_counts().head(40)
top_tags.to_csv(os.path.join(MODEL_DIR,'top_user_tags.csv'))

plt.figure(figsize=(10,10))
sns.barplot(x=top_tags.values, y=top_tags.index, orient='h')
plt.title("🏷️ Top 40 User Tags")
plt.xlabel("Frekuensi"); plt.ylabel("Tag")
plt.tight_layout();
out2 = os.path.join(FIG_DIR, 'top_user_tags.png')
plt.savefig(out2, dpi=150); plt.close()

print("Saved:", out1, "|", out2)


  plt.tight_layout();
  plt.savefig(out1, dpi=150); plt.close()
  plt.tight_layout();
  plt.savefig(out2, dpi=150); plt.close()


Saved: /content/drive/MyDrive/Penelitian/uas/models/figs/top_genres.png | /content/drive/MyDrive/Penelitian/uas/models/figs/top_user_tags.png


In [17]:
# EDA-7 — Tren Tahun & Korelasi Popularitas–Rating
year_counts = movies['year'].dropna().astype(int).value_counts().sort_index()
plt.figure(figsize=(12,4))
sns.lineplot(x=year_counts.index, y=year_counts.values)
plt.title("📅 Jumlah Film per Tahun (berdasarkan judul)")
plt.xlabel("Tahun"); plt.ylabel("Jumlah Film")
plt.tight_layout();
out1 = os.path.join(FIG_DIR, 'film_per_year.png')
plt.savefig(out1, dpi=150); plt.close()
movie_stats = ratings.groupby('movieid').agg(
    pop=('rating','size'),
    mean_rating=('rating','mean')
).reset_index()
movie_stats = movie_stats.merge(movies[['movieid','title']], on='movieid', how='left')

plt.figure(figsize=(8,6))
sns.scatterplot(data=movie_stats, x='pop', y='mean_rating', alpha=0.3)
plt.xscale('log')
plt.title("📈 Popularitas vs Rata-rata Rating (log scale pada Popularitas)")
plt.xlabel("Popularitas (# rating, log)"); plt.ylabel("Mean Rating")
plt.tight_layout();
out2 = os.path.join(FIG_DIR, 'pop_vs_mean_rating.png')
plt.savefig(out2, dpi=150); plt.close()

movie_stats.sort_values('pop', ascending=False).head(50).to_csv(os.path.join(MODEL_DIR,'top50_pop_vs_rating.csv'), index=False)
print("Saved:", out1, "|", out2)


  plt.tight_layout();
  plt.savefig(out1, dpi=150); plt.close()
  plt.tight_layout();
  plt.savefig(out2, dpi=150); plt.close()


Saved: /content/drive/MyDrive/Penelitian/uas/models/figs/film_per_year.png | /content/drive/MyDrive/Penelitian/uas/models/figs/pop_vs_mean_rating.png


In [18]:
# EDA-8 — Ringkasan & daftar gambar
exports = [
    'rating_distribution.png',
    'interaksi_per_user_linear.png',
    'interaksi_per_user_log.png',
    'interaksi_per_item_linear.png',
    'interaksi_per_item_log.png',
    'top20_popular_movies.png',
    'heatmap_user_item_subset.png',
    'top_genres.png',
    'top_user_tags.png',
    'film_per_year.png',
    'pop_vs_mean_rating.png'
]
print("== Gambar tersimpan di:", FIG_DIR)
for fn in exports:
    p = os.path.join(FIG_DIR, fn)
    print(" •", fn, "✓" if os.path.exists(p) else "×")
with open(os.path.join(MODEL_DIR,'eda_images.json'),'w') as f:
    json.dump({'figs':[os.path.join('figs',x) for x in exports]}, f, indent=2)

print("\nEDA lengkap ✔️")


== Gambar tersimpan di: /content/drive/MyDrive/Penelitian/uas/models/figs
 • rating_distribution.png ✓
 • interaksi_per_user_linear.png ✓
 • interaksi_per_user_log.png ✓
 • interaksi_per_item_linear.png ✓
 • interaksi_per_item_log.png ✓
 • top20_popular_movies.png ✓
 • heatmap_user_item_subset.png ✓
 • top_genres.png ✓
 • top_user_tags.png ✓
 • film_per_year.png ✓
 • pop_vs_mean_rating.png ✓

EDA lengkap ✔️
