<a href="https://colab.research.google.com/github/AgilAptanaDwiPutra/UAP_PRAKTIKUM/blob/main/UAP_MODEL_PRETRAINED_TAB_NET.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
!pip install pytorch-tabnet



Import Library dan load dataset

In [42]:
import os
import numpy as np
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from pytorch_tabnet.tab_model import TabNetRegressor

In [43]:
df = pd.read_csv("dataset_lagu.csv")

TARGET_COL = "popularity"
print(df.shape)

(114000, 21)


Preprocessing

In [44]:
DROP_COLS = [
    "track_id",
    "track_name",
    "album_id",
    "album_name",
    "artist_id",
    "artist_name"
]

for c in DROP_COLS:
    if c in df.columns:
        df.drop(columns=c, inplace=True)

In [45]:
# pastikan track_genre ada
assert "track_genre" in df.columns, "track_genre tidak ditemukan"

df = pd.get_dummies(df, columns=["track_genre"], prefix="genre")

Penyesuaian dimensi untuk tabnet

In [46]:
df = df.select_dtypes(include=["int64", "float64"])
assert df.select_dtypes(include=["object"]).empty

In [47]:
X = df.drop(columns=[TARGET_COL])
y = df[TARGET_COL].values.reshape(-1, 1)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

Splitting data

In [48]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y,
    test_size=0.2,
    random_state=42
)

Model TABNET

In [49]:
model = TabNetRegressor(
    n_d=32,
    n_a=32,
    n_steps=5,
    gamma=1.5,
    lambda_sparse=1e-4,
    seed=42,
    verbose=1
)

model.fit(
    X_train=X_train,
    y_train=y_train,
    eval_set=[(X_test, y_test)],
    max_epochs=50,
    patience=8,
    batch_size=4096,
    virtual_batch_size=512,
    num_workers=0,
    drop_last=False
)



epoch 0  | loss: 884.05121| val_0_mse: 505.0528|  0:00:06s
epoch 1  | loss: 491.71296| val_0_mse: 482.94308|  0:00:14s
epoch 2  | loss: 482.42515| val_0_mse: 476.23218|  0:00:21s
epoch 3  | loss: 479.53263| val_0_mse: 473.02069|  0:00:29s
epoch 4  | loss: 475.24198| val_0_mse: 471.13593|  0:00:36s
epoch 5  | loss: 476.30922| val_0_mse: 469.59561|  0:00:43s
epoch 6  | loss: 473.39733| val_0_mse: 467.78754|  0:00:51s
epoch 7  | loss: 468.59319| val_0_mse: 461.91855|  0:00:57s
epoch 8  | loss: 466.76683| val_0_mse: 459.97803|  0:01:06s
epoch 9  | loss: 464.04009| val_0_mse: 459.15201|  0:01:14s
epoch 10 | loss: 461.24632| val_0_mse: 457.53683|  0:01:21s
epoch 11 | loss: 463.09563| val_0_mse: 459.00894|  0:01:28s
epoch 12 | loss: 460.30597| val_0_mse: 451.88596|  0:01:35s
epoch 13 | loss: 459.49634| val_0_mse: 453.7113|  0:01:43s
epoch 14 | loss: 456.90525| val_0_mse: 450.47601|  0:01:50s
epoch 15 | loss: 456.67085| val_0_mse: 451.05045|  0:01:57s
epoch 16 | loss: 457.47743| val_0_mse: 452



Evaluasi

In [50]:
y_pred = model.predict(X_test)

rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("ðŸ“Š Regression Metrics:")
print({
    "RMSE": rmse,
    "MAE": mae,
    "R2": r2
})

ðŸ“Š Regression Metrics:
{'RMSE': np.float64(20.85530156856105), 'MAE': 16.802974700927734, 'R2': 0.11862123012542725}


Save Model

In [54]:
SAVE_DIR = "saved_model"
os.makedirs(SAVE_DIR, exist_ok=True)

model.save_model(os.path.join(SAVE_DIR, "tabnet_model"))

joblib.dump(
    {
        "feature_cols": X.columns.tolist(),
        "scaler": scaler
    },
    os.path.join(SAVE_DIR, "preprocessing.pkl")
)

print(os.listdir(SAVE_DIR))

Successfully saved model at saved_model/tabnet_model.zip
['preprocessing.pkl', 'tabnet_model.zip']


In [52]:
rm -rf saved_model