In [16]:
import tensorflow as tf
import pandas as pd
import numpy as np

data = pd.read_csv("movie_data.csv")
print("Data Awal:")
print(data.head())

Data Awal:
                           name  year  duration                       genre  \
0             Daniel the Wizard  2004  1h 21min        Comedy Crime Fantasy   
1                      Smolensk  2016        2h              Drama Thriller   
2                    Foodfight!  2012  1h 31min  Animation Action Adventure   
3              Saving Christmas  2014  1h 19min               Comedy Family   
4  Superbabies: Baby Geniuses 2  2004  1h 28min        Comedy Family Sci-Fi   

   rating                                        description  \
0     1.2  Evil assassins want to kill Daniel Kublbock, t...   
1     1.2  An inspired story of people affected by the 20...   
2     1.3  The evil Brand X joins a supermarket that beco...   
3     1.3  His annual Christmas party faltering thanks to...   
4     1.5  A group of smart-talking toddlers find themsel...   

            director                                             stars  
0        Ulli Lommel  Daniel Küblböck Ulli Lommel Rudolf

In [17]:
# ===========================================================
# Konversi durasi dari format teks ke menit
# ===========================================================
def convert_duration(d):
    """Ubah '1h 21min' → 81 (menit)."""
    try:
        h, m = 0, 0
        if isinstance(d, str):
            if "h" in d:
                h = int(d.split("h")[0].strip())
                d = d.split("h")[1]
            if "min" in d:
                m = int(d.split("min")[0].strip())
        return h * 60 + m
    except:
        return np.nan

# Terapkan konversi ke kolom baru
data["duration_minutes"] = data["duration"].apply(convert_duration)

# Hapus data yang durasinya tidak valid
data = data.dropna(subset=["duration_minutes"])

In [19]:
# ===========================================================
# Siapkan Input dan Label
# ===========================================================
# year dan duration (dalam menit) jadi input
x = data[["year", "duration_minutes"]].astype(float).values

# label: rating tinggi (1 jika >=7, else 0)
y = np.where(data["rating"] >= 7, 1, 0)

# Normalisasi sederhana
x = x / np.max(x, axis=0)

# Split manual 80:20
split_index = int(0.8 * len(x))
x_train, x_val = x[:split_index], x[split_index:]
y_train, y_val = y[:split_index], y[split_index:]

In [22]:
# ===========================================================
# Membangun Model
# ===========================================================
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(2,)),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.summary()

In [24]:
# ===========================================================
# Compile Model
# ===========================================================
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [28]:
# ===========================================================
# Training Model
# ===========================================================
history = model.fit(
    x_train, y_train,
    epochs=30,
    batch_size=16,
    validation_data=(x_val, y_val),
    verbose=1
)

Epoch 1/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.0000e+00 - loss: 0.8117 - val_accuracy: 0.0000e+00 - val_loss: 0.7622
Epoch 2/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1100 - loss: 0.7247 - val_accuracy: 1.0000 - val_loss: 0.6804
Epoch 3/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 0.6459 - val_accuracy: 1.0000 - val_loss: 0.6060
Epoch 4/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 0.5734 - val_accuracy: 1.0000 - val_loss: 0.5369
Epoch 5/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.5066 - val_accuracy: 1.0000 - val_loss: 0.4704
Epoch 6/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.4463 - val_accuracy: 1.0000 - val_loss: 0.4191
Epoch 7/30
[1m13/13[0m [32m━

In [29]:
# ===========================================================
# Evaluasi Model
# ===========================================================
loss, acc = model.evaluate(x_val, y_val, verbose=0)
print(f"\n Training Accuracy (epoch terakhir): {history.history['accuracy'][-1]*100:.2f}%")
print(f"Validation Accuracy: {history.history['val_accuracy'][-1]*100:.2f}%")


 Training Accuracy (epoch terakhir): 100.00%
Validation Accuracy: 100.00%


In [30]:
# ===========================================================
# Simpan Model
# ===========================================================
model.save("model_posttest5.keras")
print("\nModel tersimpan sebagai 'model_posttest5.keras'")


Model tersimpan sebagai 'model_posttest5.keras'


# MARKDOWN
1. Data CLeaning
   - Kolom 'duration' dikonversi dari teks ("1h 39m") menjadi numerik (duration_minutes)
   - Data dengan nilai durasi tidak valid dihapus
2. Normalisasi & Encoding
   - Kolom numerik dinormalisasi agar data seragam
   - Kolom rating diubah menjadi label biner: 1 jika >= 7, 0 jika < 7
3. Feature & Split data
   - Fitur digunakan: year, duration_minutes
   - Data dibagi menjadi 80% training dan 20% validation
4. Model DNN
   - Model sequential dengan:
   - input(2) -> Dense(16, relu) -> Dense(8, relu) -> Dense(1, sigmoid)
   - Compile = optimizer='adam', lost='binary_crossentropy', metrics='accuracy'.
5. Training
   - Model dilatih 30 epoch, akurasi & validasi > 70%
   - Model efektif memprediksi kategori rating film tinggi atau rendah