In [31]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset
df_movie = pd.read_csv('movie_metadata.csv')

# Target: 1 if the genres string contains 'Drama', 0 otherwise.
# astype(str) mirrors the original str(x) call, so a missing genre becomes 'nan' -> 0.
df_movie['is_drama'] = (
    df_movie['genres'].astype(str).str.contains('Drama', regex=False).astype(int)
)

# Build the features (X) and target (y) from the frame loaded in this cell.
# This previously read from `df`, a name this cell never defines, so it only
# worked when some earlier cell had left a `df` behind in the kernel.
feature_cols = ['duration', 'imdb_score']

# KNeighborsClassifier rejects NaN inputs, so drop rows missing a feature value.
df_model = df_movie.dropna(subset=feature_cols)

X = df_model[feature_cols]
y = df_model['is_drama']

# Split into training and test sets (20% held out, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features with StandardScaler (important for KNN).
# The scaler is fitted on the training split only, then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build and fit the k-nearest-neighbours model
k = 5  # number of nearest neighbours
model = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

# Predict labels for the test split
y_pred = model.predict(X_test)

# Evaluate the model: overall accuracy first
accuracy = accuracy_score(y_test, y_pred)
print(f"Akurasi: {accuracy}")

# Per-class precision/recall/F1; the heading and the report go on separate lines
print(
    "\nLaporan Klasifikasi:",
    classification_report(y_test, y_pred),
    sep="\n",
)

# Confusion matrix as returned by confusion_matrix(y_test, y_pred)
print(
    "\nMatriks Konfusi:",
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

# Example: use the fitted scaler and model to classify a new movie.
# Values are [duration (minutes), imdb_score]
new_movie_data = [[120, 8.0]]

# The scaler was fitted on a DataFrame with these column names. Passing a
# DataFrame with the same names avoids scikit-learn's "X does not have valid
# feature names" warning and lets it check that the columns match.
new_movie_frame = pd.DataFrame(new_movie_data, columns=['duration', 'imdb_score'])
new_movie_data_scaled = scaler.transform(new_movie_frame)
prediction = model.predict(new_movie_data_scaled)

# prediction holds one label per input row; 1 means "Drama"
if prediction[0] == 1:
    print("\nPrediksi: Film baru ini berkemungkinan besar bergenre Drama.")
else:
    print("\nPrediksi: Film baru ini kemungkinan bukan genre Drama.")

Akurasi: 0.6093439363817097

Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.61      0.58      0.59       493
           1       0.61      0.64      0.63       513

    accuracy                           0.61      1006
   macro avg       0.61      0.61      0.61      1006
weighted avg       0.61      0.61      0.61      1006


Matriks Konfusi:
[[284 209]
 [184 329]]

Prediksi: Film baru ini berkemungkinan besar bergenre Drama.


