<a href="https://colab.research.google.com/github/MHAFIDR/Data-Mining-Klasifikasi/blob/main/Python-update-titanic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# --- STEP 1: LOAD DATA ---
# titanic_clean.csv must already be uploaded next to this notebook;
# pd.read_csv raises FileNotFoundError otherwise.
df = pd.read_csv('titanic_clean.csv')

# Quick sanity check: banner line followed by the first five rows.
print("=== 5 Baris Pertama Data Titanic ===", df.head(), sep="\n")

# --- STEP 2: VISUALIZATION ---
# Scatter plot of age against ticket fare, coloured by the survival label,
# to eyeball how those two features relate to the outcome.
plt.figure(figsize=(10, 6))
ax = sns.scatterplot(
    data=df,
    x='Age',
    y='Fare',
    hue='Survived',
    palette='seismic',
)
# Configure the axes object seaborn drew on rather than going through pyplot.
ax.set_title('Sebaran Penumpang Titanic (Umur vs Harga Tiket)')
ax.set_xlabel('Umur')
ax.set_ylabel('Harga Tiket')
ax.legend(title='Status (1=Selamat)')
plt.show()

# --- STEP 3: SPLITTING DATA ---
# y = target column (survived or not)
y = df['Survived']
# X = features: every remaining column of the frame
# (per the original notes: class, gender, age, family, fare — verify against the CSV)
X = df.drop(columns=['Survived'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- STEP 4: TRAINING MODEL ---
# k-NN (k = 5) classifies a row by its distance to the nearest training rows.
# On raw features, columns with a wide numeric range (e.g. Fare, Age) dominate
# that distance and narrow ones (e.g. a 0/1 Sex flag) barely count, so every
# feature is standardized before the classifier sees it.
# Wrapping both steps in one pipeline means the scaler is fitted on the
# training split only and is re-applied automatically on every predict() call,
# so code that uses `model.predict(...)` keeps working unchanged.
model = make_pipeline(
    StandardScaler(),
    KNeighborsClassifier(n_neighbors=5),
)
model.fit(X_train, y_train)
print("\nModel Titanic berhasil dilatih!")

# --- STEP 5: EVALUATION ---
# Predict the held-out rows, then compare against the true labels.
y_pred = model.predict(X_test)

# Overall fraction of correct predictions, shown as a percentage.
akurasi = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"\n=== AKURASI MODEL: {akurasi * 100:.2f}% ===")

# Per-class precision / recall / F1 summary.
laporan = classification_report(y_true=y_test, y_pred=y_pred)
print(laporan)

# --- STEP 6: MANUAL PREDICTION SIMULATION ---
print("\n=== Simulasi Prediksi: Jack vs Rose ===")

# Each row is laid out as [Pclass, Sex, Age, SibSp, Parch, Fare],
# with Sex encoded as 0 = male, 1 = female.
# NOTE(review): values are matched to X.columns by position — confirm the
# CSV's feature columns really come in this order.

# Jack: 3rd class (economy), male (0), age 20, travelling alone, cheap ticket
data_jack = [3, 0, 20, 0, 0, 7.5]
# Rose: 1st class, female (1), age 17, travelling with a parent, expensive ticket
data_rose = [1, 1, 17, 0, 1, 100.0]

# Reuse the training column labels so the model sees the same feature names.
input_baru = pd.DataFrame([data_jack, data_rose], columns=X.columns)
hasil_prediksi = model.predict(input_baru)

# Pair each passenger name with its predicted label (1 = survived).
for nama, label in zip(['Jack', 'Rose'], hasil_prediksi):
    if label == 1:
        status = "SELAMAT"
    else:
        status = "TIDAK SELAMAT"
    print(f"Prediksi untuk {nama}: {status}")