In [1]:
import pandas as pd
import numpy as np

# Read the cleaned survey CSV and keep only the columns this notebook uses:
# the four predictors plus the 'Depression' label.
selected_columns = ['Gender', 'Age', 'Year', 'CGPA', 'Depression']
df = pd.read_csv("cleaned_student_mental_health_depression.csv")[selected_columns]

# Quick sanity check: first rows and total row count.
print("=== 5 DATA AWAL ===")
print(df.head(5))
print("\nJumlah total data:", len(df))

=== 5 DATA AWAL ===
   Gender   Age  Year  CGPA  Depression
0       0  18.0     1   3.0           1
1       1  21.0     2   3.0           0
2       1  19.0     1   3.0           1
3       0  22.0     3   3.0           1
4       1  23.0     4   3.0           0

Jumlah total data: 101


In [2]:
from sklearn.model_selection import train_test_split

# Predictors and label as plain NumPy arrays.
feature_names = ['Gender', 'Age', 'Year', 'CGPA']
X = df[feature_names].to_numpy()
y = df['Depression'].to_numpy()

# Hold out 20% of the rows for testing; the fixed seed keeps the shuffle
# (and therefore every result below) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

print("Jumlah data latih:", len(X_train))
print("Jumlah data uji:", len(X_test))

# Put the test features and their true labels in one frame for previewing.
df_test = pd.DataFrame(X_test, columns=feature_names).assign(Actual=y_test)
print("\n=== 5 DATA UJI TERATAS ===")
print(df_test.head(5))


Jumlah data latih: 80
Jumlah data uji: 21

=== 5 DATA UJI TERATAS ===
   Gender   Age  Year  CGPA  Actual
0     1.0  23.0   1.0   3.0       0
1     1.0  21.0   1.0   3.0       0
2     0.0  18.0   3.0   3.0       0
3     1.0  19.0   3.0   3.0       1
4     1.0  18.0   1.0   3.5       0


In [3]:
from sklearn.neighbors import KNeighborsClassifier

# Fit one k-NN classifier per neighbourhood size and report, for each k,
# the first/last five test predictions and the test-set accuracy.
# Features are passed to the classifier as-is (no scaling is applied here).
for k in (1, 3, 5, 7):
    print(f"\n===== K = {k} =====")

    # fit() returns the fitted estimator, so construction and training chain.
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    preds = knn.predict(X_test)

    # assign() returns a new frame, so df_test itself is never modified.
    df_result = df_test.assign(Prediksi=preds)

    print("\n=== 5 TERATAS ===")
    print(df_result.head(5))

    print("\n=== 5 TERBAWAH ===")
    print(df_result.tail(5))

    # Share of test rows whose prediction equals the true label, in percent.
    akurasi = df_result['Actual'].eq(df_result['Prediksi']).mean() * 100
    print(f"\nAkurasi: {akurasi:.2f}%")



===== K = 1 =====

=== 5 TERATAS ===
   Gender   Age  Year  CGPA  Actual  Prediksi
0     1.0  23.0   1.0   3.0       0         0
1     1.0  21.0   1.0   3.0       0         0
2     0.0  18.0   3.0   3.0       0         0
3     1.0  19.0   3.0   3.0       1         1
4     1.0  18.0   1.0   3.5       0         0

=== 5 TERBAWAH ===
    Gender   Age  Year  CGPA  Actual  Prediksi
16     0.0  18.0   1.0   3.0       1         0
17     0.0  19.0   3.0   0.0       0         0
18     0.0  18.0   1.0   3.0       1         0
19     1.0  18.0   2.0   3.0       0         0
20     0.0  23.0   1.0   3.0       1         0

Akurasi: 52.38%

===== K = 3 =====

=== 5 TERATAS ===
   Gender   Age  Year  CGPA  Actual  Prediksi
0     1.0  23.0   1.0   3.0       0         0
1     1.0  21.0   1.0   3.0       0         0
2     0.0  18.0   3.0   3.0       0         0
3     1.0  19.0   3.0   3.0       1         0
4     1.0  18.0   1.0   3.5       0         0

=== 5 TERBAWAH ===
    Gender   Age  Year  CGPA  Act

In [4]:
from sklearn.metrics import pairwise_distances

# Distance from every test row to every training row. pairwise_distances
# uses the Euclidean metric by default; row i of the result holds the
# distances from test row i to each training row.
dist_all = pairwise_distances(X_test, X_train)

N_PREVIEW = 5      # how many test rows to detail from each end of the test set
N_NEIGHBORS = 7    # nearest training rows listed per test row (largest k tried)

n_test = len(X_test)

# Each section is (header, test-row indices). The ranges are clamped so a test
# set with fewer than N_PREVIEW rows neither raises IndexError (top section)
# nor wraps around to negative indices (bottom section).
sections = [
    ("=== DETAIL JARAK UNTUK 5 DATA TERATAS ===",
     range(min(N_PREVIEW, n_test))),
    ("\n=== DETAIL JARAK UNTUK 5 DATA TERBAWAH ===",
     range(max(n_test - N_PREVIEW, 0), n_test)),
]

for header, test_rows in sections:
    print(header)
    for i in test_rows:
        print(f"\n>>> DATA UJI KE-{i}")
        jarak = dist_all[i]
        # Indices of the closest training rows, nearest first.
        idx_rank = np.argsort(jarak)[:N_NEIGHBORS]

        # One table row per neighbour: its features, label, distance and rank.
        df_detail = pd.DataFrame(X_train[idx_rank], columns=['Gender','Age','Year','CGPA'])
        df_detail['Depression'] = y_train[idx_rank]
        df_detail['Jarak'] = jarak[idx_rank]
        df_detail['Rank'] = np.arange(1, len(idx_rank)+1)
        print(df_detail)


=== DETAIL JARAK UNTUK 5 DATA TERATAS ===

>>> DATA UJI KE-0
   Gender   Age  Year  CGPA  Depression     Jarak  Rank
0     0.0  23.0   1.0   3.0           0  1.000000     1
1     1.0  24.0   1.0   3.0           0  1.000000     2
2     0.0  23.0   1.0   3.5           1  1.118034     3
3     0.0  23.0   1.0   2.5           1  1.118034     4
4     0.0  24.0   1.0   3.0           0  1.414214     5
5     1.0  24.0   2.0   3.0           1  1.414214     6
6     0.0  24.0   1.0   3.0           0  1.414214     7

>>> DATA UJI KE-1
   Gender   Age  Year  CGPA  Depression     Jarak  Rank
0     1.0  21.0   2.0   3.0           0  1.000000     1
1     0.0  21.0   1.0   3.5           0  1.118034     2
2     1.0  20.0   2.0   3.0           0  1.414214     3
3     0.0  20.0   2.0   3.0           1  1.732051     4
4     1.0  19.0   1.0   3.0           1  2.000000     5
5     1.0  19.0   1.0   3.0           0  2.000000     6
6     1.0  19.0   1.0   3.5           0  2.061553     7

>>> DATA UJI KE-2
   Ge