In [55]:
# =========================================================
# 1. IMPORT CSV MANUAL
# =========================================================
import csv
import math
import statistics

def load_csv(path):
    """Read a CSV file into a list of row dictionaries.

    The first record of the file is used as the header. Every following
    record becomes a dictionary mapping column name to the raw string value
    (``csv.DictReader`` semantics); no type conversion happens here.

    Args:
        path: Path of the CSV file to read.

    Returns:
        list: One dictionary per data row, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline="" leaves line-ending handling to the csv module, so quoted
    # fields that contain line breaks are parsed intact. "utf-8-sig" reads
    # plain UTF-8 as well, but also drops a leading byte-order mark so it
    # does not end up glued to the first column name.
    with open(path, encoding="utf-8-sig", newline="") as f:
        return list(csv.DictReader(f))

# Read the whole dataset once and report how many rows were loaded.
DATA_PATH = "student_perfomance_predict.csv"
data = load_csv(DATA_PATH)
print("Total Data:", len(data))

Total Data: 1000


In [58]:
# =========================================================
# 2. KONVERSI NILAI DAN HANDLE MISSING VALUE
# =========================================================

# Every column other than the target "FinalGrade" is used as an input feature;
# the column order of the first row defines the feature order.
feature_cols = [name for name in data[0] if name != "FinalGrade"]

def safe_float(x):
    """Convert a raw cell value to ``float``, or ``None`` when it is unusable.

    Args:
        x: Value to convert. Usually a string read from the CSV file, but
            ``None`` and values that are already numeric are accepted too.

    Returns:
        float | None: The parsed number, or ``None`` when ``x`` is ``None``,
        blank, not parseable as a number, or not finite (NaN / infinity).
    """
    if x is None:
        return None

    if isinstance(x, str):
        x = x.strip()
        if x == "":
            return None

    # Only conversion failures mean "missing"; anything else (for example a
    # KeyboardInterrupt) must not be swallowed.
    try:
        value = float(x)
    except (TypeError, ValueError, OverflowError):
        return None

    # Strings such as "nan" or "inf" parse successfully, but a single such
    # value would poison the column mean and the min/max used for scaling,
    # so they are reported as missing as well.
    return value if math.isfinite(value) else None

# Convert the data: one numeric feature vector and one numeric target per row.
# Values that safe_float cannot parse are stored as None.
X_raw = [[safe_float(record[name]) for name in feature_cols] for record in data]
y_raw = [safe_float(record["FinalGrade"]) for record in data]

# ===== IMPUTE MISSING FEATURES WITH THE COLUMN MEAN (WITH FALLBACK) =====
# Note: the means are taken over every row of X_raw, i.e. before any
# train/test split is made.
mean_col = {}

for col_idx in range(len(feature_cols)):
    observed = [sample[col_idx] for sample in X_raw if sample[col_idx] is not None]

    # A column without a single usable value has no mean; fall back to 0.
    mean_col[col_idx] = statistics.mean(observed) if observed else 0.0

# Fill every missing entry (None) in place with the mean of its column.
for sample in X_raw:
    for col_idx, value in enumerate(sample):
        if value is None:
            sample[col_idx] = mean_col[col_idx]


# ===== Impute missing targets with the mean of the observed targets =====
observed_targets = [t for t in y_raw if t is not None]
mean_y = statistics.mean(observed_targets)
y_raw = [mean_y if t is None else t for t in y_raw]

In [59]:
# =========================================================
# 3. NORMALISASI MIN-MAX
# =========================================================
# Smallest and largest value of every feature column over all rows of X_raw,
# keyed by column index. These ranges drive the min-max scaling.
min_val = {}
max_val = {}
for col_idx in range(len(feature_cols)):
    column = [sample[col_idx] for sample in X_raw]
    min_val[col_idx] = min(column)
    max_val[col_idx] = max(column)

def minmax_scale(x, i):
    """Min-max scale ``x`` with the stored range of feature column ``i``.

    The range is looked up in the module-level ``min_val`` / ``max_val``
    dictionaries. Values inside that range map to [0, 1]; values outside it
    map below 0 or above 1.

    Args:
        x: Raw feature value.
        i: Index of the feature column the value belongs to.

    Returns:
        float: The scaled value, or 0.0 when the column is constant
        (its minimum equals its maximum).
    """
    lo = min_val[i]
    hi = max_val[i]
    if hi == lo:
        return 0.0
    return (x - lo) / (hi - lo)

# Scale every value with the range of its own column; the targets keep their
# original scale.
X = [
    [minmax_scale(value, col_idx) for col_idx, value in enumerate(sample)]
    for sample in X_raw
]

y = y_raw

In [60]:
# =========================================================
# 4. SPLIT DATA MANUAL (TRAIN 80% – TEST 20%)
# =========================================================
def train_test_split(X, y, test_size=0.2):
    """Split features and targets into a leading train part and a trailing test part.

    The split is purely positional: the first rows form the training set and
    the remaining rows form the test set. Nothing is shuffled, so the result
    depends on the order of the input rows.

    Args:
        X: Sequence of feature rows.
        y: Sequence of targets, one per row of ``X``.
        test_size: Fraction of the rows, between 0 and 1 inclusive, that goes
            to the test set. When the fraction does not yield a whole number
            of rows, the training set is rounded down.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length or ``test_size`` is
            outside [0, 1].
    """
    if len(X) != len(y):
        raise ValueError(
            "X and y must have the same length, got {} and {}".format(len(X), len(y))
        )
    if not 0 <= test_size <= 1:
        raise ValueError("test_size must be between 0 and 1, got {!r}".format(test_size))

    total = len(X)
    # 1 - test_size is rarely exact in binary floating point (1 - 0.9 is
    # 0.09999999999999998), so a bare int() could truncate a product such as
    # 0.9999999999999998 to 0 and drop a row from the training set. The tiny
    # epsilon absorbs that rounding error without changing the result for
    # genuinely fractional products.
    cutoff = int(total * (1 - test_size) + 1e-9)

    return X[:cutoff], X[cutoff:], y[:cutoff], y[cutoff:]

# Split with the default test fraction and report the size of each part.
X_train, X_test, y_train, y_test = train_test_split(X, y)

for label, subset in (("Train Data:", X_train), ("Test Data:", X_test)):
    print(label, len(subset))

Train Data: 800
Test Data: 200


In [61]:
# =========================================================
# 5. IMPLEMENTASI KNN REGRESSOR MANUAL
# =========================================================
class KNNRegressor:
    """k-nearest-neighbours regressor using Euclidean distance.

    A prediction is the unweighted mean of the targets of the ``k`` training
    rows that lie closest to the query row. When fewer than ``k`` training
    rows exist, all of them are averaged.
    """

    def __init__(self, k=5):
        """Create an unfitted regressor.

        Args:
            k: Number of neighbours averaged per prediction; must be >= 1.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        # k == 0 would divide by zero at prediction time and a negative k
        # would silently slice the wrong neighbours, so reject both up front.
        if k < 1:
            raise ValueError("k must be at least 1, got {!r}".format(k))
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, X_train, y_train):
        """Remember the training data; no computation happens here.

        The sequences are stored by reference, not copied.

        Args:
            X_train: Sequence of feature rows.
            y_train: Sequence of targets, one per row of ``X_train``.

        Raises:
            ValueError: If the two sequences differ in length.
        """
        if len(X_train) != len(y_train):
            raise ValueError(
                "X_train and y_train must have the same length, got {} and {}".format(
                    len(X_train), len(y_train)
                )
            )
        self.X_train = X_train
        self.y_train = y_train

    def distance(self, a, b):
        """Return the Euclidean distance between two equally long vectors.

        Raises:
            ValueError: If the vectors differ in length. Comparing only the
                leading components would yield a meaningless distance.
        """
        if len(a) != len(b):
            raise ValueError(
                "cannot compare vectors of different length ({} vs {})".format(
                    len(a), len(b)
                )
            )
        return math.sqrt(sum((p - q) ** 2 for p, q in zip(a, b)))

    def predict_one(self, x):
        """Predict the target for a single feature row.

        Args:
            x: Feature row with the same number of values as the training rows.

        Returns:
            float: Mean target of the nearest training rows.

        Raises:
            ValueError: If the model has not been fitted, the training set is
                empty, or ``x`` has the wrong number of values.
        """
        if self.X_train is None or len(self.X_train) == 0:
            raise ValueError("the model has no training data; call fit() first")

        scored = [
            (self.distance(x, sample), target)
            for sample, target in zip(self.X_train, self.y_train)
        ]

        # The sort is stable, so equally distant rows keep their training order.
        scored.sort(key=lambda pair: pair[0])
        nearest = scored[:self.k]

        return sum(target for _, target in nearest) / len(nearest)

    def predict(self, X):
        """Predict the target for every row of ``X``; returns a list."""
        return [self.predict_one(row) for row in X]

In [62]:
# =========================================================
# 6. TRAINING MODEL
# =========================================================
# Build the regressor with 7 neighbours and hand it the training split.
N_NEIGHBORS = 7
model = KNNRegressor(k=N_NEIGHBORS)
model.fit(X_train, y_train)

print("Model berhasil ditraining.")

Model berhasil ditraining.


In [63]:
# =========================================================
# 7. EVALUASI MODEL (MAE, MSE, RMSE, R²)
# =========================================================
# Predict a target for every row of the test split; the metrics below compare
# these predictions against y_test.
y_pred = model.predict(X_test)

def MAE(y_true, y_pred):
    """Mean absolute error between true and predicted values.

    Args:
        y_true: Sequence of true target values.
        y_pred: Sequence of predictions, one per entry of ``y_true``.

    Returns:
        float: Average of ``|y_true[i] - y_pred[i]|``.

    Raises:
        ValueError: If the sequences differ in length or are empty.
    """
    # Without this check, extra predictions would be ignored silently.
    if len(y_true) != len(y_pred):
        raise ValueError(
            "y_true and y_pred must have the same length, got {} and {}".format(
                len(y_true), len(y_pred)
            )
        )
    if len(y_true) == 0:
        raise ValueError("MAE is undefined for empty input")
    return sum(abs(t - p) for t, p in zip(y_true, y_pred)) / len(y_true)

def MSE(y_true, y_pred):
    """Mean squared error between true and predicted values.

    Args:
        y_true: Sequence of true target values.
        y_pred: Sequence of predictions, one per entry of ``y_true``.

    Returns:
        float: Average of ``(y_true[i] - y_pred[i]) ** 2``.

    Raises:
        ValueError: If the sequences differ in length or are empty.
    """
    # Without this check, extra predictions would be ignored silently.
    if len(y_true) != len(y_pred):
        raise ValueError(
            "y_true and y_pred must have the same length, got {} and {}".format(
                len(y_true), len(y_pred)
            )
        )
    if len(y_true) == 0:
        raise ValueError("MSE is undefined for empty input")
    return sum((t - p) ** 2 for t, p in zip(y_true, y_pred)) / len(y_true)

def RMSE(y_true, y_pred):
    """Root mean squared error: the square root of ``MSE(y_true, y_pred)``."""
    mean_squared = MSE(y_true, y_pred)
    return math.sqrt(mean_squared)

def R2(y_true, y_pred):
    """Coefficient of determination (R squared) of the predictions.

    Computed as ``1 - ss_res / ss_tot``, where ``ss_res`` is the sum of
    squared prediction errors and ``ss_tot`` is the sum of squared deviations
    of ``y_true`` from its own mean. 1.0 means a perfect fit; 0.0 matches
    always predicting the mean; negative values are worse than that.

    Args:
        y_true: Sequence of true target values.
        y_pred: Sequence of predictions, one per entry of ``y_true``.

    Returns:
        float: The R squared score. When ``y_true`` is constant the ratio is
        undefined; 1.0 is returned for exact predictions and 0.0 otherwise.

    Raises:
        ValueError: If the sequences differ in length or are empty.
    """
    if len(y_true) != len(y_pred):
        raise ValueError(
            "y_true and y_pred must have the same length, got {} and {}".format(
                len(y_true), len(y_pred)
            )
        )
    if len(y_true) == 0:
        raise ValueError("R2 is undefined for empty input")

    mean_y = sum(y_true) / len(y_true)
    ss_res = sum((t - p) ** 2 for t, p in zip(y_true, y_pred))
    ss_tot = sum((t - mean_y) ** 2 for t in y_true)

    # A constant target has zero variance, so the ratio cannot be formed.
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0
    return 1 - (ss_res / ss_tot)

# Report every metric for the test-split predictions, one per line.
print("\n=== EVALUASI MODEL ===")
for label, metric in (
    ("MAE  :", MAE),
    ("MSE  :", MSE),
    ("RMSE :", RMSE),
    ("R²   :", R2),
):
    print(label, metric(y_test, y_pred))


=== EVALUASI MODEL ===
MAE  : 8.191320684523799
MSE  : 96.76471544729209
RMSE : 9.836905786236446
R²   : -0.1549818805791483


In [64]:
# =========================================================
# Prediksi Data Mahasiswa Baru
# =========================================================

new_student = [
    85,   # AttendanceRate
    10,   # StudyHoursPerWeek
    82,   # PreviousGrade
    1,    # ExtracurricularActivities
    0,    # Online Classes Taken
    2     # Parental Support
]

# The values are matched to the training features purely by position, so the
# list needs exactly one value per entry of feature_cols, in that order.
# Fail loudly on a mismatch instead of scaling and comparing against the wrong
# columns and printing a meaningless prediction.
if len(new_student) != len(feature_cols):
    raise ValueError(
        "new_student has {} values but the model uses {} features: {}".format(
            len(new_student), len(feature_cols), feature_cols
        )
    )

# Scale with the same per-column min/max that was applied to the training data.
new_scaled = [minmax_scale(value, i) for i, value in enumerate(new_student)]

predicted_grade = model.predict_one(new_scaled)

print("\n=== PREDIKSI MAHASISWA BARU ===")
print("Input Baru:", new_student)
print("Output Prediksi Nilai:", predicted_grade)



=== PREDIKSI MAHASISWA BARU ===
Input Baru: [85, 10, 82, 1, 0, 2]
Output Prediksi Nilai: 81.00431547619048
