In [1]:
import csv
import math

# ===========================
#  LOAD DATASET
# ===========================
def load_dataset(path):
    """Read the labelled training CSV and return ``(X, y)``.

    Each row must provide the columns ``AttendanceRate``,
    ``StudyHoursPerWeek``, ``PreviousGrade``, ``ExtracurricularActivities``,
    ``ParentalSupport``, ``Study Hours``, ``Attendance (%)``,
    ``Online Classes Taken`` and ``FinalGrade``.

    Args:
        path: Location of the CSV file (first line is the header).

    Returns:
        A tuple ``(X, y)`` where ``X`` is a list of 8-element feature lists
        and ``y`` is the list of matching ``FinalGrade`` values as floats.
        Rows whose ``FinalGrade`` is missing or blank are skipped.

    Raises:
        KeyError: If a required column is absent from the header.
        ValueError: If a non-blank ``FinalGrade`` is not a valid number.
    """
    X, y = [], []
    # newline="" lets the csv module do its own newline handling, as its
    # documentation requires for file objects passed to a reader.
    with open(path, "r", newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            label = row['FinalGrade']
            # A row without a target cannot be used for training. Check None
            # first (short rows yield None) and treat whitespace-only as blank.
            if label is None or not label.strip():
                continue

            features = [
                safe_float(row['AttendanceRate']),
                safe_float(row['StudyHoursPerWeek']),
                safe_float(row['PreviousGrade']),
                safe_float(row['ExtracurricularActivities']),
                encode_parent_support(row['ParentalSupport']),
                safe_float(row['Study Hours']),
                safe_float(row['Attendance (%)']),
                encode_bool(row['Online Classes Taken']),
            ]
            X.append(features)
            y.append(float(label))
    return X, y


# Safely convert to float
def safe_float(x):
    """Convert ``x`` to ``float``, falling back to ``0.0`` when it cannot be parsed.

    Args:
        x: Any value; typically a CSV cell (string) or ``None``.

    Returns:
        ``float(x)`` when the conversion succeeds, otherwise ``0.0``.
    """
    try:
        return float(x)
    # Only swallow conversion failures; a bare ``except`` would also hide
    # KeyboardInterrupt / SystemExit.
    except (TypeError, ValueError, OverflowError):
        return 0.0

# Encode ParentalSupport text → numeric
def encode_parent_support(value):
    """Map a parental-support label to an ordinal number.

    The comparison ignores case and surrounding whitespace.

    Args:
        value: The label, e.g. ``"High"``, ``"Medium"`` or ``"Low"``.
            ``None`` (a missing CSV cell) is accepted.

    Returns:
        ``3`` for high, ``2`` for medium, ``1`` for low and ``0`` for
        anything else, including ``None`` and the empty string.
    """
    # csv.DictReader yields None for cells missing from a short row.
    if value is None:
        return 0
    levels = {"high": 3, "medium": 2, "low": 1}
    return levels.get(str(value).strip().lower(), 0)

# Encode Boolean values (True/False)
def encode_bool(value):
    """Return ``1.0`` when ``value`` spells "true" (case-insensitive), else ``0.0``.

    The value is converted with ``str()`` first, so the boolean ``True``
    is accepted as well as the text ``"True"``.
    """
    is_true = str(value).lower() == "true"
    if is_true:
        return 1.0
    return 0.0


# ===========================
#  KNN MODEL IMPLEMENTATION
# ===========================
class KNNRegressor:
    """Brute-force k-nearest-neighbours regressor using Euclidean distance.

    A prediction is the unweighted mean of the targets belonging to the
    ``k`` training samples closest to the query sample.
    """

    def __init__(self, k=3):
        """Create an unfitted model.

        Args:
            k: Number of neighbours to average; must be at least 1.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError("k must be at least 1")
        self.k = k
        self.X_train = []
        self.y_train = []

    def fit(self, X, y):
        """Store the training samples; no other work is done at fit time.

        Args:
            X: Sequence of feature vectors.
            y: Sequence of numeric targets, one per feature vector.

        Raises:
            ValueError: If ``X`` and ``y`` differ in length.
        """
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of samples")
        self.X_train = X
        self.y_train = y

    @staticmethod
    def distance(a, b):
        """Return the Euclidean distance between vectors ``a`` and ``b``.

        Raises:
            ValueError: If the two vectors differ in length.
        """
        # A length mismatch would otherwise drop dimensions silently or fail
        # with an unrelated IndexError.
        if len(a) != len(b):
            raise ValueError("feature vectors must have the same length")
        return math.sqrt(sum((p - q) ** 2 for p, q in zip(a, b)))

    def predict_single(self, sample):
        """Predict the target for one feature vector.

        Args:
            sample: Feature vector with the same length as the training rows.

        Returns:
            Mean target of the nearest neighbours. When fewer than ``k``
            training samples exist, all of them are averaged.

        Raises:
            ValueError: If the model holds no training data.
        """
        distances = [
            (self.distance(sample, row), target)
            for row, target in zip(self.X_train, self.y_train)
        ]
        if not distances:
            raise ValueError("model has no training data; call fit() first")

        # The sort is stable, so ties keep their training-set order.
        distances.sort(key=lambda pair: pair[0])
        k_nearest = distances[:self.k]

        # Divide by the number of neighbours actually found, not by k: with
        # fewer than k training rows, dividing by k would bias the mean.
        return sum(val for _, val in k_nearest) / len(k_nearest)

    def predict(self, X):
        """Return a list with one prediction per feature vector in ``X``."""
        return [self.predict_single(x) for x in X]


# ===========================
#   LOAD TEST INPUT
# ===========================
def _numeric_or_encoded(raw, encoder):
    """Return ``float(raw)`` when ``raw`` is numeric, else ``encoder(raw)``; ``None`` maps to 0.0."""
    try:
        return float(raw)
    except (TypeError, ValueError):
        return 0.0 if raw is None else encoder(raw)


def load_test_inputs(path):
    """Read unlabelled samples from a CSV file into a list of feature vectors.

    The columns and their order match those produced by ``load_dataset``:
    ``AttendanceRate``, ``StudyHoursPerWeek``, ``PreviousGrade``,
    ``ExtracurricularActivities``, ``ParentalSupport``, ``Study Hours``,
    ``Attendance (%)`` and ``Online Classes Taken``.

    ``ParentalSupport`` and ``Online Classes Taken`` may be given either as
    numbers (used as-is) or as the text labels found in the training data
    (encoded with ``encode_parent_support`` / ``encode_bool``), so that
    samples land in the same feature space the model was trained on.

    Args:
        path: Location of the CSV file (first line is the header).

    Returns:
        A list of 8-element feature lists, one per CSV row.

    Raises:
        KeyError: If a required column is absent from the header.
    """
    X = []
    # newline="" lets the csv module do its own newline handling.
    with open(path, "r", newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            features = [
                safe_float(row['AttendanceRate']),
                safe_float(row['StudyHoursPerWeek']),
                safe_float(row['PreviousGrade']),
                safe_float(row['ExtracurricularActivities']),
                # Text such as "High" used to collapse to 0.0 here, unlike
                # in the training loader.
                _numeric_or_encoded(row['ParentalSupport'],
                                    encode_parent_support),
                safe_float(row['Study Hours']),
                safe_float(row['Attendance (%)']),
                # Same for "True"/"False".
                _numeric_or_encoded(row['Online Classes Taken'], encode_bool),
            ]
            X.append(features)
    return X


# ===========================
#  MAIN SYSTEM
# ===========================
if __name__ == "__main__":
    # --- 1. Read the labelled data ------------------------------------
    print("Loading training dataset...")
    X, y = load_dataset("student_perfomance_predict.csv")
    print("Total data:", len(X))

    # --- 2. Hold out the last 20% of rows (file order, no shuffling) ---
    split = int(0.8 * len(X))
    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]

    # --- 3. Fit the model ---------------------------------------------
    print("\nTraining model KNN...")
    knn = KNNRegressor(k=3)
    knn.fit(X_train, y_train)

    # --- 4. Compare predictions with the first five held-out rows -----
    print("\nTesting model with internal test data:")
    for actual, held_out in zip(y_test[:5], X_test[:5]):
        pred = knn.predict_single(held_out)
        print(f"Actual: {actual}  |  Predicted: {pred:.2f}")

    # --- 5. Predict for the external, unlabelled samples --------------
    print("\nLoading test_input_samples.csv...")
    test_samples = load_test_inputs("test_input_samples.csv")

    print("\nPrediksi untuk test_input_samples.csv:")
    for position, sample in enumerate(test_samples, start=1):
        pred = knn.predict_single(sample)
        print(f"Input {position} → Predicted Final Grade = {pred:.2f}")


Loading training dataset...
Total data: 960

Training model KNN...

Testing model with internal test data:
Actual: 92.0  |  Predicted: 85.67
Actual: 78.0  |  Predicted: 85.00
Actual: 87.0  |  Predicted: 81.33
Actual: 80.0  |  Predicted: 82.33
Actual: 88.0  |  Predicted: 80.00

Loading test_input_samples.csv...

Prediksi untuk test_input_samples.csv:
Input 1 → Predicted Final Grade = 84.00
Input 2 → Predicted Final Grade = 83.00
Input 3 → Predicted Final Grade = 82.00
