In [None]:

import os
import unittest

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Location of the dataset CSV. It can be overridden with the DATASET_PATH
# environment variable so the notebook is runnable on other machines; when the
# variable is not set, the original hard-coded location is used unchanged.
DATA_PATH = os.environ.get(
    "DATASET_PATH",
    r"C:\Users\Amatek\Downloads\MACHINE LEARNING PROJET S1\ml_final\data\dataset_final.csv",
)
# The first CSV column is used as the row index.
dataset = pd.read_csv(DATA_PATH, index_col=0)


# Features: every column except the "valve_optimal" target.
X = dataset.drop(columns=["valve_optimal"])
# Target column used by the tests below.
y = dataset["valve_optimal"]


In [None]:
class TestDataSplit(unittest.TestCase):
    def test_train_test_split_shapes(self):
        X_train = X.iloc[:2000]
        y_train = y.iloc[:2000]
        X_test = X.iloc[2000:]
        y_test = y.iloc[2000:]

        self.assertEqual(X_train.shape[0], 2000)
        self.assertEqual(y_train.shape[0], 2000)
        self.assertEqual(X_test.shape[0], len(X) - 2000)
        self.assertEqual(y_test.shape[0], len(y) - 2000)

# Collect the tests of TestDataSplit and run them with verbose, per-test output.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestDataSplit)
unittest.TextTestRunner(verbosity=2).run(suite)


test_train_test_split_shapes (__main__.TestDataSplit.test_train_test_split_shapes) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.003s

OK


<unittest.runner.TextTestResult run=1 errors=0 failures=0>

In [None]:
class TestNaNCorrection(unittest.TestCase):
    def test_fillna(self):
        X_train = X.iloc[:2000].fillna(0)
        X_test  = X.iloc[2000:].fillna(0)

        self.assertFalse(X_train.isna().any().any())
        self.assertFalse(X_test.isna().any().any())


# Collect the tests of TestNaNCorrection and run them with verbose, per-test output.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestNaNCorrection)
unittest.TextTestRunner(verbosity=2).run(suite)


test_fillna (__main__.TestNaNCorrection.test_fillna) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.013s

OK


<unittest.runner.TextTestResult run=1 errors=0 failures=0>

In [None]:
class TestScaling(unittest.TestCase):
    """Checks standardisation of the zero-filled train/test features."""

    def test_scaling_shapes_and_no_nan(self):
        """Scaling keeps the shape of each split and yields no NaN values."""
        boundary = 2000
        train_part = X.iloc[:boundary].fillna(0)
        test_part = X.iloc[boundary:].fillna(0)

        # The scaler is fitted on the training part only and then applied to both parts.
        standardizer = StandardScaler().fit(train_part)
        train_scaled = standardizer.transform(train_part)
        test_scaled = standardizer.transform(test_part)

        # Checks: shapes first, then absence of NaN, train before test each time.
        scaled_and_source = ((train_scaled, train_part), (test_scaled, test_part))
        for scaled, source in scaled_and_source:
            self.assertEqual(scaled.shape, source.shape)
        for scaled, _ in scaled_and_source:
            self.assertFalse(np.isnan(scaled).any())


# Collect the tests of TestScaling and run them with verbose, per-test output.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestScaling)
unittest.TextTestRunner(verbosity=2).run(suite)


test_scaling_shapes_and_no_nan (__main__.TestScaling.test_scaling_shapes_and_no_nan) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.015s

OK


<unittest.runner.TextTestResult run=1 errors=0 failures=0>

In [None]:
class TestKNNTraining(unittest.TestCase):
    """Smoke test: a 5-neighbour KNN classifier fits and predicts on both splits."""

    def test_knn_fit_predict(self):
        """Predictions only use labels 0/1 and there is one prediction per row."""
        boundary = 2000
        features_train = X.iloc[:boundary].fillna(0)
        labels_train = y.iloc[:boundary]
        features_test = X.iloc[boundary:].fillna(0)
        labels_test = y.iloc[boundary:]

        # Scaling statistics come from the training features only.
        standardizer = StandardScaler()
        train_scaled = standardizer.fit_transform(features_train)
        test_scaled = standardizer.transform(features_test)

        classifier = KNeighborsClassifier(n_neighbors=5).fit(train_scaled, labels_train)
        predicted_train = classifier.predict(train_scaled)
        predicted_test = classifier.predict(test_scaled)

        # Every predicted label must be one of the two allowed values.
        allowed_labels = {0, 1}
        for predicted in (predicted_train, predicted_test):
            self.assertTrue(set(np.unique(predicted)) <= allowed_labels)

        # One prediction per input row, for the train and then the test split.
        for predicted, truth in (
            (predicted_train, labels_train),
            (predicted_test, labels_test),
        ):
            self.assertEqual(predicted.shape[0], truth.shape[0])


# Collect the tests of TestKNNTraining and run them with verbose, per-test output.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestKNNTraining)
unittest.TextTestRunner(verbosity=2).run(suite)


test_knn_fit_predict (__main__.TestKNNTraining.test_knn_fit_predict) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.296s

OK


<unittest.runner.TextTestResult run=1 errors=0 failures=0>

In [None]:
class TestKNN_AUC(unittest.TestCase):
    """Checks the ROC AUC sweep over the number of neighbours of a KNN classifier."""

    def test_auc_calculation(self):
        """Compute the test ROC AUC for k = 1..20; best k and best AUC must be in range."""
        boundary = 2000
        features_train = X.iloc[:boundary].fillna(0)
        labels_train = y.iloc[:boundary]
        features_test = X.iloc[boundary:].fillna(0)
        labels_test = y.iloc[boundary:]

        # Scaling statistics come from the training features only.
        standardizer = StandardScaler()
        train_scaled = standardizer.fit_transform(features_train)
        test_scaled = standardizer.transform(features_test)

        def auc_for(neighbours):
            """Fit a KNN with the given neighbour count and return its test ROC AUC."""
            model = KNeighborsClassifier(n_neighbors=neighbours)
            model.fit(train_scaled, labels_train)
            # Scores are taken from the second probability column
            # (presumably the positive class -- confirm against the label encoding).
            scores = model.predict_proba(test_scaled)[:, 1]
            return roc_auc_score(labels_test, scores)

        k_values = range(1, 21)
        auc_scores = [auc_for(k) for k in k_values]

        # argmax returns the first position of the highest AUC.
        best_k = k_values[np.argmax(auc_scores)]
        best_auc = max(auc_scores)

        self.assertTrue(best_k >= 1 and best_k <= 20)
        self.assertTrue(best_auc >= 0 and best_auc <= 1)

# Collect the tests of TestKNN_AUC and run them with verbose, per-test output.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestKNN_AUC)
unittest.TextTestRunner(verbosity=2).run(suite)


test_auc_calculation (__main__.TestKNN_AUC.test_auc_calculation) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.164s

OK


<unittest.runner.TextTestResult run=1 errors=0 failures=0>