### FILES PROCESSING 

In [57]:
import os
from pathlib import Path

def get_files_name(path_train, path_test):
    """
    Get the files name of test and train folder in the current working directory.
    """
    workdirectory = os.getcwd()

    train_folder = Path(workdirectory) / path_train
    train_files = os.listdir(train_folder)

    test_folder = Path(workdirectory) / path_test
    test_files = os.listdir(test_folder)

    paths_train = [os.path.join(train_folder, nome) for nome in train_files if os.path.isfile(os.path.join(train_folder, nome))]
    paths_test = [os.path.join(test_folder, nome) for nome in test_files if os.path.isfile(os.path.join(test_folder, nome))]
    
    return paths_train, paths_test, train_files, test_files

#path_train, path_test = get_files_name('first_classifiers/train_sets', 'first_classifiers/test_sets')

### DATA PROCESSING

In [60]:
import pandas as pd

def get_data(train_path, test_path): 
    

    data_train = pd.read_csv(train_path)
    data_test = pd.read_csv(test_path)

    out_train = data_train["label"].values 

    if '1d' in train_path:
        # TRAINING SET
        in_train = data_train[["feature"]].values 

        # TESTING SET
        in_new = data_test[["feature"]].values 
        
    else:
        if '2d' in train_path:
            # TRAINING SET
            feature_columns = ["x1", "x2"]       
        elif '3d' in train_path:
            # TRAINING SET
            feature_columns = ["x1", "x2", "x3"]
        else: 
            feature_columns = ["x1", "x2", "x3", "x4"]

        in_train = data_train[feature_columns].values 
        in_new = data_test[feature_columns].values 

    return in_train, out_train, in_new, data_test


### CALCULATE METRICS

In [55]:
from sklearn.metrics import accuracy_score

def accuracy_metric(data_test, out_pred, outfile,train_f, test_f, k, accuracy_validation):

    out_true = data_test[["label"]].values

    # Calculate the accuracy of the model
    accuracy_test = accuracy_score(out_true, out_pred)

    if not outfile.tell() == 0:
        outfile.write("\n\n")
        
    outfile.write("╔════════════════════════════════════════════╗\n")
    outfile.write("║          Classification Results            ║\n")
    outfile.write("╚════════════════════════════════════════════╝\n")
    outfile.write(f" Train File: {train_f:<35} \n") 
    outfile.write(f" Test File:  {test_f:<35} \n") 
    outfile.write("══════════════════════════════════════════════\n")
    outfile.write(f" {'Accuracy':<30} {accuracy_test:>12.3f} \n")
    outfile.write(f" {'Error':<30} {(1-accuracy_test):>12.3f} \n")
    outfile.write(f" {'Best K':<30} {k:>12.3f} \n")
    outfile.write(f" {'Best Validation Accuracy':<30} {accuracy_validation:>12.3f} \n")
    outfile.write("══════════════════════════════════════════════")


### CLASSIFIERS 

In [59]:
# CLASSIFIER_1 KNN
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

path_train, path_test, train_file, test_file = get_files_name('train_sets', 'test_sets')


output_filename = "knn_classification_results.txt"

with open(output_filename, 'w', encoding='utf-8') as outfile:   
    for train, test, train_f, test_f in zip(path_train, path_test,train_file, test_file):

        in_train, out_train, in_test, data_test = get_data(train, test)

        # Create a KNN regressor model with 9 neighbors
        model = KNeighborsClassifier()

        param_grid = {'n_neighbors': np.arange(1, 31)} # k: [1-30]
        print(grid_search)

        grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy', verbose=1) 
        #cv:fold number; scoring:metric to evaluate model performance; estimator:model to tune.
        
        grid_search.fit(in_train, out_train)

        best_knn_model = grid_search.best_estimator_

        out_pred = best_knn_model.predict(in_test)

        accuracy_metric(data_test, out_pred, outfile, train_f, test_f, grid_search.best_params_['n_neighbors'], grid_search.best_score_ )
    
print(f"Results save in: {output_filename}")


GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])},
             scoring='accuracy', verbose=3)
Fitting 5 folds for each of 30 candidates, totalling 150 fits
GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])},
             scoring='accuracy', verbose=1)
Fitting 5 folds for each of 30 candidates, totalling 150 fits
GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])},
             scoring='accuracy', verbose=1)
Fitting 5 folds for each of 30 candidates, totalling 1