## Import Libraries and Dataset

In [None]:
# Import the libraries
import sklearn as sk
import numpy as np
import pandas as pd
import matplotlib.pyplot as ptl

# Read the CSV file into a DataFrame
data = pd.read_csv("CTG.csv")

# Discard the leading column, selected by its label
# (presumably a row-index column written out with the CSV -- TODO confirm)
leading_column = data.columns[0]
data = data.drop(columns=leading_column)

# Preview the first rows
data.head()

## Create the Dataframe

In [None]:
# Feature table: a DataFrame built from `data` with the "NSP" column removed
# ("NSP" is used as the prediction target when the data is split)
df_data = pd.DataFrame(data).drop(columns=["NSP"])

# Print column names, dtypes and non-null counts
df_data.info()

## Separate the Dataframe for Training, Testing and Validation

In [None]:
from sklearn.model_selection import train_test_split

# Keep 50% of the rows for training; the other half is split again below.
# A fixed random_state keeps the partition reproducible between runs.
x_train, x_temp, y_train, y_temp = train_test_split(
    df_data, data["NSP"], test_size=0.5, random_state=42
)

# Split the remaining half evenly: 25% of the data for validation, 25% for testing
x_validation, x_test, y_validation, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42
)

# Convert every feature split to a contiguous NumPy array.
# Converting only the validation split would leave the training features as a
# DataFrame, so an estimator fitted on them would record feature names and
# scikit-learn would warn when it later predicts on a plain array.
x_train = np.ascontiguousarray(x_train)
x_validation = np.ascontiguousarray(x_validation)
x_test = np.ascontiguousarray(x_test)

## Find the Best Model

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Best validation accuracy seen so far; starts below any achievable accuracy
higher = -1
# Validation accuracy of every candidate, in evaluation order
# (all "distance" models for K = 1..99 first, then all "uniform" models)
model_score = []

# Try KNN with every K in 1..99 under both neighbor-weighting schemes
for weights in ("distance", "uniform"):
    for k in range(1, 100):
        knn = KNeighborsClassifier(n_neighbors=k, weights=weights)
        knn.fit(x_train, y_train)
        pred = knn.predict(x_validation)

        # Score the candidate once and reuse the value below
        score = accuracy_score(y_validation, pred)

        # Keep the best model; the strict ">" means ties keep the earlier candidate
        if score > higher:
            best_model = knn
            best_k = k
            best_w = weights
            higher = score

        # Record the accuracy of each model
        model_score.append(score)

## Plot the Accuracy of Each Model

In [None]:
# Plot each recorded accuracy against its position in `model_score`
ptl.figure(figsize=(11, 7))
model_positions = range(len(model_score))
ptl.plot(
    model_positions,
    model_score,
    color="blue",
    linestyle="dashed",
    marker="o",
)
ptl.xlabel("Modelo")
ptl.ylabel("Accuracy")

# Report the selected K, weighting scheme and its accuracy
print("K:", best_k, " Métrica:", best_w, " Acc:", higher)

## Test the Best Model

In [None]:
from sklearn.metrics import confusion_matrix

# Predict on the test split with the selected model
pred = best_model.predict(x_test)

# Store the result under its own name: assigning it to `accuracy_score` would
# shadow the imported function, and re-running this cell would then fail with
# a "not callable" TypeError.
test_accuracy = accuracy_score(y_test, pred)
print("Accuracy:", test_accuracy)

# Confusion matrix of true vs. predicted labels (displayed as the cell output)
confusion_matrix(y_test, pred)
