In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

def load_data(file_path):
    """Read a CSV file into a pandas DataFrame.

    Args:
        file_path: Path (or any source accepted by ``pd.read_csv``) of the
            CSV file to read.

    Returns:
        The parsed contents as a ``pandas.DataFrame``, using the default
        ``pd.read_csv`` options.
    """
    frame = pd.read_csv(file_path)
    return frame

def preprocess_data(df):
    """Derive the integer ``Condition`` target from the one-hot severity flags.

    Steps performed:
      1. Drop the ``Country`` column.
      2. Replace each ``Severity_*`` flag by its label when it is 1 and by
         ``'No'`` when it is 0.
      3. Build ``Condition`` per row by concatenating, in column order, the
         labels of the flags that are set (e.g. ``'Mild'``; an empty string
         when no flag is set).
      4. Encode ``Condition`` as integer codes that index the sorted unique
         labels (e.g. Mild=0, Moderate=1, None=2, Severe=3).

    Args:
        df: DataFrame containing ``Country`` and the columns
            ``Severity_None``, ``Severity_Mild``, ``Severity_Moderate`` and
            ``Severity_Severe``.

    Returns:
        A new DataFrame with the changes above. The input frame is not
        modified.

    Raises:
        KeyError: If ``Country`` or one of the severity columns is missing.
    """
    severity_labels = {
        'Severity_None': 'None',
        'Severity_Mild': 'Mild',
        'Severity_Moderate': 'Moderate',
        'Severity_Severe': 'Severe',
    }

    # drop() returns a new frame, so every assignment below works on our own
    # copy instead of mutating the caller's DataFrame in place.
    df = df.drop(columns="Country")

    # Build the label column by column, skipping the 'No' placeholders as we
    # go. Filtering after concatenation is not possible: once the pieces are
    # glued into a single string, 'No' can no longer be told apart from the
    # labels, and splitting the string yields characters, not labels.
    condition = pd.Series("", index=df.index, dtype=object)
    for column, label in severity_labels.items():
        df[column] = df[column].replace({1: label, 0: 'No'})
        condition = condition + df[column].where(df[column] != 'No', '')
    df['Condition'] = condition

    # sort=True makes the codes index the sorted unique labels, so the
    # label -> integer mapping is deterministic across runs.
    codes, _ = pd.factorize(df['Condition'], sort=True)
    df['Condition'] = codes

    return df

def encode_categorical_features(X_train, X_test):
    """One-hot encode both feature sets so that they share identical columns.

    Each frame is encoded with ``pd.get_dummies``. Because the two frames are
    encoded independently, a category that appears in only one of them would
    otherwise produce a column the other lacks; the test frame is therefore
    aligned to the training columns afterwards.

    Args:
        X_train: Training features (DataFrame).
        X_test: Test features (DataFrame).

    Returns:
        Tuple ``(X_train, X_test)`` of encoded DataFrames. ``X_test`` has
        exactly the columns of ``X_train`` in the same order: columns missing
        from the test data are filled with 0, and columns for categories that
        only occur in the test data are dropped.
    """
    X_train = pd.get_dummies(X_train)
    X_test = pd.get_dummies(X_test)

    # Set the feature names directly as strings
    X_train.columns = X_train.columns.astype(str)
    X_test.columns = X_test.columns.astype(str)

    # The model is fitted on the training columns, so the test matrix must
    # expose the same features in the same positions.
    X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

    return X_train, X_test

def train_knn_model(X_train, y_train, n_neighbors=3):
    """Fit a k-nearest-neighbours classifier on the training data.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        n_neighbors: Number of neighbours passed to ``KNeighborsClassifier``.

    Returns:
        The fitted ``KNeighborsClassifier``.
    """
    # fit() returns the estimator itself, so it can be returned directly.
    return KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, y_train)

def evaluate_model(model, X_test, y_test, batch_size=1000):
    """Predict on the test set in batches and print evaluation metrics.

    Prints the accuracy score and the classification report. Any exception
    raised during prediction or scoring is caught and reported with a printed
    message instead of being propagated.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test features as a DataFrame (sliced with ``.iloc``).
        y_test: True labels for ``X_test``.
        batch_size: Number of rows predicted per call to ``model.predict``.

    Returns:
        The accuracy as returned by ``accuracy_score``, or ``None`` if the
        evaluation failed.
    """
    try:
        print("Starting prediction...")
        y_pred = []

        for start in range(0, len(X_test), batch_size):
            batch_X = X_test.iloc[start:start + batch_size]
            # The model is handed a plain C-contiguous NumPy array rather
            # than a DataFrame slice (presumably a workaround for predict()
            # issues with DataFrame input; confirm before removing).
            batch_X_np = np.ascontiguousarray(batch_X.to_numpy())
            y_pred.extend(model.predict(batch_X_np))

        y_pred = np.array(y_pred)

        print("Prediction completed.")

        accuracy = accuracy_score(y_test, y_pred)
        print(f"Precisión del modelo KNN: {accuracy:.2f}")

        report = classification_report(y_test, y_pred)
        print(report)

        # Hand the metric back so callers can use it, not just read it.
        return accuracy
    except Exception as e:
        # Deliberately best-effort: report the failure and signal it with None.
        print(f"Error during evaluation: {e}")
        return None

In [2]:
# Load the data: read the CSV, then keep only its first 1000 rows
df = load_data("Cleaned-Data.csv")
df = df.head(1000)

In [3]:
# Preprocess the data
# NOTE: preprocess_data drops the 'Country' column, so re-running this cell on
# the already-processed df raises a KeyError; re-run the load cell first.
df = preprocess_data(df)

In [4]:
# Split the data
# Exclude the Severity_* columns together with the target: 'Condition' is
# derived from them, so keeping them among the features would leak the label
# into the model.
leaky_columns = [col for col in df.columns if str(col).startswith('Severity_')]
X = df.drop(columns=['Condition'] + leaky_columns)
y = df['Condition']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [5]:
# Encode categorical features
X_train, X_test = encode_categorical_features(X_train, X_test)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# A pair plot draws one panel per pair of variables, so plotting every column
# of df grows quadratically. The recorded run of this cell ended in a
# KeyboardInterrupt (presumably because it took too long), so only a capped
# number of numeric feature columns is plotted.
MAX_PAIRPLOT_VARS = 5
pairplot_vars = [
    col for col in df.columns
    if col != 'Condition' and df[col].dtype.kind in 'iuf'
][:MAX_PAIRPLOT_VARS]

# Fall back to seaborn's default variable selection if nothing qualified.
sns.pairplot(df, vars=pairplot_vars or None, hue='Condition')
plt.show()

KeyboardInterrupt: 

Error in callback <function flush_figures at 0x7f6f0c7fdf80> (for post_execute):


In [None]:
# Train the model (uses train_knn_model's default of n_neighbors=3)
knn_model = train_knn_model(X_train, y_train)

In [None]:
# Evaluate the model (prints the accuracy and the classification report)
evaluate_model(knn_model, X_test, y_test)