In [25]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report

In [26]:
train_df = pd.read_excel('../Data/train.xlsx')
validation_df = pd.read_excel('../Data/validation.xlsx')
test_df = pd.read_excel('../Data/test.xlsx')

In [27]:
y_train = train_df['Treatment_Type']
X_train = train_df.drop(columns=['Treatment_Type'])
y_val = validation_df['Treatment_Type']
X_val = validation_df.drop(columns=['Treatment_Type'])

In [28]:

for col in X_train.columns:
    if pd.api.types.is_datetime64_any_dtype(X_train[col]):
        X_train = X_train.drop(columns=[col])
        X_val = X_val.drop(columns=[col])

imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(imputer.fit_transform(X_train), columns=X_train.columns)
X_val = pd.DataFrame(imputer.transform(X_val), columns=X_val.columns)

neighbors = [3, 5]
weights_list = ['uniform', 'distance']
p_values = [1, 2] 

results = []

In [29]:
for k in neighbors:
    for w in weights_list:
        for p in p_values:
            model = KNeighborsClassifier(n_neighbors=k, weights=w, p=p)
            model.fit(X_train, y_train)
            y_pred = model.predict(X_val)
            acc = accuracy_score(y_val, y_pred)
            results.append({
                'n_neighbors': k,
                'weights': w,
                'p': p,
                'accuracy': acc
            })

In [30]:
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by='accuracy', ascending=False).reset_index(drop=True)
print("Resultados del experimento con KNN:")
print(results_df)

Resultados del experimento con KNN:
   n_neighbors   weights  p  accuracy
0            5   uniform  1  0.260000
1            5  distance  1  0.258750
2            3  distance  2  0.257500
3            3   uniform  1  0.253750
4            3  distance  1  0.253750
5            5  distance  2  0.244375
6            5   uniform  2  0.243750
7            3   uniform  2  0.241875


In [31]:

print("\nMejor configuración:")
print(results_df.iloc[0])


Mejor configuración:
n_neighbors          5
weights        uniform
p                    1
accuracy          0.26
Name: 0, dtype: object


In [32]:
X_full = pd.concat([X_train, X_val])
y_full = pd.concat([y_train, y_val])
X_test = test_df.drop(columns=["Treatment_Type"])
y_test = test_df["Treatment_Type"]

In [34]:
for col in X_test.columns:
    if pd.api.types.is_datetime64_any_dtype(X_test[col]):
        X_test = X_test.drop(columns=[col])
        if col in X_full.columns:
            X_full = X_full.drop(columns=[col])

In [35]:
X_full = pd.DataFrame(imputer.fit_transform(X_full), columns=X_full.columns)
X_test = pd.DataFrame(imputer.transform(X_test), columns=X_test.columns)


In [36]:
best_knn = KNeighborsClassifier(n_neighbors=5, weights='uniform', p=2)
best_knn.fit(X_full, y_full)
y_pred = best_knn.predict(X_test)

In [37]:
print("Accuracy (KNN) en test:", accuracy_score(y_test, y_pred))
print("Reporte de clasificación:\n", classification_report(y_test, y_pred))

Accuracy (KNN) en test: 0.263
Reporte de clasificación:
                precision    recall  f1-score   support

 Chemotherapy       0.26      0.34      0.29       499
Immunotherapy       0.30      0.33      0.32       511
    Radiation       0.28      0.26      0.27       500
      Surgery       0.18      0.11      0.14       490

     accuracy                           0.26      2000
    macro avg       0.25      0.26      0.25      2000
 weighted avg       0.26      0.26      0.26      2000

