In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectKBest, f_classif

In [2]:
# Data Loading
df = pd.read_csv('Thyroid Disease.csv')

In [3]:
# Data Cleaning
# Handling missing values
imputer = SimpleImputer(strategy='mean')
# Encoding categorical variables
label_encoder = LabelEncoder()

In [4]:
# Handle non-numeric data
for column in df.columns:
    if df[column].dtype == 'object':
        df[column] = label_encoder.fit_transform(df[column].astype(str))

df_cleaned = imputer.fit_transform(df)

In [5]:
# Feature Selection
X = df_cleaned[:, :-2]  # Features, excluding Stage and Response
y = df_cleaned[:, -1]   # Response variable

selector = SelectKBest(score_func=f_classif, k=10)
X_selected = selector.fit_transform(X, y)

# Splitting Data
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Data Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# K-Nearest Neighbors (KNN)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))

KNN Accuracy: 0.8571428571428571
Confusion Matrix for KNN:
[[57  1]
 [10  9]]
Classification Report for KNN:
              precision    recall  f1-score   support

         0.0       0.85      0.98      0.91        58
         1.0       0.90      0.47      0.62        19

    accuracy                           0.86        77
   macro avg       0.88      0.73      0.77        77
weighted avg       0.86      0.86      0.84        77



In [7]:
print("Confusion Matrix for KNN:")

Confusion Matrix for KNN:


In [8]:
print(confusion_matrix(y_test, y_pred_knn))

[[57  1]
 [10  9]]
