In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Read the CSV file into a DataFrame; every later cell works on `df`.
df = pd.read_csv(filepath_or_buffer='data.csv')

In [None]:
# Drop unnecessary columns.
# Reassigning (instead of dropping in place) together with errors='ignore'
# keeps this cell idempotent: re-running it once the columns are already
# gone is a no-op rather than a KeyError.
df = df.drop(columns=['id', 'Unnamed: 32'], errors='ignore')

In [None]:
# Normalise the column labels: every space becomes an underscore.
df.columns = df.columns.map(lambda label: label.replace(' ', '_'))


In [None]:
# Encode the diagnosis labels as integers.
# The fitted encoder is kept so the integer codes can be mapped back to the
# original labels later (label_encoder.classes_ / inverse_transform).
label_encoder = LabelEncoder()
df['diagnosis'] = label_encoder.fit_transform(df['diagnosis'])

In [None]:
# Separate the target from the features, then hold out 30% of the rows
# for testing. random_state is fixed so the split is reproducible.
y = df['diagnosis']
X = df.drop(columns='diagnosis')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Standardise the features.
# The scaler is fitted on the training split only; the same fitted
# parameters are then applied to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fit a Gaussian Naive Bayes classifier on the scaled training data.
# fit() returns the estimator itself, so construction and training chain;
# the bare name on the last line keeps the estimator as the cell's value.
nb = GaussianNB().fit(X_train, y_train)
nb



In [None]:
# Fit a k-nearest-neighbours classifier (k = 5) on the scaled training data.
# fit() returns the estimator itself, so construction and training chain;
# the bare name on the last line keeps the estimator as the cell's value.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
knn


In [None]:
# Predict test-set labels with each fitted model; the metrics cells below
# compare these predictions against y_test.
nb_pred, knn_pred = (model.predict(X_test) for model in (nb, knn))



In [None]:
# Naive Bayes: print each test-set metric on its own line.
print('Naive Bayes:')
for caption, metric in (
    ('Accuracy:', accuracy_score),
    ('Precision:', precision_score),
    ('Recall:', recall_score),
    ('F1 score:', f1_score),
):
    print(caption, metric(y_test, nb_pred))


Naive Bayes:
Accuracy: 0.935672514619883
Precision: 0.9193548387096774
Recall: 0.9047619047619048
F1 score: 0.912


In [None]:
# KNN: print each test-set metric on its own line.
print('KNN:')
for caption, metric in (
    ('Accuracy:', accuracy_score),
    ('Precision:', precision_score),
    ('Recall:', recall_score),
    ('F1 score:', f1_score),
):
    print(caption, metric(y_test, knn_pred))

KNN:
Accuracy: 0.9590643274853801
Precision: 0.9516129032258065
Recall: 0.9365079365079365
F1 score: 0.944
