## Naive Bayes classifier and SVM - Predict whether a tumor is malignant or benign

### Import required libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

### Load the dataset

In [2]:
# Load the breast-cancer table. The path is relative, so it is resolved against
# the kernel's current working directory.
DATA_FILE = 'breast_cancer.csv'
df = pd.read_csv(DATA_FILE)

### Data cleaning

In [3]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

diagnosis                  0
radius_mean                0
texture_mean               0
perimeter_mean             0
area_mean                  0
smoothness_mean            0
compactness_mean           0
concavity_mean             0
concave points_mean        0
symmetry_mean              0
fractal_dimension_mean     0
radius_se                  0
texture_se                 0
perimeter_se               0
area_se                    0
smoothness_se              0
compactness_se             0
concavity_se               0
concave points_se          0
symmetry_se                0
fractal_dimension_se       0
radius_worst               0
texture_worst              0
perimeter_worst            0
area_worst                 0
smoothness_worst           0
compactness_worst          0
concavity_worst            0
concave points_worst       0
symmetry_worst             0
fractal_dimension_worst    0
dtype: int64

#### 1. Feature scaling

In [4]:
# Standardize every column except the first one (the `diagnosis` label) to
# zero mean and unit variance.
# NOTE(review): the scaler is fit on all rows, i.e. before the train/test split
# made further down, so its mean/std also reflect rows that end up in the test set.
feature_values = df.iloc[:, 1:]
scaler = StandardScaler().fit(feature_values)
df.iloc[:, 1:] = scaler.transform(feature_values)

#### 2. Feature renaming

In [5]:
# Replace the column labels positionally: the list must contain one name per
# existing column, in the same order as the loaded table
# (e.g. `radius_mean` -> `mean_radius`, `radius_se` -> `radius_error`,
# `radius_worst` -> `worst_radius`).
renamed_columns = [
    # target label
    'diagnosis',
    # per-tumor mean of each measurement
    'mean_radius', 'mean_texture', 'mean_perimeter', 'mean_area', 'mean_smoothness',
    'mean_compactness', 'mean_concavity', 'mean_concave_points', 'mean_symmetry', 'mean_fractal_dimension',
    # columns that carried the `_se` suffix
    'radius_error', 'texture_error', 'perimeter_error', 'area_error', 'smoothness_error',
    'compactness_error', 'concavity_error', 'concave_points_error', 'symmetry_error', 'fractal_dimension_error',
    # columns that carried the `_worst` suffix
    'worst_radius', 'worst_texture', 'worst_perimeter', 'worst_area', 'worst_smoothness',
    'worst_compactness', 'worst_concavity', 'worst_concave_points', 'worst_symmetry', 'worst_fractal_dimension',
]
df.columns = renamed_columns

#### 3. Label encoding

In [6]:
# Turn the `diagnosis` labels into integer codes. LabelEncoder numbers the
# distinct values in sorted order; `le.classes_` records the mapping.
# NOTE(review): presumably 'B' -> 0 and 'M' -> 1 if the column holds the usual
# B/M codes -- confirm with `le.classes_`.
diagnosis_labels = df['diagnosis']
le = LabelEncoder().fit(diagnosis_labels)
df['diagnosis'] = le.transform(diagnosis_labels)

### Split the dataset into train and test sets

In [7]:
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle so the
# split is reproducible.
features = df.drop('diagnosis', axis=1)
target = df['diagnosis']
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Standardize using statistics from the training rows only. The feature-scaling
# cell earlier in the notebook fits its scaler on the whole table, so held-out
# rows contribute to that mean/std (data leakage). Standardization is an affine
# map per column, so refitting here gives the same values as scaling the raw
# features with training-only statistics, whatever scaling was applied before.
train_scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(
    train_scaler.transform(X_train), columns=X_train.columns, index=X_train.index
)
X_test = pd.DataFrame(
    train_scaler.transform(X_test), columns=X_test.columns, index=X_test.index
)

### Train the models

#### 1. Naive Bayes classifier

In [8]:
# Fit a Gaussian Naive Bayes classifier with library-default settings on the
# training split.
gnb = GaussianNB().fit(X_train, y_train)
gnb  # fit() returns the estimator itself; show it as the cell output

GaussianNB()

#### 2. SVM

In [9]:
# Fit a support vector classifier with library-default hyperparameters on the
# training split.
svm = SVC().fit(X_train, y_train)
svm  # fit() returns the estimator itself; show it as the cell output

SVC()

### Test the models

#### 1. Naive Bayes classifier

In [10]:
# Score the Naive Bayes model on the held-out rows.
# Precision, recall and F1 are computed with scikit-learn's default positive
# label (1), i.e. whichever diagnosis the label encoder mapped to 1.
y_pred = gnb.predict(X_test)
print("Naive Bayes Classifier:\n")
for metric_label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-score:", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

Naive Bayes Classifier:

Accuracy: 0.9649122807017544
Precision: 0.975609756097561
Recall: 0.9302325581395349
F1-score: 0.9523809523809524


#### 2. SVM

In [11]:
# Score the SVM on the held-out rows.
# Precision, recall and F1 are computed with scikit-learn's default positive
# label (1), i.e. whichever diagnosis the label encoder mapped to 1.
y_pred = svm.predict(X_test)
print("\nSVM:\n")
svm_scores = {
    "Accuracy:": accuracy_score(y_test, y_pred),
    "Precision:": precision_score(y_test, y_pred),
    "Recall:": recall_score(y_test, y_pred),
    "F1-score:": f1_score(y_test, y_pred),
}
for metric_label, metric_value in svm_scores.items():
    print(metric_label, metric_value)


SVM:

Accuracy: 0.9736842105263158
Precision: 0.9761904761904762
Recall: 0.9534883720930233
F1-score: 0.9647058823529412
