In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import warnings



In [2]:
class BreastCancerModel:
    """Load a breast-cancer CSV, encode the target and create a train/test split.

    The constructor reads the CSV, removes every column whose name starts with
    ``Unnamed`` as well as the ``id`` column (if present), label-encodes the
    ``diagnosis`` column and splits the data into train and test partitions.

    Attributes:
        data: The cleaned DataFrame with ``diagnosis`` replaced by integer codes.
        label_encoder: The fitted ``LabelEncoder``; use ``inverse_transform`` to
            map integer codes back to the original diagnosis labels.
        scaler: The fitted ``StandardScaler`` after ``scale_data`` has been
            called, otherwise ``None``.
        X, y: All features (every column except ``diagnosis``) and the target
            (a one-column DataFrame).
        X_train, X_test, y_train, y_test: The split partitions.
    """

    def __init__(self, data_path='breast cancer.csv', test_size=0.25, random_state=42):
        """Read and prepare the dataset.

        Args:
            data_path: Path of the CSV file to load.
            test_size: Fraction of rows held out for testing.
            random_state: Seed forwarded to ``train_test_split``.
        """
        data = pd.read_csv(data_path)
        data = data.loc[:, ~data.columns.str.contains('^Unnamed')]
        # errors='ignore' so a file that already lacks the identifier column loads too.
        data = data.drop(columns='id', errors='ignore')

        # Keep the encoder so predictions can be decoded back to the raw labels.
        self.label_encoder = preprocessing.LabelEncoder()
        data['diagnosis'] = self.label_encoder.fit_transform(data['diagnosis'])
        self.data = data

        # Select by name instead of by position: this does not depend on
        # 'diagnosis' being the first column or on there being exactly 30 features.
        self.X = self.data.drop(columns='diagnosis')
        self.y = self.data[['diagnosis']]

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=test_size, random_state=random_state)

        self.scaler = None

    def scale_data(self):
        """Standardise the features in place.

        The scaler is fitted on the training partition only and then applied to
        the test partition. ``X_train`` and ``X_test`` are replaced by the arrays
        returned by the scaler; the fitted scaler is kept in ``self.scaler``.
        """
        self.scaler = StandardScaler()
        self.X_train = self.scaler.fit_transform(self.X_train)
        self.X_test = self.scaler.transform(self.X_test)

In [3]:
class SupportVectorMachine:
    """Thin wrapper around ``sklearn.svm.SVC`` bound to one train/test split.

    Attributes:
        svm_model: The underlying ``SVC`` estimator.
        X_train, y_train, X_test, y_test: The data passed to the constructor.
    """

    def __init__(self, X_train, y_train, X_test, y_test, kernel='linear'):
        """Store the split and build the estimator.

        Args:
            X_train, X_test: Feature matrices.
            y_train, y_test: Labels; any array-like (DataFrame, Series, ndarray).
            kernel: Kernel name forwarded to ``SVC``.
        """
        self.svm_model = SVC(kernel=kernel)
        self.X_train, self.y_train, self.X_test, self.y_test = X_train, y_train, X_test, y_test

    def train(self):
        """Fit the estimator on the training partition."""
        # np.asarray(...).ravel() flattens a one-column DataFrame and also works
        # for plain arrays, which have no ``.values`` attribute.
        self.svm_model.fit(self.X_train, np.asarray(self.y_train).ravel())

    def predict(self):
        """Return the predicted labels for the test partition."""
        return self.svm_model.predict(self.X_test)

    def evaluate(self):
        """Print and return the accuracy on the test partition."""
        y_pred = self.predict()
        accuracy = accuracy_score(np.asarray(self.y_test).ravel(), y_pred)
        print("SVM Accuracy:", accuracy)
        return accuracy


In [7]:
class LogisticRegressionModel:
    """Binary logistic regression trained with batch gradient descent.

    A column of ones is prepended to both feature matrices so that the first
    entry of ``theta`` plays the role of the intercept.

    Attributes:
        X_train, X_test: Feature matrices including the leading ones column.
        y_train, y_test: 1-D label arrays.
        theta: Parameter vector learned by ``train`` (``None`` before training).
        cost_history: Cost after each gradient-descent step of the last ``train``.
    """

    def __init__(self, X_train, y_train, X_test, y_test):
        """Store the split, adding the intercept column and flattening labels.

        Args:
            X_train, X_test: 2-D feature matrices (anything with ``.shape`` that
                ``np.column_stack`` accepts).
            y_train, y_test: Labels; any array-like (DataFrame, Series, ndarray).
        """
        self.X_train = np.column_stack((np.ones(X_train.shape[0]), X_train))
        self.X_test = np.column_stack((np.ones(X_test.shape[0]), X_test))
        # np.asarray(...).ravel() handles pandas objects and plain arrays alike.
        self.y_train = np.asarray(y_train).ravel()
        self.y_test = np.asarray(y_test).ravel()
        self.theta = None
        self.cost_history = []

    @staticmethod
    def sigmoid(z):
        """Return the logistic function of ``z`` element-wise.

        ``z`` is clipped to [-500, 500] first so that ``np.exp`` cannot overflow;
        inside that range the result is unchanged.
        """
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def cost_function(self, theta):
        """Return the mean cross-entropy of ``theta`` on the training data."""
        m = len(self.y_train)
        # Keep probabilities strictly inside (0, 1): log(0) would give -inf and
        # 0 * -inf would turn the whole cost into nan.
        eps = 1e-15
        h = np.clip(self.sigmoid(self.X_train @ theta), eps, 1 - eps)
        J = -1 / m * (self.y_train @ np.log(h) + (1 - self.y_train) @ np.log(1 - h))
        return J

    def gradient_descent(self, theta, alpha, num_iterations):
        """Run batch gradient descent starting from ``theta``.

        Args:
            theta: Initial parameter vector (one entry per column of ``X_train``).
            alpha: Learning rate.
            num_iterations: Number of update steps.

        Returns:
            ``(theta, J_history)`` where ``J_history`` holds the cost measured
            after each update.
        """
        m = len(self.y_train)
        J_history = []

        for _ in range(num_iterations):
            h = self.sigmoid(self.X_train @ theta)
            gradient = self.X_train.T @ (h - self.y_train) / m
            theta = theta - alpha * gradient
            J_history.append(self.cost_function(theta))

        return theta, J_history

    def train(self, alpha=0.01, num_iterations=1500, random_state=None):
        """Fit the model, then print and return the test accuracy.

        Args:
            alpha: Learning rate.
            num_iterations: Number of gradient-descent steps.
            random_state: Optional seed for the initial parameters. When ``None``
                the global NumPy random state is used, so ``np.random.seed``
                still controls the initialisation.

        Returns:
            The accuracy on the test partition.
        """
        rng = np.random if random_state is None else np.random.RandomState(random_state)
        initial_theta = rng.rand(self.X_train.shape[1])
        # Keep the fitted parameters and cost curve so the model can be reused
        # and its convergence inspected after training.
        self.theta, self.cost_history = self.gradient_descent(initial_theta, alpha, num_iterations)
        return self.evaluate()

    def predict(self):
        """Return 0/1 predictions for the test partition (threshold 0.5).

        Raises:
            RuntimeError: If the model has not been trained yet.
        """
        if self.theta is None:
            raise RuntimeError("Model has not been trained; call train() first.")
        return (self.sigmoid(self.X_test @ self.theta) >= 0.5).astype(int)

    def evaluate(self):
        """Print and return the accuracy on the test partition."""
        accuracy = accuracy_score(self.y_test, self.predict())
        print("Logistic Regression Accuracy:", accuracy)
        return accuracy

In [8]:
class NeuralNetworkModel:
    """Small fully connected Keras classifier bound to one train/test split.

    The network has two hidden ``Dense`` layers of 6 ReLU units and a single
    sigmoid output unit, compiled with the Adam optimizer and binary
    cross-entropy loss, tracking accuracy.

    Attributes:
        classifier: The compiled ``Sequential`` model.
        X_train, y_train, X_test, y_test: The data passed to the constructor.
    """

    def __init__(self, X_train, y_train, X_test, y_test):
        """Build and compile the network; the input width is ``X_train.shape[1]``."""
        input_dim = X_train.shape[1]
        self.classifier = Sequential()
        self.classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=input_dim))
        self.classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))
        self.classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

        self.classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        self.X_train, self.y_train, self.X_test, self.y_test = X_train, y_train, X_test, y_test

    def train(self, epochs=100, batch_size=10):
        """Fit the network on the training partition without progress output.

        Args:
            epochs: Number of passes over the training data.
            batch_size: Mini-batch size.
        """
        self.classifier.fit(self.X_train, self.y_train, batch_size=batch_size, epochs=epochs, verbose=0)

    def predict(self, threshold=0.5):
        """Return 0/1 predictions for the test partition.

        Args:
            threshold: Outputs strictly greater than this value are labelled 1.
        """
        return (self.classifier.predict(self.X_test) > threshold).astype(int)

    def evaluate(self, batch_size=10):
        """Print and return loss and accuracy on both partitions.

        Args:
            batch_size: Batch size forwarded to ``classifier.evaluate``.

        Returns:
            A dict with the keys ``train_loss``, ``train_accuracy``,
            ``test_loss`` and ``test_accuracy``.
        """
        print('*' * 20)
        train_score, train_acc = self.classifier.evaluate(self.X_train, self.y_train, batch_size=batch_size)
        print('Train score:', train_score)
        print('Train accuracy:', train_acc)

        # No separate predict() pass here: evaluate() already computes the
        # test metrics and the predictions were never used.
        test_score, test_acc = self.classifier.evaluate(self.X_test, self.y_test, batch_size=batch_size)
        print('Test score:', test_score)
        print('Test accuracy:', test_acc)

        return {
            'train_loss': train_score,
            'train_accuracy': train_acc,
            'test_loss': test_score,
            'test_accuracy': test_acc,
        }

In [9]:
if __name__ == "__main__":
    # Blanket suppression of library warnings to keep the notebook output short.
    warnings.filterwarnings("ignore")

    # Seed NumPy's global generator so the random initial parameters of the
    # logistic regression are the same on every run.
    seed = 42
    np.random.seed(seed)

    # Load the data, split it and standardise the features.
    breast_cancer = BreastCancerModel()
    breast_cancer.scale_data()

    svm_model = SupportVectorMachine(breast_cancer.X_train, breast_cancer.y_train,
                                     breast_cancer.X_test, breast_cancer.y_test)
    svm_model.train()
    svm_model.evaluate()

    logistic_regression = LogisticRegressionModel(breast_cancer.X_train, breast_cancer.y_train,
                                                  breast_cancer.X_test, breast_cancer.y_test)
    # train() also prints the test accuracy.
    logistic_regression.train()

    # Seed TensorFlow before the network is built so weight initialisation and
    # batch shuffling are repeatable (hardware-level nondeterminism may remain).
    import tensorflow as tf
    tf.random.set_seed(seed)

    neural_network = NeuralNetworkModel(breast_cancer.X_train, breast_cancer.y_train,
                                       breast_cancer.X_test, breast_cancer.y_test)
    neural_network.train()
    neural_network.evaluate()

SVM Accuracy: 0.972027972027972
Logistic Regression Accuracy: 0.965034965034965
********************
Train score: 0.006586557719856501
Train accuracy: 0.9976525902748108
Test score: 0.16093645989894867
Test accuracy: 0.9790209531784058
