In [1]:
# Load libraries

import pandas as pd
import numpy as np
import time
from datetime import datetime
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Decision Tree
from sklearn.tree import DecisionTreeClassifier

# Neural Network
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# SVM
from sklearn.svm import SVC

# Load data
data = pd.read_csv('breast-cancer 2.csv')

# Encode the diagnosis column
label_encoder = LabelEncoder()
data['diagnosis'] = label_encoder.fit_transform(data['diagnosis'])

# Select only numeric columns
numeric_data = data.select_dtypes(include=[float, int])

# Split the data
X = numeric_data.drop('diagnosis', axis=1)
y = numeric_data['diagnosis']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Function to evaluate model performance
def evaluate_model(y_true, y_pred, model_name, training_time, prediction_time, start_train, end_train, start_pred, end_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    
    print(f"{model_name} Performance:")
    print(f"Training Time: {training_time:.4f} seconds (Started: {start_train}, Ended: {end_train})")
    print(f"Prediction Time: {prediction_time:.4f} seconds (Started: {start_pred}, Ended: {end_pred})")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Confusion Matrix:\n{confusion_matrix(y_true, y_pred)}\n")
    
    return {
        'Model': model_name,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'Training Time': training_time,
        'Prediction Time': prediction_time,
        'Start Training': start_train,
        'End Training': end_train,
        'Start Prediction': start_pred,
        'End Prediction': end_pred
    }

# 1. Decision Tree Model
dt_model = DecisionTreeClassifier(random_state=42)

# Measure training time and timestamps
start_train_dt = datetime.now()
start_time = time.time()
dt_model.fit(X_train, y_train)
dt_training_time = time.time() - start_time
end_train_dt = datetime.now()

# Measure prediction time and timestamps
start_pred_dt = datetime.now()
start_time = time.time()
dt_y_pred = dt_model.predict(X_test)
dt_prediction_time = time.time() - start_time
end_pred_dt = datetime.now()

# Evaluate Decision Tree Model
dt_results = evaluate_model(y_test, dt_y_pred, "Decision Tree", dt_training_time, dt_prediction_time, start_train_dt, end_train_dt, start_pred_dt, end_pred_dt)

# 2. Neural Network Model
nn_model = keras.Sequential([
    layers.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Measure training time and timestamps
start_train_nn = datetime.now()
start_time = time.time()
nn_model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, verbose=0)
nn_training_time = time.time() - start_time
end_train_nn = datetime.now()

# Measure prediction time and timestamps
start_pred_nn = datetime.now()
start_time = time.time()
nn_y_pred_prob = nn_model.predict(X_test_scaled)
nn_prediction_time = time.time() - start_time
end_pred_nn = datetime.now()

# Convert probabilities to binary predictions
nn_y_pred = (nn_y_pred_prob > 0.5).astype(int).flatten()

# Evaluate Neural Network Model
nn_results = evaluate_model(y_test, nn_y_pred, "Neural Network", nn_training_time, nn_prediction_time, start_train_nn, end_train_nn, start_pred_nn, end_pred_nn)

# 3. SVM Model
svm_model = SVC(kernel='linear', random_state=42)

# Measure training time and timestamps
start_train_svm = datetime.now()
start_time = time.time()
svm_model.fit(X_train_scaled, y_train)
svm_training_time = time.time() - start_time
end_train_svm = datetime.now()

# Measure prediction time and timestamps
start_pred_svm = datetime.now()
start_time = time.time()
svm_y_pred = svm_model.predict(X_test_scaled)
svm_prediction_time = time.time() - start_time
end_pred_svm = datetime.now()

# Evaluate SVM Model
svm_results = evaluate_model(y_test, svm_y_pred, "SVM", svm_training_time, svm_prediction_time, start_train_svm, end_train_svm, start_pred_svm, end_pred_svm)

# Compile results
results = pd.DataFrame([dt_results, nn_results, svm_results])

# Display results
print("Model Comparison Results")
print(results)


Decision Tree Performance:
Training Time: 0.0036 seconds (Started: 2024-08-05 22:53:32.832907, Ended: 2024-08-05 22:53:32.836569)
Prediction Time: 0.0003 seconds (Started: 2024-08-05 22:53:32.836575, Ended: 2024-08-05 22:53:32.836895)
Accuracy: 0.9386
Precision: 0.9390
Recall: 0.9386
F1 Score: 0.9387
Confusion Matrix:
[[67  4]
 [ 3 40]]

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Neural Network Performance:
Training Time: 0.3290 seconds (Started: 2024-08-05 22:53:32.851060, Ended: 2024-08-05 22:53:33.180101)
Prediction Time: 0.0293 seconds (Started: 2024-08-05 22:53:33.180110, Ended: 2024-08-05 22:53:33.209443)
Accuracy: 0.9737
Precision: 0.9737
Recall: 0.9737
F1 Score: 0.9736
Confusion Matrix:
[[70  1]
 [ 2 41]]

SVM Performance:
Training Time: 0.0012 seconds (Started: 2024-08-05 22:53:33.211150, Ended: 2024-08-05 22:53:33.212369)
Prediction Time: 0.0002 seconds (Started: 2024-08-05 22:53:33.212376, Ended: 2024-08-05 22:53:33.212539)
Accuracy: 0.9561
Preci