# Importing Dependencies

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score , classification_report,ConfusionMatrixDisplay,precision_score,recall_score, f1_score,roc_auc_score,roc_curve, balanced_accuracy_score
from sklearn.model_selection import GridSearchCV

# Data Collection  

In [None]:
#Loading the breast cancer dataset from csv file to pandas dataframe
breast_cancer_data= pd.read_csv("Breast_cancer_data.csv")

# Data pre-processing

In [None]:
breast_cancer_data.keys()

In [None]:
#printing the first five rows of the dataframe
breast_cancer_data.head()

In [None]:
breast_cancer_data.shape

In [None]:
#CHecking the data types
breast_cancer_data.info()

In [None]:
# Remove the 'id' column from the dataframe in place
breast_cancer_data.drop(columns=['id'], inplace=True)

In [None]:
# Checking for null values
breast_cancer_data.isnull().sum()

In [None]:
# Checking for missing values
# (isna() and isnull() are aliases in pandas, so this repeats the check from the previous cell)
breast_cancer_data.isna().sum()

In [None]:
# Statistical summary of the data- Descriptive Statistics
breast_cancer_data.describe()

In [None]:
breast_cancer_data['diagnosis'].value_counts()

# Exploratory Data Analysis

In [None]:
#Encoding the targeted column
label_encode= LabelEncoder()

In [None]:
# Fit the encoder on the 'diagnosis' column and convert it to integer class codes
labels= label_encode.fit_transform(breast_cancer_data['diagnosis'])

In [None]:
breast_cancer_data['target']=labels

In [None]:
# The encoded 'target' column now carries the label, so drop the original 'diagnosis' column
breast_cancer_data.drop(columns=['diagnosis'], inplace=True)

In [None]:
#Diagnosis column removed
breast_cancer_data.head()

In [None]:
breast_cancer_data['target'].value_counts()

In [None]:
# Number of distinct (non-null) values in every column, shown as a one-column table
unique_counts = breast_cancer_data.nunique()
unique_counts.to_frame(name='Unique values in the dataset')

# Data visualisation

In [None]:
#Countplot for the target column for checking the distribution of target
# One bar colour per class
colors = ['skyblue', 'darkkhaki']
sns.countplot(x='target',data=breast_cancer_data, palette=colors)
#Benign=0
#Malignant=1

In [None]:
#creating a for loop to get the distribution plot for all columns

for column in breast_cancer_data:
    sns.displot(x=column, data= breast_cancer_data)

In [None]:
#Correlation of other features with breast cancer

In [None]:
# Bar chart of every feature's correlation with the encoded target column
feature_frame = breast_cancer_data.drop('target', axis=1)
target_correlation = feature_frame.corrwith(breast_cancer_data.target)
target_correlation.plot(
    kind='bar',
    grid=True,
    figsize=(10, 6),
    title="Correlation with breast cancer",
    color='skyblue',
);

In [None]:
#Pairplot
sns.pairplot(breast_cancer_data)
plt.show()

In [None]:
#Scatter plot of first 2 columns
#Select first column of the dataframe as a series
first_column= breast_cancer_data.iloc[:,0]

#Select second column of the dataframe as a series
second_column= breast_cancer_data.iloc[:,1]

In [None]:
print(first_column)
print('.....................................')
print(second_column)

In [None]:
# Scatter plot of the first two columns, with one random colour per point.
# matplotlib requires exactly one colour per plotted point, so the number of
# random values is taken from the data instead of being hard-coded.
z = np.random.rand(len(first_column))

colors = plt.cm.viridis(z)  # Convert the random values to RGBA colours using a colormap


plt.scatter(x=first_column, y=second_column, c=colors)

# Label the axes with the actual column names (x label in red, y label in blue)
plt.xlabel(first_column.name, color='red')
plt.ylabel(second_column.name, color='blue')

# Add a title
plt.title("Biplot of two elements")

# Rotate the x-axis labels by 45 degrees
plt.xticks(rotation=45)

# Show the plot
plt.show()

In [None]:
#Outliers Detection
# Draw one box plot per column, each in its own figure
for column in breast_cancer_data:
    plt.figure()
    breast_cancer_data.boxplot([column])

In [None]:
#Correlation Matrix
correlation_matrix= breast_cancer_data.corr()

In [None]:
#construct a heat map to visualize the correlation matrix

plt.figure(figsize=(20,20))
# annot= True writes each coefficient into its cell; fmt='.1f' shows it with one decimal place
sns.heatmap(correlation_matrix, cbar=True, fmt='.1f',annot= True, cmap= 'Blues')
# The file name has no extension, so matplotlib falls back to its default output format
plt.savefig("correlation heat map")

In [None]:
#multicollinearity problem

In [None]:
#Grouping the data based on the target
breast_cancer_data.groupby('target').mean()

In [None]:
# Split the dataframe into the integer label vector Y and the feature matrix X
# (X is every column except 'target')
Y = breast_cancer_data['target'].astype(int)
X = breast_cancer_data.drop(columns='target')

In [None]:
print(X)

In [None]:
print(Y)

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
# Fit a tree ensemble on all features to rank them by importance
# NOTE(review): no random_state is passed, so the importances can differ between runs
model = ExtraTreesClassifier()
model.fit(X,Y)
print(model.feature_importances_) 

#plot graph of feature importances for better visualization
feat_importances = pd.Series(model.feature_importances_, index=X.columns)
plt.figure(figsize=(8,6))
# Only the six features with the highest importance are drawn
feat_importances.nlargest(6).plot(kind='barh',color='skyblue')
plt.show()

# Data Standardization

In [None]:
scalar= StandardScaler()

In [None]:
# Learn the scaling statistics from X
# NOTE(review): the scaler is fit on the full dataset before the train/test split further down,
# so test-set statistics leak into the scaling - consider fitting on the training split only
scalar.fit(X)

In [None]:
standardized_data= scalar.transform(X)

In [None]:
print(standardized_data)

In [None]:
X= standardized_data

In [None]:
Y=breast_cancer_data['target']

# Splitting into train data and test data

In [None]:
# Hold out 20% of the rows for testing; stratify=Y keeps the class proportions equal in
# both splits and random_state=2 makes the split reproducible
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size= 0.2, stratify=Y,random_state=2)

In [None]:
print(X.shape,X_train.shape,X_test.shape)

# Model Fitting

# Support Vector Machine

In [None]:
svc = svm.SVC()


In [None]:
svc.fit(X_train, Y_train)


In [None]:
pred_svc = svc.predict(X_test)
pred_svc

In [None]:
print(classification_report(Y_test, pred_svc))

In [None]:
# Calculate the accuracy score for the default SVC model
accuracy_svc = accuracy_score(Y_test, pred_svc)
print("Accuracy score for default SVC model:", accuracy_svc)

# Tuning for SVM

In [None]:
# defining parameter range
# 4 values of C x 3 kernels x 3 values of gamma = 36 candidate combinations
param_grid = {'C': [0.1, 1, 10, 100],
               'kernel': ['linear', 'poly', 'rbf'],
               'gamma': [0.01, 0.1, 1.0]}
 
# refit = True retrains the best combination, so gridsvm itself ends up as a fitted model
gridsvm = GridSearchCV(svm.SVC(), param_grid, refit = True, verbose = 3)
  
# fitting the model for grid search
gridsvm.fit(X_train, Y_train)

In [None]:
print(gridsvm.best_params_)

In [None]:
# Re-train the SVC with the hyper-parameters selected by the grid search.
# Reading them from gridsvm.best_params_ (instead of copying the values by hand)
# keeps this cell in sync with whatever the search actually found.
svc2 = svm.SVC(**gridsvm.best_params_)
svc2.fit(X_train, Y_train)
pred_svc2 = svc2.predict(X_test)
print(classification_report(Y_test, pred_svc2))

In [None]:
#confusion matrix
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# The label order comes from the classifier whose predictions are being evaluated
cm = confusion_matrix(Y_test, pred_svc2, labels=svc2.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=svc2.classes_)

# Draw on an explicitly created 6x6 inch axes: without `ax`, disp.plot() opens a
# figure of its own and any figure created beforehand is left empty
fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(cmap=plt.cm.Blues, ax=ax)
ax.grid(False)  # make sure no gridlines are drawn over the cells
plt.show()

In [None]:
# Calculate the accuracy score for the SVC model with the best parameters
accuracy_svc2 = accuracy_score(Y_test, pred_svc2)
print("Accuracy score for SVC model with best parameters:", accuracy_svc2)

In [None]:
svc2.score( X_test, Y_test)

In [None]:
# Calculate and store the test-set metrics of the tuned SVM
accuracy = accuracy_score(Y_test, pred_svc2)
f1 = f1_score(Y_test, pred_svc2)
precision = precision_score(Y_test, pred_svc2)
recall = recall_score(Y_test, pred_svc2)

# Create a DataFrame to store the metrics.
# The values are kept as fractions in [0, 1], the same scale used by the metric
# charts of the other models, so the charts can be compared directly.
metrics_df = pd.DataFrame({
    'Metric': ['Accuracy', 'F1-Score', 'Precision', 'Recall'],
    'Value': [accuracy, f1, precision, recall]
})


# Create a bar chart to visualize the metrics
plt.figure(figsize=(8, 5))
plt.bar(metrics_df['Metric'], metrics_df['Value'], color='skyblue')
plt.xlabel('Metric')
plt.ylabel('Value')
plt.title('Evaluation Metrics for SVM')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


# Logistic regression

In [None]:
log= LogisticRegression()

In [None]:
log.fit(X_train,Y_train)

In [None]:
pred_log = log.predict(X_test)
pred_log



In [None]:
print(classification_report(Y_test, pred_log))

In [None]:
# Calculate the accuracy score for the default Logistic regresssion model
accuracy_log = accuracy_score(Y_test, pred_log)
print("Accuracy score for default Logistic regresssion model:", accuracy_log)

# Tuning for Logistic regression

In [None]:
# defining parameter range
# 5 values of C x 3 solvers = 15 candidate combinations
param_grid = {'C': [0.01, 0.1, 1, 10, 100],
               'solver': ['lbfgs', 'sag', 'newton-cg']}

 
# refit = True retrains the best combination, so gridlog itself ends up as a fitted model
gridlog = GridSearchCV(LogisticRegression(), param_grid, refit = True, verbose = 3)
  
# fitting the model for grid search
gridlog.fit(X_train, Y_train)

In [None]:
print(gridlog.best_params_)

In [None]:
# Re-train logistic regression with the hyper-parameters selected by the grid search.
# Reading them from gridlog.best_params_ (instead of copying the values by hand)
# keeps this cell in sync with whatever the search actually found.
log2 = LogisticRegression(**gridlog.best_params_)
log2.fit(X_train, Y_train)
pred_log2 = log2.predict(X_test)
print(classification_report(Y_test, pred_log2))

In [None]:
#confusion matrix
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# The label order comes from the classifier whose predictions are being evaluated
cm = confusion_matrix(Y_test, pred_log2, labels=log2.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=log2.classes_)

# Draw on an explicitly created 6x6 inch axes: without `ax`, disp.plot() opens a
# figure of its own and any figure created beforehand is left empty
fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(cmap=plt.cm.Blues, ax=ax)
ax.grid(False)  # make sure no gridlines are drawn over the cells
plt.show()

In [None]:
# Calculate the accuracy score for the Logistic regresssion model with the best parameters
accuracy_log2 = accuracy_score(Y_test, pred_log2)
print("Accuracy score for Logistic regresssion model with best parameters:", accuracy_log2)

In [None]:
# Test-set scores of the tuned logistic regression model
accuracy = accuracy_score(Y_test, pred_log2)
precision = precision_score(Y_test, pred_log2)
recall = recall_score(Y_test, pred_log2)
f1 = f1_score(Y_test, pred_log2)

# One row per metric, in the order the bars are drawn
metrics_df = pd.DataFrame(
    [
        ('Accuracy', accuracy),
        ('F1-Score', f1),
        ('Precision', precision),
        ('Recall', recall),
    ],
    columns=['Metric', 'Value'],
)

# Bar chart of the four scores
plt.figure(figsize=(8, 5))
plt.bar(metrics_df['Metric'], metrics_df['Value'], color='skyblue')
plt.title('Evaluation Metrics for Logistic regresssion')
plt.xlabel('Metric')
plt.ylabel('Value')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


# KNN

In [None]:
knn=KNeighborsClassifier()
knn.fit(X_train, Y_train)

In [None]:
pred_knn = knn.predict(X_test)
pred_knn


In [None]:
print(classification_report(Y_test, pred_knn))

In [None]:
# Calculate the accuracy score for the default KNN model
accuracy_knn = accuracy_score(Y_test, pred_knn)
print("Accuracy score for default KNN model:", accuracy_knn)

# Tuning for K Nearest Neighbors

In [None]:
from sklearn.model_selection import GridSearchCV
# defining parameter range
# 10 values of n_neighbors x 2 weighting schemes = 20 candidate combinations
param_grid = {'n_neighbors': [1,3,5,7,9,11,13,15,17,19],  # odd numbers because there are 2 classes in the target column
              'weights': ['distance', 'uniform']}  
# refit = True retrains the best combination, so gridKNN itself ends up as a fitted model
gridKNN = GridSearchCV(KNeighborsClassifier(), param_grid, refit = True, verbose = 3)
  
# fitting the model for grid search
gridKNN.fit(X_train, Y_train)

In [None]:
print(gridKNN.best_params_)

In [None]:
# Re-train KNN with the hyper-parameters selected by the grid search.
# Reading them from gridKNN.best_params_ (instead of copying the values by hand)
# keeps this cell in sync with whatever the search actually found.
knn2 = KNeighborsClassifier(**gridKNN.best_params_)
knn2.fit(X_train, Y_train)
pred_knn2 = knn2.predict(X_test)
print(classification_report(Y_test, pred_knn2))

In [None]:
#confusion matrix
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# The label order comes from the classifier whose predictions are being evaluated
cm = confusion_matrix(Y_test, pred_knn2, labels=knn2.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=knn2.classes_)

# Draw on an explicitly created 6x6 inch axes: without `ax`, disp.plot() opens a
# figure of its own and any figure created beforehand is left empty
fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(cmap=plt.cm.Blues, ax=ax)
ax.grid(False)  # make sure no gridlines are drawn over the cells
plt.show()

In [None]:
# Calculate the accuracy score for the KNN model with the best parameters
accuracy_knn2 = accuracy_score(Y_test, pred_knn2)
print("Accuracy score for KNN model with best parameters:", accuracy_knn2)

In [None]:
# Test-set scores of the tuned K-nearest-neighbours model
accuracy = accuracy_score(Y_test, pred_knn2)
precision = precision_score(Y_test, pred_knn2)
recall = recall_score(Y_test, pred_knn2)
f1 = f1_score(Y_test, pred_knn2)

# One row per metric, in the order the bars are drawn
metrics_df = pd.DataFrame(
    [
        ('Accuracy', accuracy),
        ('F1-Score', f1),
        ('Precision', precision),
        ('Recall', recall),
    ],
    columns=['Metric', 'Value'],
)

# Bar chart of the four scores
plt.figure(figsize=(8, 5))
plt.bar(metrics_df['Metric'], metrics_df['Value'], color='skyblue')
plt.title('Evaluation Metrics for K-Nearest Neighbors')
plt.xlabel('Metric')
plt.ylabel('Value')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


# Gaussian Naive Bayes

In [None]:
GNB=GaussianNB()
GNB.fit(X_train, Y_train)

In [None]:
pred_GNB = GNB.predict(X_test)
pred_GNB

In [None]:
print(classification_report(Y_test, pred_GNB))

In [None]:
# Calculate the accuracy score for the default Gaussian Naive Bayes model
accuracy_GNB = accuracy_score(Y_test, pred_GNB)
print("Accuracy score for default Gaussian Naive Bayes model:", accuracy_GNB)

# Tuning for Gaussian Naive Bayes

In [None]:
from sklearn.model_selection import GridSearchCV
# defining parameter range
# 100 candidate values for var_smoothing, logarithmically spaced from 1 (10**0) down to 10**-9
param_grid = {'var_smoothing': np.logspace(0, -9, num=100)}

# refit = True retrains the best candidate, so gridGNB itself ends up as a fitted model
gridGNB = GridSearchCV(GaussianNB(), param_grid, refit = True, verbose = 5)
  
# fitting the model for grid search
gridGNB.fit(X_train, Y_train)


In [None]:
print(gridGNB.best_params_)

In [None]:
# Re-train Gaussian Naive Bayes with the var_smoothing selected by the grid search.
# Reading it from gridGNB.best_params_ (instead of copying the value by hand)
# keeps this cell in sync with whatever the search actually found.
GNB2 = GaussianNB(**gridGNB.best_params_)
GNB2.fit(X_train, Y_train)
pred_GNB2 = GNB2.predict(X_test)
print(classification_report(Y_test, pred_GNB2))

In [None]:
# Calculate the accuracy score for the Gaussian Naive Bayes model with the best parameters
accuracy_GNB2 = accuracy_score(Y_test, pred_GNB2)
print("Accuracy score for Gaussian Naive Bayes model with best parameters:", accuracy_GNB2)

In [None]:
#confusion matrix
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# The label order comes from the classifier whose predictions are being evaluated
cm = confusion_matrix(Y_test, pred_GNB2, labels=GNB2.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=GNB2.classes_)

# Draw on an explicitly created 6x6 inch axes: without `ax`, disp.plot() opens a
# figure of its own and any figure created beforehand is left empty
fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(cmap=plt.cm.Blues, ax=ax)
ax.grid(False)  # make sure no gridlines are drawn over the cells
plt.show()

In [None]:
# Test-set scores of the tuned Gaussian Naive Bayes model
accuracy = accuracy_score(Y_test, pred_GNB2)
precision = precision_score(Y_test, pred_GNB2)
recall = recall_score(Y_test, pred_GNB2)
f1 = f1_score(Y_test, pred_GNB2)

# One row per metric, in the order the bars are drawn
metrics_df = pd.DataFrame(
    [
        ('Accuracy', accuracy),
        ('F1-Score', f1),
        ('Precision', precision),
        ('Recall', recall),
    ],
    columns=['Metric', 'Value'],
)

# Bar chart of the four scores
plt.figure(figsize=(8, 5))
plt.bar(metrics_df['Metric'], metrics_df['Value'], color='skyblue')
plt.title('Evaluation Metrics for Gaussian Naive Bayes')
plt.xlabel('Metric')
plt.ylabel('Value')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


# Cross-validation using scikit

In [None]:
from sklearn.model_selection import KFold

# 5-fold cross-validation of Gaussian Naive Bayes on the standardized features.
# NOTE(review): the loop reuses the names X_train/X_test/Y_train/Y_test/GNB2, so after it
# finishes they hold the LAST fold, not the hold-out split created earlier.
kf = KFold(n_splits=5, shuffle=True)

for train_index, test_index in kf.split(X):
    # kf.split yields positional row indices, so the label Series is indexed by
    # position (.iloc) rather than by index label
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y.iloc[train_index], Y.iloc[test_index]
    GNB2 = GaussianNB(var_smoothing = 1.2328467394420635e-09)
    GNB2.fit(X_train, Y_train)
    predictions = GNB2.predict(X_test)
    # classification_report expects (y_true, y_pred); with the arguments swapped,
    # precision and recall are reported the wrong way round
    print(classification_report(Y_test, predictions))

In [None]:
# Calculate the accuracy score for the Gaussian Naive Bayes model with the best parameters
# NOTE(review): Y_test and predictions hold only the last cross-validation fold at this point,
# so this is the accuracy of that single fold, not an average over the five folds
accuracy_GNB3 = accuracy_score(Y_test, predictions)
print("Accuracy score for Gaussian Naive Bayes model with best parameters:", accuracy_GNB3)

# Comparison Table

In [None]:
# Plain-text table of the accuracy of each model before and after tuning
# NOTE(review): the Naive Bayes "After Tuning" value is accuracy_GNB3 (last cross-validation
# fold), while the other rows use the hold-out accuracy of the tuned model - confirm this is intended
print("       Model              :          Accuracy          :    After Tuning     ")
print("-----------------------------------------------------------------------------")
print("Support Vector Machine    :  ",accuracy_svc,"      :     ",accuracy_svc2  )
print("Logistic regresssion      :  ",accuracy_log,"      :     ",accuracy_log2  )
print("k-nearest neighbours      :  ",accuracy_knn,"      :     ",accuracy_knn2  )
print("Gaussian Naive Bayes      :  ",accuracy_GNB,"      :     ",accuracy_GNB3  )



# Comparison graph- Before Tuning

In [None]:
# Bar chart of the accuracy of the four default (untuned) models
fig = plt.figure() 

labels = ["SVM", "LG","KNN","NB"]
accuracy_values = [accuracy_svc, accuracy_log, accuracy_knn,accuracy_GNB]
# Round to two decimals for display
r_accuracy_values = [round(v, 2) for v in accuracy_values]
plt.bar(labels,r_accuracy_values,color='skyblue')

# Write each value at half the height of its bar
for i,v in enumerate(r_accuracy_values):
    plt.text(i, v/2, str(v), ha='center', color='azure', fontsize=20,fontweight='bold')

plt.xlabel("Algorithm")
plt.ylabel("Accuracy")
plt.title("Accuracy of Different Algorithms")
plt.xticks(rotation=45)
plt.show()

# Comparison graph- Before Tuning vs After Tuning 

In [None]:
import matplotlib.pyplot as plt

# Create lists of labels and accuracy values
labels = ["SVM", "LR", "KNN", "NB"]
accuracy_values = [accuracy_svc, accuracy_log, accuracy_knn, accuracy_GNB]
# NOTE(review): the NB "after tuning" value is accuracy_GNB3 (last cross-validation fold),
# while the other three are hold-out accuracies of the tuned models - confirm this is intended
accuracy_values2 = [accuracy_svc2, accuracy_log2, accuracy_knn2, accuracy_GNB3]

# Define colors for the bars
colors = ['darkkhaki', 'skyblue']

bar_width = 0.3
x = range(len(labels))  # Base y-axis position of every algorithm

# barh centres each bar on its y position. Offsetting the two series by half a bar
# width in opposite directions places them side by side; shifting only one series
# by half a width would make the second bar cover half of the first.
before_y = [p - bar_width / 2 for p in x]
shifted_x = [p + bar_width / 2 for p in x]

plt.figure(figsize=(10, 6))  # Adjust the figure size for better visibility

# Round accuracy values for the text annotations
r_accuracy_values = [round(v, 2) for v in accuracy_values]
r_accuracy_values2 = [round(v, 2) for v in accuracy_values2]

# Plot both sets of bars
plt.barh(before_y, accuracy_values, bar_width, label='Before Tuning', color=colors[0])
plt.barh(shifted_x, accuracy_values2, bar_width, label='After Tuning', color=colors[1])

# Write each rounded value in the middle of its own bar
for y_val, v in zip(before_y, r_accuracy_values):
    plt.text(v / 2, y_val, str(v), ha='center', va='center',
             color='azure', fontsize=12, fontweight='bold')
for y_val, v in zip(shifted_x, r_accuracy_values2):
    plt.text(v / 2, y_val, str(v), ha='center', va='center',
             color='azure', fontsize=12, fontweight='bold')

# Set limits for the y-axis to include all labels and bars
plt.ylim(-0.5, len(labels) - 0.5)

# Add labels and title
plt.xlabel("Accuracy")
plt.ylabel("Algorithm")
plt.title("Accuracy Comparison:Before Tuning vs After Tuning")

# Add label names to the y-axis (centred between the two bars of each algorithm)
plt.yticks(x, labels)

# Add a legend to distinguish between the two datasets
plt.legend(loc='upper left')

# Show the plot
plt.tight_layout()
plt.show()


# Predictive system

In [None]:
# Predict the diagnosis for one new sample with the tuned KNN model
input_data = (13.05,19.31,82.61,527.2,0.0806,0.03789,0.000692,0.004167,0.1819,0.05501,0.404,1.214,2.595,32.96,0.007491,0.008593,0.000692,0.004167,0.0219,0.00299,14.23,22.25,90.24,624.1,0.1021,0.06191,0.001845,0.01111,0.2439,0.06289)
#change the input_data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array as we are predicting for one data point
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# Standardize the sample with `scalar`, the scaler that was fitted on the feature
# matrix the models were trained on. Fitting a fresh StandardScaler on this single
# row would subtract the row from itself and turn every feature into 0, so the
# model would receive the same all-zero input for any patient.
input_data_std = scalar.transform(input_data_reshaped)

prediction = knn2.predict(input_data_std)
print(prediction)

# Encoded classes: 0 = benign, 1 = malignant
if prediction[0] == 0:
    print('The tumor is Benign')
else:
    print('The tumor is Malignant')

# Deep Learning techniques:

In [None]:
import tensorflow as tf 
tf.random.set_seed(3)
from tensorflow import keras

# Convolutional Neural Networks (CNNs):

In [None]:
X =breast_cancer_data.drop(columns='target',axis=1)
Y = breast_cancer_data['target']

In [None]:
# Reshape the data into a 3D tensor for CNN input
X = np.array(X).reshape(X.shape[0], X.shape[1], 1)

In [None]:
# Split the data into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

In [None]:
# Normalize the features
X_train = X_train.astype("float32")
X_test = X_test.astype("float32")
# Scale both splits by the maximum of the TRAINING data only: dividing the test set
# by its own maximum would put the two splits on different scales and would use
# test-set information during preprocessing
train_max = X_train.max()
X_train = X_train / train_max
X_test = X_test / train_max

In [None]:
# Define the CNN model
model = keras.Sequential()
# 1-D convolution over the feature axis: each sample is fed as X_train.shape[1] steps with 1 channel
model.add(keras.layers.Conv1D(filters=32, kernel_size=3, activation="relu", input_shape=(X_train.shape[1], 1)))
model.add(keras.layers.MaxPooling1D(pool_size=2))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(64, activation="relu"))
# Single sigmoid unit: the output is one value per sample for the binary target
model.add(keras.layers.Dense(1, activation="sigmoid"))

In [None]:
# Compile the model
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train the model
model.fit(X_train, Y_train, epochs=30, batch_size=32)

# Recurrent Neural Networks (RNNs) Using LSTM Method

In [None]:
X =breast_cancer_data.drop(columns='target',axis=1)
Y = breast_cancer_data['target']

In [None]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=2)

In [None]:
scaler = StandardScaler()


In [None]:
X_train_std = scaler.fit_transform(X_train)

X_test_std = scaler.transform(X_test)

In [None]:
# Define the RNN model
model = keras.Sequential()
# NOTE(review): the declared input shape is (X_train.shape[1], 1), but the model is later
# trained on the 2-D array X_train_std - confirm Keras adds the trailing axis as expected
model.add(keras.layers.LSTM(units=64, return_sequences=True, input_shape=(X_train.shape[1], 1)))
# return_sequences=True above passes the full sequence on to this second LSTM layer
model.add(keras.layers.LSTM(units=32))
model.add(keras.layers.Dense(128, activation='relu'),)
# Single sigmoid unit: the output is one value per sample for the binary target
model.add(keras.layers.Dense(1, activation="sigmoid"))

In [None]:
# Compile the model
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train the model
history = model.fit(X_train_std, Y_train, validation_split=0.1, epochs=30)

# Multilayer perceptron (MLP) Model

In [None]:
X =breast_cancer_data.drop(columns='target',axis=1)
Y = breast_cancer_data['target']

In [None]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

In [None]:
print(X.shape, X_train.shape, X_test.shape)

In [None]:
scaler = StandardScaler()

In [None]:

X_train_std = scaler.fit_transform(X_train)

X_test_std = scaler.transform(X_test)


In [None]:
print(X.shape, X_train.shape, X_test.shape)

In [None]:
# setting up the layers of Neural Network
# 30 input features -> 20 hidden units -> 2 output units (one per class)
# NOTE(review): the output layer uses 'sigmoid' on two units; 'softmax' is the usual
# choice with a sparse categorical cross-entropy loss - confirm this is intended

model = keras.Sequential([
                          keras.layers.Flatten(input_shape=(30,)),
                          keras.layers.Dense(20, activation='relu'),
                          keras.layers.Dense(2, activation='sigmoid')
                            
])

In [None]:
# compiling the Neural Network

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
 # training the Neural Network
 # (validation_split=0.1 sets aside 10% of the training data for validation)

history = model.fit(X_train_std, Y_train, validation_split=0.1, epochs=30)

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(5, 4))
# Score of the positive class (label 1) for every test sample.
# - The model was trained on standardized features, so it must be given X_test_std,
#   not the raw X_test.
# - Output column 1 belongs to class 1, which roc_curve/roc_auc_score treat as the
#   positive label; using column 0 would invert the curve.
y_proba = model.predict(X_test_std)[:, 1]
# Calculate the ROC curve
fpr, tpr, thresholds = roc_curve(Y_test, y_proba)

# Calculate the AUC
auc = roc_auc_score(Y_test, y_proba)

# Plot the ROC curve
plt.plot(fpr, tpr, label='(AUC = %0.2f)' % auc)

# Print the AUC
print('AUC:', auc)

# Show the plot
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()
print("")
print('False Positive Rates:', fpr)
print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
print('True Positive Rates:', tpr)

# Comparison graph 

In [None]:
# Bar chart of the accuracy of the three deep learning models
fig = plt.figure() 

labels = ["CNN", "RNN","MLP"]
# NOTE(review): these accuracies are hard-coded (in percent) rather than computed from the
# models trained above - they will not change when the models are re-trained
accuracy_values = [90.83, 96.37, 99.27]
r_accuracy_values = [round(v, 2) for v in accuracy_values]
plt.bar(labels,r_accuracy_values,color='skyblue')

# Write each value at half the height of its bar
for i,v in enumerate(r_accuracy_values):
    plt.text(i, v/2, str(v), ha='center', color='azure', fontsize=20,fontweight='bold')

plt.xlabel("Algorithm")
plt.ylabel("Accuracy")
plt.title("Accuracy of Deep neural networks")
plt.xticks(rotation=45)
plt.show()

# Comparison Graph between ML and Deep learning

In [None]:

# Bar chart comparing the tuned machine learning models with the deep learning models
fig = plt.figure(figsize=(13,8))
labels = ["SVM", "LR", "KNN", "NB","CNN", "RNN","MLP"]
# The ML accuracies are fractions and are converted to percent here;
# NOTE(review): the last three (deep learning) values are hard-coded percentages
accuracy_values = [accuracy_svc2*100, accuracy_log2*100, accuracy_knn2*100,accuracy_GNB3*100,90.83, 96.37, 99.27]
r_accuracy_values = [round(v, 2) for v in accuracy_values]

# Define a list of colors for each bar
colors = ['skyblue'] * len(labels)
colors[-3:] = ['darkkhaki','darkkhaki','darkkhaki'] # Use darkkhaki for the last 3 bars (the deep learning models)

# Plot the bars with the specified colors
plt.bar(labels, r_accuracy_values, color=colors)

# Annotate the bars with their accuracy values
for i, v in enumerate(r_accuracy_values):
    plt.text(i, v/2, str(v), ha='center', color='azure', fontsize=20, fontweight='bold')

plt.xlabel("Algorithm")
plt.ylabel("Accuracy")
plt.title("Accuracy comparison of all models")
plt.xticks(rotation=45)
plt.show()