# **_Initial Stage_**

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

# Root folder of the dataset on the mounted Drive.
dataset_directory = '/content/drive/MyDrive/Officialdataset'

# Walk the tree and print the full path of every file found, one per line.
for root, _subdirs, files in os.walk(dataset_directory):
    full_paths = (os.path.join(root, name) for name in files)
    for path in full_paths:
        print(path)

In [None]:
!pip install tensorflow

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from keras.callbacks import ReduceLROnPlateau
import cv2
import os
import ipywidgets as widgets
import io
from PIL import Image
import tqdm
from sklearn.utils import shuffle
import tensorflow as tf
from keras.optimizers import SGD
from tensorflow.keras.preprocessing import image
import numpy as np

In [None]:
X_train = []
Y_train = []
image_size = 150
labels = ['Normal','Malignant','Benign']

# Both folders are pooled into one dataset here; the train/test split is made later.
# NOTE(review): 'Traning' is spelled as in the original path — presumably it matches
# the folder name on Drive; rename both together if it is ever corrected.
split_dirs = [
    '/content/drive/MyDrive/Officialdataset (AUGMENTED)/Traning',
    '/content/drive/MyDrive/Officialdataset (AUGMENTED)/Testing',
]

skipped_files = []
for split_dir in split_dirs:
    for label in labels:
        folderPath = os.path.join(split_dir, label)
        # os.listdir order is unspecified; sorting keeps the later seeded shuffle reproducible.
        for file_name in sorted(os.listdir(folderPath)):
            file_path = os.path.join(folderPath, file_name)
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread returns None for anything it cannot decode
                # (sub-folders, non-image files); cv2.resize would fail on it.
                skipped_files.append(file_path)
                continue
            X_train.append(cv2.resize(img, (image_size, image_size)))
            Y_train.append(label)

if skipped_files:
    print(f"Skipped {len(skipped_files)} unreadable file(s), e.g. {skipped_files[0]}")

X_train = np.array(X_train)
Y_train = np.array(Y_train)


In [None]:
# Shuffle images and labels in unison; the fixed seed makes the permutation repeatable.
X_train, Y_train = shuffle(
    X_train,
    Y_train,
    random_state=101,
)
X_train.shape

In [None]:
# Hold out 30% of the pooled data as the test set.
# NOTE: X_train is rebound to the training portion, so re-running this cell
# would split the already-reduced training set again.
X_train, X_test, y_train, y_test = train_test_split(
    X_train,
    Y_train,
    test_size=0.3,
    random_state=101,
)

In [None]:
# Class index = position of the class name in `labels`.
label_to_index = {name: idx for idx, name in enumerate(labels)}

# num_classes is passed explicitly so the one-hot width always equals len(labels),
# even if a split happens not to contain the highest class index.
# Run once per split: afterwards y_train / y_test hold one-hot arrays, not class names.
y_train = [label_to_index[name] for name in y_train]
y_train = tf.keras.utils.to_categorical(y_train, num_classes=len(labels))

y_test = [label_to_index[name] for name in y_test]
y_test = tf.keras.utils.to_categorical(y_test, num_classes=len(labels))



---


# ***MODEL 1***


---



In [None]:
# Model 1: four convolutional stages (each ending in max-pooling and dropout)
# followed by two 512-unit dense layers and a 3-way softmax output.
model = Sequential([
    # Stage 1
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Dropout(0.3),
    # Stage 2 (dropout is applied both before and after the pooling layer)
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    Dropout(0.3),
    MaxPooling2D(2, 2),
    Dropout(0.3),
    # Stage 3
    Conv2D(128, (3, 3), activation='relu'),
    Conv2D(128, (3, 3), activation='relu'),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Dropout(0.3),
    # Stage 4
    Conv2D(128, (3, 3), activation='relu'),
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Dropout(0.3),
    # Classifier head
    Flatten(),
    Dense(512, activation='relu'),
    Dense(512, activation='relu'),
    Dropout(0.3),
    Dense(3, activation='softmax'),
])

In [None]:
# One-hot targets with a softmax output -> categorical cross-entropy; accuracy is tracked per epoch.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of Model 1.
model.summary()

In [None]:
# Train Model 1; 30% of the training arrays are held out by Keras for validation.
history = model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=10,
    validation_split=0.3,
)

In [None]:
# Per-epoch accuracy recorded while training Model 1.
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Epoch numbers for the x-axis, starting at 1.
epochs = range(1, len(train_accuracy) + 1)

# Training (red) vs. validation (blue) accuracy on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(epochs, train_accuracy, 'r', label="Training Accuracy")
ax.plot(epochs, val_accuracy, 'b', label="Validation Accuracy")
ax.set_title('Training and Validation Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Per-epoch loss recorded while training Model 1.
train_loss = history.history['loss']
val_loss = history.history['val_loss']

# Epoch numbers for the x-axis, starting at 1.
epochs = range(1, len(train_loss) + 1)

# Training (red) vs. validation (blue) loss on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(epochs, train_loss, 'r', label="Training Loss")
ax.plot(epochs, val_loss, 'b', label="Validation Loss")
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
def load_and_preprocess_image(image_path, target_size=(150, 150)):
    """Read an image from disk and return it as a batch containing one image.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.
        target_size: ``(height, width)`` of the returned image.

    Returns:
        Array of shape ``(1, target_size[0], target_size[1], 3)`` holding the
        resized image with the channel order produced by ``cv2.imread``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    height, width = target_size
    # cv2.resize expects dsize as (width, height), the reverse of the array's
    # (rows, cols) layout; passing it swapped scrambles non-square images.
    img = cv2.resize(img, (width, height))
    return np.array(img).reshape(1, height, width, 3)

# Sample image from the Testing/Benign folder used for the single-image demo.
image_path = '/content/drive/MyDrive/Officialdataset (AUGMENTED)/Testing/Benign/Benign_original_Bengin case (104).jpg_c06894e7-6305-477b-b5c4-8c3868c9fdf2.jpg'

# Batch of one image, resized for the models.
img_array = load_and_preprocess_image(image_path)

# Show the image as stored on disk (not the resized copy).
img = image.load_img(image_path)
fig, ax = plt.subplots()
ax.imshow(img, interpolation='nearest')
plt.show()

In [None]:
# Class scores from Model 1 for the one-image batch; argmax gives the class index.
a = model.predict(img_array)
predicted_class = a.argmax()

# Human-readable information for each class index (same order as `labels`).
tumor_info = {
    0: {
        'name': 'Normal',
        'details': 'Normal lung tissue without cancerous growth.',
        'precautions': 'No specific precautions needed.',
        'treatment': 'No cancer treatment required.',
    },
    1: {
        'name': 'Malignant',
        'details': 'Cancerous growth in the lung with potential for metastasis.',
        'precautions': 'Seek immediate medical attention. Avoid smoking and exposure to carcinogens.',
        'treatment': 'Treatment options include surgery, chemotherapy, and radiation therapy.',
    },
    2: {
        'name': 'Benign',
        'details': 'Non-cancerous growth or tumor in the lung.',
        'precautions': 'Monitor for any changes in health. Consult a healthcare professional.',
        'treatment': 'Treatment may be required only if the tumor causes symptoms.',
    },
}

# Print each field of the predicted class under its heading.
for heading, field in (
    ("Predicted Tumor Type:", 'name'),
    ("Details:", 'details'),
    ("Precautions:", 'precautions'),
    ("Treatment:", 'treatment'),
):
    print(heading, tumor_info[predicted_class][field])


In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Model 1 on the held-out test set: predicted vs. true class indices.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)
y_true = y_test.argmax(axis=1)

confusion_mtx = confusion_matrix(y_true, y_pred_classes)

# Heatmap with class names on both axes (rows = true, columns = predicted).
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    confusion_mtx,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=labels,
    yticklabels=labels,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()

# Per-class precision / recall / F1 as text.
class_report = classification_report(y_true, y_pred_classes, target_names=labels)
print(class_report)


In [None]:
# Model 1 on its own training data: predicted vs. true class indices.
y_train_pred_model1 = model.predict(X_train)
y_train_pred_classes_model1 = y_train_pred_model1.argmax(axis=1)
y_train_true = np.argmax(y_train, axis=1)
confusion_mtx_model1 = confusion_matrix(y_train_true, y_train_pred_classes_model1)

# Heatmap with class names on both axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    confusion_mtx_model1,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=labels,
    yticklabels=labels,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix - Model 1 (Training Set)')
plt.show()

# Per-class precision / recall / F1 on the training data.
class_report_model1 = classification_report(
    y_train_true,
    y_train_pred_classes_model1,
    target_names=labels,
)
print("Classification Report - Model 1 (Training Set):\n", class_report_model1)




---


# ***MODEL 2***


---



In [None]:
from tensorflow.keras.optimizers import Adam

# Model 2: two conv/pool stages, one 128-unit dense layer with dropout, 3-way softmax.
model2 = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax'),
])

# Adam with an explicit learning rate; loss and metric match the other models.
model2.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model2.summary()

In [None]:
# Train Model 2; 30% of the training arrays are held out by Keras for validation.
history2 = model2.fit(
    X_train,
    y_train,
    epochs=10,
    validation_split=0.3,
)

In [None]:
# Per-epoch accuracy recorded while training Model 2.
train_accuracy2 = history2.history['accuracy']
val_accuracy2 = history2.history['val_accuracy']

# Epoch numbers for the x-axis, starting at 1.
epochs = range(1, len(train_accuracy2) + 1)

# Training (red) vs. validation (blue) accuracy on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(epochs, train_accuracy2, 'r', label="Training Accuracy")
ax.plot(epochs, val_accuracy2, 'b', label="Validation Accuracy")
ax.set_title('Training and Validation Accuracy (Model 2)')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Per-epoch metrics recorded while training Model 2.
acc2 = history2.history['accuracy']
val_acc2 = history2.history['val_accuracy']
loss2 = history2.history['loss']
val_loss2 = history2.history['val_loss']

# Number epochs from 1 so this figure lines up with the other training plots
# in the notebook (the previous range(len(acc2)) started at 0).
epochs = range(1, len(acc2) + 1)

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(14, 7))

# Left panel: accuracy.
ax_acc.plot(epochs, acc2, 'r', label="Training Accuracy")
ax_acc.plot(epochs, val_acc2, 'b', label="Validation Accuracy")
ax_acc.set_title('Training and Validation Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend(loc='best')

# Right panel: loss.
ax_loss.plot(epochs, loss2, 'r', label="Training Loss")
ax_loss.plot(epochs, val_loss2, 'b', label="Validation Loss")
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend(loc='best')

plt.show()

In [None]:
# Sample image from the Testing/Benign folder used for the single-image demo.
image_path = '/content/drive/MyDrive/Officialdataset (AUGMENTED)/Testing/Benign/Benign_original_Bengin case (104).jpg_c06894e7-6305-477b-b5c4-8c3868c9fdf2.jpg'

# Batch of one image, resized for the models.
img_array = load_and_preprocess_image(image_path)

# Show the image as stored on disk (not the resized copy).
img = image.load_img(image_path)
fig, ax = plt.subplots()
ax.imshow(img, interpolation='nearest')
plt.show()

In [None]:
# Class scores from Model 2 for the one-image batch; argmax gives the class index.
a = model2.predict(img_array)
predicted_class2 = a.argmax()

# Human-readable information for each class index (same order as `labels`).
tumor_info = {
    0: {
        'name': 'Normal',
        'details': 'Normal lung tissue without cancerous growth.',
        'precautions': 'No specific precautions needed.',
        'treatment': 'No cancer treatment required.',
    },
    1: {
        'name': 'Malignant',
        'details': 'Cancerous growth in the lung with potential for metastasis.',
        'precautions': 'Seek immediate medical attention. Avoid smoking and exposure to carcinogens.',
        'treatment': 'Treatment options include surgery, chemotherapy, and radiation therapy.',
    },
    2: {
        'name': 'Benign',
        'details': 'Non-cancerous growth or tumor in the lung.',
        'precautions': 'Monitor for any changes in health. Consult a healthcare professional.',
        'treatment': 'Treatment may be required only if the tumor causes symptoms.',
    },
}

# Print each field of the predicted class under its heading.
for heading, field in (
    ("Predicted Tumor Type:", 'name'),
    ("Details:", 'details'),
    ("Precautions:", 'precautions'),
    ("Treatment:", 'treatment'),
):
    print(heading, tumor_info[predicted_class2][field])


In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Model 2 on the held-out test set: predicted vs. true class indices.
y_pred = model2.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)
y_true = y_test.argmax(axis=1)

confusion_mtx = confusion_matrix(y_true, y_pred_classes)

# Heatmap with class names on both axes (rows = true, columns = predicted).
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    confusion_mtx,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=labels,
    yticklabels=labels,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()

# Per-class precision / recall / F1 as text.
class_report = classification_report(y_true, y_pred_classes, target_names=labels)
print(class_report)


In [None]:
# Model 2 on its own training data: predicted vs. true class indices.
y_train_pred_model2 = model2.predict(X_train)
y_train_pred_classes_model2 = y_train_pred_model2.argmax(axis=1)
y_train_true = np.argmax(y_train, axis=1)
confusion_mtx_model2 = confusion_matrix(y_train_true, y_train_pred_classes_model2)

# Heatmap with class names on both axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    confusion_mtx_model2,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=labels,
    yticklabels=labels,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix - Model 2 (Training Set)')
plt.show()

# Per-class precision / recall / F1 on the training data.
class_report_model2 = classification_report(
    y_train_true,
    y_train_pred_classes_model2,
    target_names=labels,
)
print("Classification Report - Model 2 (Training Set):\n", class_report_model2)




---


# ***MODEL 3***


---



In [None]:
# Model 3: three conv/pool stages, one 128-unit dense layer with dropout, 3-way softmax.
model3 = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax'),
])

model3.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model3.summary()

# Train Model 3; 30% of the training arrays are held out by Keras for validation.
history3 = model3.fit(
    X_train,
    y_train,
    epochs=10,
    validation_split=0.3,
)

In [None]:
# Sample image from the Testing/Benign folder used for the single-image demo.
# (A stray literal `3` that preceded this comment has been removed.)
image_path = '/content/drive/MyDrive/Officialdataset (AUGMENTED)/Testing/Benign/Benign_original_Bengin case (104).jpg_c06894e7-6305-477b-b5c4-8c3868c9fdf2.jpg'

# Load and preprocess the image into a batch of one.
img_array = load_and_preprocess_image(image_path)

# Display the image as stored on disk (not the resized copy).
img = image.load_img(image_path)
plt.imshow(img, interpolation='nearest')
plt.show()

In [None]:
# Class scores from Model 3 for the one-image batch; argmax gives the class index.
a = model3.predict(img_array)
predicted_class3 = a.argmax()

# Human-readable information for each class index (same order as `labels`).
tumor_info = {
    0: {
        'name': 'Normal',
        'details': 'Normal lung tissue without cancerous growth.',
        'precautions': 'No specific precautions needed.',
        'treatment': 'No cancer treatment required.',
    },
    1: {
        'name': 'Malignant',
        'details': 'Cancerous growth in the lung with potential for metastasis.',
        'precautions': 'Seek immediate medical attention. Avoid smoking and exposure to carcinogens.',
        'treatment': 'Treatment options include surgery, chemotherapy, and radiation therapy.',
    },
    2: {
        'name': 'Benign',
        'details': 'Non-cancerous growth or tumor in the lung.',
        'precautions': 'Monitor for any changes in health. Consult a healthcare professional.',
        'treatment': 'Treatment may be required only if the tumor causes symptoms.',
    },
}

# Print each field of the predicted class under its heading.
for heading, field in (
    ("Predicted Tumor Type:", 'name'),
    ("Details:", 'details'),
    ("Precautions:", 'precautions'),
    ("Treatment:", 'treatment'),
):
    print(heading, tumor_info[predicted_class3][field])

In [None]:
# Model 3 on its own training data: predicted vs. true class indices.
y_train_pred_model3 = model3.predict(X_train)
y_train_pred_classes_model3 = y_train_pred_model3.argmax(axis=1)
y_train_true = np.argmax(y_train, axis=1)
confusion_mtx_model3 = confusion_matrix(y_train_true, y_train_pred_classes_model3)

# Heatmap with class names on both axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    confusion_mtx_model3,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=labels,
    yticklabels=labels,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix - Model 3 (Training Set)')
plt.show()

# Per-class precision / recall / F1 on the training data.
class_report_model3 = classification_report(
    y_train_true,
    y_train_pred_classes_model3,
    target_names=labels,
)
print("Classification Report - Model 3 (Training Set):\n", class_report_model3)




---


# ***Ensembling & Voting***


---



In [None]:
from statistics import mode

# Class scores from each of the three models for the current one-image batch.
predictions_model1 = model.predict(img_array)
predictions_model2 = model2.predict(img_array)
predictions_model3 = model3.predict(img_array)

# Each model votes for its highest-scoring class.
class_model1, class_model2, class_model3 = (
    np.argmax(scores)
    for scores in (predictions_model1, predictions_model2, predictions_model3)
)
votes = [class_model1, class_model2, class_model3]

# The most frequent vote is the ensemble decision.
ensemble_prediction = mode(votes)

# Print each field of the winning class under its heading.
for heading, field in (
    ("Ensemble Predicted Tumor Type:", 'name'),
    ("Details:", 'details'),
    ("Precautions:", 'precautions'),
    ("Treatment:", 'treatment'),
):
    print(heading, tumor_info[ensemble_prediction][field])


In [None]:
from scipy.stats import mode

def ensemble_predict(models, X):
    """Predict one class per sample by majority vote across several models.

    The vote is computed with NumPy only, so the function does not depend on
    which ``mode`` function (``statistics.mode`` or ``scipy.stats.mode``) is
    currently bound in the notebook namespace.

    Args:
        models: Sequence of objects whose ``predict(X)`` returns one row of
            class scores per sample.
        X: Input batch, passed unchanged to every model.

    Returns:
        1-D integer array with the voted class index for each sample. When
        several classes receive the same number of votes, the smallest class
        index wins.
    """
    # One column of hard votes per model -> shape (n_samples, n_models).
    votes = np.stack(
        [np.argmax(member.predict(X), axis=1) for member in models],
        axis=1,
    )
    if votes.shape[0] == 0:
        # Empty batch: nothing to vote on.
        return np.empty(0, dtype=votes.dtype)

    # Count the votes each class received per sample; argmax returns the first
    # maximum, i.e. the smallest class index among tied classes.
    n_classes = int(votes.max()) + 1
    tallies = np.stack(
        [(votes == cls).sum(axis=1) for cls in range(n_classes)],
        axis=1,
    )
    return tallies.argmax(axis=1)

# The three trained networks that take part in the vote.
models = [model, model2, model3]

# Majority-vote class index for every image in the held-out test set.
ensemble_predictions = ensemble_predict(models, X=X_test)


In [None]:
# Collapse one-hot targets to class indices (integer labels pass through unchanged).
if len(y_test.shape) > 1 and y_test.shape[1] > 1:
    y_test_single_label = np.argmax(y_test, axis=1)
else:
    y_test_single_label = y_test

# Confusion matrix of the ensemble on the test set.
conf_matrix = confusion_matrix(y_test_single_label, ensemble_predictions)

# Class index k was produced from labels.index(...), so the report rows must be
# named with `labels` in that same order; a hand-written, differently ordered
# list attaches the metrics to the wrong class names.
class_report = classification_report(
    y_test_single_label,
    ensemble_predictions,
    target_names=labels,
)

# Print the results
print("Confusion Matrix for Ensemble Model:")
print(conf_matrix)

print("\nClassification Report for Ensemble Model:")
print(class_report)


In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Collapse one-hot targets to class indices (integer labels pass through unchanged).
if len(y_test.shape) > 1 and y_test.shape[1] > 1:
    y_test_single_label = np.argmax(y_test, axis=1)
else:
    y_test_single_label = y_test

# Ask scikit-learn for the metrics as a dict instead of parsing the printed
# report: no dependence on the text layout or on whichever `class_report`
# string was assigned last, and values keep full precision. Passing the class
# indices explicitly keeps one row per class even if a class is absent.
report_dict = classification_report(
    y_test_single_label,
    ensemble_predictions,
    labels=list(range(len(labels))),
    target_names=labels,
    output_dict=True,
)

# One row per class, in the same order as `labels`.
report_data = [
    {
        'Class': class_name,
        'Precision': report_dict[class_name]['precision'],
        'Recall': report_dict[class_name]['recall'],
        'F1-Score': report_dict[class_name]['f1-score'],
        'Support': int(report_dict[class_name]['support']),
    }
    for class_name in labels
]
report_df = pd.DataFrame(report_data)

# Overall accuracy of the ensemble on the test set.
overall_accuracy = accuracy_score(y_test_single_label, ensemble_predictions)

# Summary row: per-class metrics do not apply, so they are shown as '-'.
new_row = pd.Series({
    'Class': 'Overall',
    'Precision': '-',
    'Recall': '-',
    'F1-Score': '-',
    'Support': len(y_test_single_label)
})
new_row_df = pd.DataFrame([new_row])
report_df = pd.concat([report_df, new_row_df], ignore_index=True)

# The same overall accuracy is repeated on every row as an extra column.
report_df['Accuracy'] = overall_accuracy

# Display the table
print(report_df)


In [None]:
import matplotlib.pyplot as plt

# Final-epoch training / validation accuracy of each model, read from the
# Keras History objects instead of hard-coded example numbers.
model_names = ['Model 1', 'Model 2', 'Model 3']
histories = [history, history2, history3]
train_accuracies = [h.history['accuracy'][-1] for h in histories]
val_accuracies = [h.history['val_accuracy'][-1] for h in histories]

# One point per model, so the x-axis shows models rather than epochs.
# The dashed line is the ensemble accuracy stored in `overall_accuracy`.
plt.plot(model_names, train_accuracies, marker='o', label='Training Accuracy')
plt.plot(model_names, val_accuracies, marker='o', label='Validation Accuracy')
plt.plot(model_names, [overall_accuracy]*len(model_names), label='Ensemble Accuracy', linestyle='--')
plt.xlabel('Model')
plt.ylabel('Accuracy')
plt.title('Model Accuracy')
plt.legend()
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Metric columns of report_df to summarise.
metrics = ['Precision', 'Recall', 'F1-Score']

# Keep only the per-class rows; the 'Overall' row stores '-' in the metric columns.
is_class_row = report_df['Precision'] != '-'
numeric_df = report_df[is_class_row]

# Unweighted mean of each metric over the per-class rows.
values = numeric_df[metrics].astype(float).mean().values

fig, ax = plt.subplots()
ax.plot(metrics, values, marker='o')
ax.set_title('Ensemble Model Performance')
ax.set_xlabel('Metrics')
ax.set_ylabel('Score')
ax.set_ylim(0, 1)
plt.show()
