In [1]:
!pip install kaggle



In [None]:
from google.colab import files
# Prompt for kaggle.json and store it in the working directory.
# The return value maps each uploaded filename to its raw contents, so it is
# bound to a name instead of being left as the cell's last expression:
# otherwise the API key inside kaggle.json would be echoed into the cell output.
_kaggle_upload = files.upload()

In [None]:
# Place the uploaded kaggle.json under ~/.kaggle (presumably where the Kaggle
# CLI looks for credentials - confirm against the Kaggle API docs).
# `mkdir -p` makes the step safe to re-run when the directory already exists.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the credentials file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the `mohamedhanyyy/chest-ctscan-images` dataset with the Kaggle CLI.
# The next cell expects the archive as `chest-ctscan-images.zip` in the
# current working directory.
!kaggle datasets download -d mohamedhanyyy/chest-ctscan-images

In [None]:
!unzip chest-ctscan-images.zip -d chest_ctscan_images

In [None]:
import os
import cv2
import numpy as np

data = []
labels = []
dataset_dir = 'chest_ctscan_images'

# Load and preprocess the images.
# The os.walk loop variables are deliberately NOT named `files`: that name is
# the `google.colab.files` module imported in an earlier cell, and rebinding it
# to a list of filenames would break any later `files.upload()` call.
for subdir, dirnames, filenames in os.walk(dataset_dir):
    # Visit directories and files in a stable order so that the seeded split
    # below is reproducible regardless of filesystem listing order.
    dirnames.sort()

    # Binary label from the class folder that directly contains the images:
    # 0 = normal, 1 = any cancer type.
    # NOTE(review): this assumes the extracted tree uses a `normal` folder plus
    # one folder per carcinoma type (as on the Kaggle dataset listing). None of
    # those names contains the substring "cancer", so a substring test on
    # "cancer" labels every image 0. Confirm against the class counts printed
    # below and adjust if your dataset's structure differs.
    class_name = os.path.basename(subdir).lower()
    label = 0 if class_name.startswith('normal') else 1

    for filename in sorted(filenames):
        if not filename.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
            continue
        image_path = os.path.join(subdir, filename)
        img = cv2.imread(image_path)
        if img is None:
            # Unreadable or corrupt file: skip it rather than abort the load.
            continue
        img = cv2.resize(img, (224, 224))  # Resize image for VGG16
        # Normalize pixel values to [0, 1]; float32 halves the memory that the
        # default float64 result would need.
        img = img.astype(np.float32) / 255.0
        data.append(img)
        labels.append(label)

print(f"Loaded {len(data)} images.")
print(f"Class counts: normal={labels.count(0)}, cancer={labels.count(1)}")

# A classifier cannot be trained (or meaningfully evaluated) on one class;
# fail here with a clear message instead of reporting a misleading accuracy.
if len(set(labels)) < 2:
    raise ValueError(
        f"Expected images from both classes under {dataset_dir!r} but found "
        f"labels {sorted(set(labels))}; check the dataset path and the "
        "folder-name labeling rule above."
    )

# Convert lists to numpy arrays
data = np.array(data, dtype=np.float32)
labels = np.array(labels)

# Split the data into training and testing sets.
# `stratify` keeps the normal/cancer ratio the same in both subsets.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42, stratify=labels
)

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models, optimizers

# ImageNet-pretrained VGG16 without its classifier head, used as the feature
# extractor for 224x224 three-channel inputs.
vgg_base = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Freeze the convolutional base, one layer at a time. The flag is set per layer
# (not on `vgg_base` as a whole) because a later cell switches individual
# layers back on for fine-tuning.
for base_layer in vgg_base.layers:
    base_layer.trainable = False

# Stack the custom classification head on top of the frozen base.
model = models.Sequential()
model.add(vgg_base)
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dropout(0.5))
# Single sigmoid unit: the output is read as the probability of class 1.
model.add(layers.Dense(1, activation='sigmoid'))

# Compile for binary classification.
head_optimizer = optimizers.Adam(learning_rate=1e-4)
model.compile(
    loss='binary_crossentropy',
    optimizer=head_optimizer,
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Phase 1: train only the new classification head (the VGG16 base is frozen).
# The held-out split is passed as validation data, so Keras reports
# val_loss / val_accuracy on it after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
)

In [None]:
# Phase 2: make the last four layers of the VGG16 base trainable again.
for top_layer in vgg_base.layers[-4:]:
    top_layer.trainable = True

# Changes to `trainable` only take effect after compiling again; a 10x smaller
# learning rate is used for this fine-tuning phase.
fine_tune_optimizer = optimizers.Adam(learning_rate=1e-5)
model.compile(
    loss='binary_crossentropy',
    optimizer=fine_tune_optimizer,
    metrics=['accuracy'],
)

# Continue training from the current weights for a few more epochs.
history_fine = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_test, y_test),
)

In [None]:
# Score the final model on the held-out split. With the single 'accuracy'
# metric configured at compile time, evaluate() yields (loss, accuracy).
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)
print("Test Accuracy: {:.2f}".format(test_acc))


In [None]:
import matplotlib.pyplot as plt

# Put both training phases on one continuous, 1-based epoch axis.
# Without explicit x values matplotlib would start each curve at x=0, drawing
# the fine-tuning epochs on top of the first epochs of the initial phase
# instead of after them.
initial_epochs = range(1, len(history.history['accuracy']) + 1)
fine_tune_epochs = range(
    len(initial_epochs) + 1,
    len(initial_epochs) + len(history_fine.history['accuracy']) + 1,
)

# Plot training & validation accuracy values
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(initial_epochs, history.history['accuracy'], label='Train Accuracy')
plt.plot(initial_epochs, history.history['val_accuracy'], label='Validation Accuracy')
plt.plot(fine_tune_epochs, history_fine.history['accuracy'], label='Fine-tuned Train Accuracy')
plt.plot(fine_tune_epochs, history_fine.history['val_accuracy'], label='Fine-tuned Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# Plot training & validation loss values
plt.subplot(1, 2, 2)
plt.plot(initial_epochs, history.history['loss'], label='Train Loss')
plt.plot(initial_epochs, history.history['val_loss'], label='Validation Loss')
plt.plot(fine_tune_epochs, history_fine.history['loss'], label='Fine-tuned Train Loss')
plt.plot(fine_tune_epochs, history_fine.history['val_loss'], label='Fine-tuned Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
import matplotlib.pyplot as plt

# Plot training and validation accuracy for both phases on one continuous,
# 1-based epoch axis: the fine-tuning epochs are placed after the initial
# ones rather than overlapping them at x=0.
acc_initial_epochs = range(1, len(history.history['accuracy']) + 1)
acc_fine_tune_epochs = range(
    len(acc_initial_epochs) + 1,
    len(acc_initial_epochs) + len(history_fine.history['accuracy']) + 1,
)

plt.figure(figsize=(10, 5))
plt.plot(acc_initial_epochs, history.history['accuracy'], label='Train Accuracy', color='blue')
plt.plot(acc_initial_epochs, history.history['val_accuracy'], label='Validation Accuracy', color='orange')
plt.plot(acc_fine_tune_epochs, history_fine.history['accuracy'], label='Fine-tuned Train Accuracy', color='green')
plt.plot(acc_fine_tune_epochs, history_fine.history['val_accuracy'], label='Fine-tuned Validation Accuracy', color='red')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Plot training and validation loss for both phases on one continuous,
# 1-based epoch axis: the fine-tuning epochs are placed after the initial
# ones rather than overlapping them at x=0.
loss_initial_epochs = range(1, len(history.history['loss']) + 1)
loss_fine_tune_epochs = range(
    len(loss_initial_epochs) + 1,
    len(loss_initial_epochs) + len(history_fine.history['loss']) + 1,
)

plt.figure(figsize=(10, 5))
plt.plot(loss_initial_epochs, history.history['loss'], label='Train Loss', color='blue')
plt.plot(loss_initial_epochs, history.history['val_loss'], label='Validation Loss', color='orange')
plt.plot(loss_fine_tune_epochs, history_fine.history['loss'], label='Fine-tuned Train Loss', color='green')
plt.plot(loss_fine_tune_epochs, history_fine.history['val_loss'], label='Fine-tuned Validation Loss', color='red')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Persist the fine-tuned model to a single file in the working directory.
# NOTE(review): the `.h5` suffix presumably selects Keras' legacy HDF5 format;
# confirm it is still supported by the installed Keras version.
model.save('vgg16_lung_cancer_detection_finetuned.h5')

In [None]:
from google.colab import files
# Ask the user for an image to classify; the result is keyed by filename.
uploaded = files.upload()

# A cancelled or empty upload would otherwise surface as a bare IndexError.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose an image.")

# Only the first uploaded file is used for the prediction.
new_image_filename = list(uploaded.keys())[0]

In [None]:
# Apply the same preprocessing as the training images: read with OpenCV,
# resize to 224x224, scale to [0, 1], and add a leading batch axis.
new_image_path = new_image_filename
new_image = cv2.imread(new_image_path)

# cv2.imread signals failure by returning None (the training loop checks for
# this too); stop here with a clear message instead of an opaque resize error.
if new_image is None:
    raise ValueError(
        f"Could not read an image from {new_image_path!r}; "
        "upload a valid .jpg, .jpeg, .png or .bmp file."
    )

new_image = cv2.resize(new_image, (224, 224))
new_image = new_image / 255.0  # Normalize pixel values
new_image = np.expand_dims(new_image, axis=0)  # Add batch dimension

In [None]:
# Run the single-image batch through the model. The sigmoid output for the
# first (only) sample is compared against a 0.5 decision threshold:
# above it -> class 1 (cancerous), otherwise class 0.
prediction = model.predict(new_image)

print(
    "Prediction: Cancerous Lung"
    if prediction[0] > 0.5
    else "Prediction: Non-Cancerous Lung"
)
