# In [None]:
#Augmentation
# Mount Google Drive
# The dataset path used below lives under /content/drive, so the drive must be
# mounted before any image is read.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime — confirm before running this script elsewhere.
from google.colab import drive
drive.mount("/content/drive")

# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import sklearn
import random
import tensorflow as tf
import os
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.callbacks import ReduceLROnPlateau, TensorBoard, ModelCheckpoint

# Constants
# Root folder that holds one sub-folder per class name listed in `labels`.
base_path = '/content/drive/MyDrive/Augmentation'  # NO train/test folder
# Class names; each is used as a sub-folder name under base_path, and its
# position in this list is the integer class index used for one-hot encoding.
labels = ['NORMAL', 'PNEUMONIA']
IMAGE_SIZE = 224  # Side length (pixels) every image is resized to; also the model input size.
BATCH_SIZE = 32  # Mini-batch size passed to model.fit.
VERBOSE = 1  # Verbosity level passed to model.fit and ReduceLROnPlateau.

# Load Data
# Read every image of every class folder into memory, resized to
# IMAGE_SIZE x IMAGE_SIZE, using the folder name as the image's label.
x_data = []
y_data = []

print("Starting data loading...")
for label in labels:
    folderPath = os.path.join(base_path, label)
    if not os.path.exists(folderPath):
        print(f"Missing folder: {folderPath}")
        continue
    # os.listdir() returns entries in arbitrary order; sorting them keeps the
    # seeded shuffle below reproducible from run to run.
    for filename in tqdm(sorted(os.listdir(folderPath)), desc=f"Loading {label} images"):
        img_path = os.path.join(folderPath, filename)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            print(f"Unreadable image: {img_path}")
            continue
        # cv2.imread decodes to BGR channel order, while matplotlib's imshow
        # and ImageNet-pretrained networks expect RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
        x_data.append(img)
        y_data.append(label)

# Fail early with a clear message instead of an obscure error further down.
if not x_data:
    raise RuntimeError(f"No readable images found under {base_path}")

x_data = np.array(x_data)
y_data = np.array(y_data)
# Shuffle images and labels together so classes are interleaved.
x_data, y_data = sklearn.utils.shuffle(x_data, y_data, random_state=0)
print("✅ Dataset loading complete.")
print("Total images loaded:", len(x_data))

# Class balance plot
# Draw one bar per class showing how many loaded images carry that label.
print("Plotting class distribution...")
# The labels must be passed by keyword: in current seaborn releases the first
# positional parameter of countplot is `data`, not the variable to count.
sns.countplot(x=y_data)
plt.title("Class Distribution")
plt.show()

# Display sample images
# For each class, show the first image in the dataset that carries that label;
# classes with no image are skipped.
for label in labels:
    first_idx = next(
        (position for position, name in enumerate(y_data) if name == label),
        None,
    )
    if first_idx is None:
        continue
    plt.figure(figsize=(5, 5))
    plt.imshow(x_data[first_idx])
    plt.title(label)
    plt.axis('off')
    plt.show()

# Data Augmentation — safe version (no .fit)
print("Applying image augmentation...")
# Random transformation settings handed to the Keras image generator.
augmentation_settings = {
    "rotation_range": 30,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "zoom_range": 0.2,
    "horizontal_flip": True,
}
datagen = ImageDataGenerator(**augmentation_settings)
# datagen.fit(x_data) is deliberately not called, to prevent memory issues.

# Split Data
# Hold out 10% of the images as the test set; the seed keeps it reproducible.
print("Splitting data into train and test...")
# Stratify on the labels so the test split keeps the same class ratio as the
# full dataset instead of whatever ratio a purely random draw produces.
x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data, test_size=0.10, random_state=0, stratify=y_data)

print("Shapes - X: Train:", x_train.shape, " Test:", x_test.shape)
print("Shapes - Y: Train:", y_train.shape, " Test:", y_test.shape)

# One-hot encode labels
# Map each class name to its position in `labels`, then expand to one-hot rows.
print("Encoding labels...")
label_to_index = {name: position for position, name in enumerate(labels)}
# Pin the width to the number of classes: without num_classes, to_categorical
# infers it from the largest index present, so a split that happens to lack
# the last class would yield fewer columns than the model has outputs.
num_classes = len(labels)
y_train = tf.keras.utils.to_categorical(
    [label_to_index[i] for i in y_train], num_classes=num_classes)
y_test = tf.keras.utils.to_categorical(
    [label_to_index[i] for i in y_test], num_classes=num_classes)

# Load EfficientNetB0
# ImageNet-pretrained backbone without its classification head, taking
# IMAGE_SIZE x IMAGE_SIZE 3-channel inputs.
print("Loading EfficientNetB0 model...")
effnet = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3))

# Add custom layers
# Pool the backbone's feature maps, apply dropout, then classify.
x = tf.keras.layers.GlobalAveragePooling2D()(effnet.output)
x = tf.keras.layers.Dropout(0.5)(x)
# One softmax output per entry in `labels`, so the head always matches the
# width of the one-hot encoded targets.
output = tf.keras.layers.Dense(len(labels), activation='softmax')(x)
model = tf.keras.models.Model(inputs=effnet.input, outputs=output)

# Compile model
# categorical_crossentropy matches the one-hot encoded targets.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Callbacks
# Write TensorBoard event files into the local "logs" directory.
tensorboard = TensorBoard(log_dir='logs')
# Save the model to effnet.h5 only when validation accuracy improves.
checkpoint = ModelCheckpoint(
    "effnet.h5",
    monitor="val_accuracy",
    save_best_only=True,
    verbose=1,
)
# Multiply the learning rate by 0.3 when validation accuracy has not improved
# by at least 0.001 for 2 epochs.
reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.3,
    patience=2,
    min_delta=0.001,
    verbose=VERBOSE,
)

# Train model
# Fit on randomly augmented batches drawn from `datagen`, validating on an
# untouched hold-out so val_accuracy (monitored by the callbacks) reflects
# real, non-augmented images.
print("Starting training...")
# model.fit's validation_split is not supported for generator input, so the
# hold-out is carved off by hand. Taking the last 10% of the samples mirrors
# what validation_split=0.1 selects for array input.
validation_fraction = 0.1
split_at = int(len(x_train) * (1.0 - validation_fraction))
x_fit, x_val = x_train[:split_at], x_train[split_at:]
y_fit, y_val = y_train[:split_at], y_train[split_at:]

history = model.fit(
    # Feeding the generator is what actually applies the augmentation;
    # the batch size is set on the generator rather than on fit().
    datagen.flow(x_fit, y_fit, batch_size=BATCH_SIZE, seed=0),
    validation_data=(x_val, y_val),
    epochs=10,
    verbose=VERBOSE,
    callbacks=[tensorboard, checkpoint, reduce_lr]
)

# Plot accuracy and loss
# Side-by-side curves of training vs. validation accuracy and loss per epoch.
print("Plotting training results...")
# Derive the x-axis from the recorded history rather than a hard-coded epoch
# count, so the plot still works if the number of trained epochs changes.
epochs = range(len(history.history['accuracy']))
fig, ax = plt.subplots(1, 2, figsize=(14, 7))

# Left panel: accuracy.
ax[0].plot(epochs, history.history['accuracy'], 'go-', label='Training Accuracy')
ax[0].plot(epochs, history.history['val_accuracy'], 'ro-', label='Validation Accuracy')
ax[0].set_title('Training & Validation Accuracy')
ax[0].legend()
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Accuracy')

# Right panel: loss.
ax[1].plot(epochs, history.history['loss'], 'g-o', label='Training Loss')
ax[1].plot(epochs, history.history['val_loss'], 'r-o', label='Validation Loss')
ax[1].set_title('Training & Validation Loss')
ax[1].legend()
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Loss')

plt.show()

# Evaluate model
# Score the trained model on the held-out test images and report the two
# values it returns: loss first, then accuracy (the only compiled metric).
print("Evaluating on test set...")
result = model.evaluate(x_test, y_test)
test_loss, test_accuracy = result[0], result[1]
print("✅ Test Loss:", test_loss)
print("✅ Test Accuracy:", test_accuracy * 100, "%")

# Predictions
# Turn the per-class scores and the one-hot targets into integer class
# indices by taking the position of the largest value in each row.
print("Generating predictions...")
predictions = model.predict(x_test)
pred_classes = predictions.argmax(axis=1)
true_classes = y_test.argmax(axis=1)

# Classification report
# Per-class precision / recall / F1 on the test set.
# Passing the full list of class indices keeps the report aligned with
# target_names even when a class is absent from both the true and predicted
# test labels; otherwise the label/name count mismatch raises an error.
class_indices = list(range(len(labels)))
print("Classification Report:\n", classification_report(
    true_classes, pred_classes, labels=class_indices, target_names=labels))

# Confusion matrix
# Heatmap of true class (rows) against predicted class (columns).
print("Plotting confusion matrix...")
# Fix the matrix to one row/column per entry in `labels`; left to infer the
# classes from the data, it would shrink when a class is missing and no longer
# line up with the tick labels below.
cf_matrix = confusion_matrix(true_classes, pred_classes, labels=list(range(len(labels))))
sns.heatmap(cf_matrix, annot=True, cmap='Blues', fmt='g', xticklabels=labels, yticklabels=labels)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()
