VGG-19 IMPLEMENTATION

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from PIL import Image
from sklearn.model_selection import train_test_split
# from google.colab import drive
import random

In [None]:
# drive.mount('/content/drive')

In [None]:
# Side length in pixels; load_images resizes every image to IMG_SIZE x IMG_SIZE.
IMG_SIZE = 224  # 224x224 as mentioned in paper
# Upper bound on the number of images read from each class folder
# (passed to load_images as max_samples).
NUM_SAMPLES = 500

In [None]:
# Root folder of the dataset. Set the DATASET_PATH environment variable to point
# the notebook at another location without editing this cell; the original
# location remains the default.
dataset_path = os.environ.get('DATASET_PATH', 'E://data')
# The dataset root is expected to contain 'train' and 'test' sub-folders.
train_path = os.path.join(dataset_path, 'train')
test_path = os.path.join(dataset_path, 'test')

In [None]:
# Each split holds one sub-folder per class: 'benign' and 'malignant'.
train_benign_path, train_malignant_path = (
    os.path.join(train_path, class_dir) for class_dir in ('benign', 'malignant')
)
test_benign_path, test_malignant_path = (
    os.path.join(test_path, class_dir) for class_dir in ('benign', 'malignant')
)

In [None]:
# Report whether the dataset root and its two split folders are present.
print(
    "Checking dataset structure...",
    f"Dataset folder exists: {os.path.exists(dataset_path)}",
    f"Train folder exists: {os.path.exists(train_path)}",
    f"Test folder exists: {os.path.exists(test_path)}",
    sep="\n",
)

In [None]:
# Report whether each class folder exists inside the train split ...
print(
    "\nTrain folders:",
    f"  Train/Benign exists: {os.path.exists(train_benign_path)}",
    f"  Train/Malignant exists: {os.path.exists(train_malignant_path)}",
    sep="\n",
)

# ... and inside the test split.
print(
    "\nTest folders:",
    f"  Test/Benign exists: {os.path.exists(test_benign_path)}",
    f"  Test/Malignant exists: {os.path.exists(test_malignant_path)}",
    sep="\n",
)

In [None]:
# Count the '.jpg' files in every class folder (the match is case-sensitive).
train_benign_count = sum(1 for entry in os.listdir(train_benign_path) if entry.endswith('.jpg'))
print(f"  Train/Benign images: {train_benign_count}")

train_malignant_count = sum(1 for entry in os.listdir(train_malignant_path) if entry.endswith('.jpg'))
print(f"  Train/Malignant images: {train_malignant_count}")

test_benign_count = sum(1 for entry in os.listdir(test_benign_path) if entry.endswith('.jpg'))
print(f"  Test/Benign images: {test_benign_count}")

test_malignant_count = sum(1 for entry in os.listdir(test_malignant_path) if entry.endswith('.jpg'))
print(f"  Test/Malignant images: {test_malignant_count}")

Load Images

In [None]:
def load_images(folder_path, label, max_samples=500, seed=None):
    """Load up to ``max_samples`` ``.jpg`` images from a folder as float arrays.

    Every image is converted to RGB, resized to ``IMG_SIZE`` x ``IMG_SIZE`` and
    scaled to the [0, 1] range. Files that fail to load are reported and
    skipped, so the result may hold fewer images than were selected.

    Args:
        folder_path: Directory containing the images of one class.
        label: Label stored once for every successfully loaded image.
        max_samples: Maximum number of files to read. When the folder holds
            more, a random subset of this size is drawn.
        seed: Optional seed for the random subset. ``None`` (the default)
            draws from the global ``random`` state.

    Returns:
        Tuple ``(images, labels)``. ``images`` has shape
        ``(n, IMG_SIZE, IMG_SIZE, 3)`` (height, width, channel) and ``labels``
        has shape ``(n,)``, where ``n`` is the number of images loaded. The
        shapes are kept when ``n`` is 0 so results can still be concatenated.
    """
    # os.listdir returns entries in arbitrary order; sorting gives a fixed
    # starting point so a seeded sample (and the output order) is reproducible.
    # NOTE: the extension match is case-sensitive ('.JPG' / '.jpeg' are skipped).
    files = sorted(f for f in os.listdir(folder_path) if f.endswith('.jpg'))

    # Randomly select max_samples files
    if len(files) > max_samples:
        sampler = random if seed is None else random.Random(seed)
        files = sampler.sample(files, max_samples)

    # basename + normpath copes with '\\' separators and trailing slashes,
    # which splitting on '/' does not.
    folder_name = os.path.basename(os.path.normpath(folder_path))
    print(f"Loading {len(files)} images from {folder_name} folder...")

    # A plain np.array([]) would be 1-D float64 and could not be concatenated
    # with real image batches, so empty results carry the full image shape.
    empty_result = (
        np.empty((0, IMG_SIZE, IMG_SIZE, 3), dtype=np.float32),
        np.empty((0,), dtype=np.int64),
    )
    if not files:
        return empty_result

    # Define transforms
    transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),  # Converts PIL image to tensor and normalizes to [0,1]
    ])

    images = []
    labels = []

    for i, filename in enumerate(files):
        try:
            img_path = os.path.join(folder_path, filename)
            # The context manager releases the file handle as soon as the
            # pixel data has been converted.
            with Image.open(img_path) as img:
                img_tensor = transform(img.convert('RGB'))

            # Convert to numpy for consistency with rest of code
            img_array = img_tensor.permute(1, 2, 0).numpy()  # CHW -> HWC

            images.append(img_array)
            labels.append(label)

            if (i + 1) % 100 == 0:
                print(f"  Processed {i + 1}/{len(files)} images")

        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"  Error loading {filename}: {e}")
            continue

    if not images:
        # Every selected file failed to load.
        return empty_result

    return np.stack(images), np.array(labels)

In [None]:
# Training split: class 0 = benign, class 1 = malignant, capped at NUM_SAMPLES per class.
train_benign_images, train_benign_labels = load_images(
    folder_path=train_benign_path, label=0, max_samples=NUM_SAMPLES
)
train_malignant_images, train_malignant_labels = load_images(
    folder_path=train_malignant_path, label=1, max_samples=NUM_SAMPLES
)

In [None]:
# Join both classes along the sample axis (benign first, then malignant).
X_train = np.concatenate((train_benign_images, train_malignant_images))
y_train = np.concatenate((train_benign_labels, train_malignant_labels))

In [None]:
# Test split: class 0 = benign, class 1 = malignant, capped at NUM_SAMPLES per class.
test_benign_images, test_benign_labels = load_images(
    folder_path=test_benign_path, label=0, max_samples=NUM_SAMPLES
)
test_malignant_images, test_malignant_labels = load_images(
    folder_path=test_malignant_path, label=1, max_samples=NUM_SAMPLES
)

In [None]:
# Join both classes along the sample axis (benign first, then malignant).
X_test = np.concatenate((test_benign_images, test_malignant_images))
y_test = np.concatenate((test_benign_labels, test_malignant_labels))

In [None]:
# Size and class balance of the combined training arrays.
print(
    "\n=== Dataset Summary ===",
    "Training set:",
    f"  Total images: {len(X_train)}",
    f"  Benign images: {np.count_nonzero(y_train == 0)}",
    f"  Malignant images: {np.count_nonzero(y_train == 1)}",
    sep="\n",
)

In [None]:
# Size and class balance of the combined test arrays.
print(
    "\nTest set:",
    f"  Total images: {len(X_test)}",
    f"  Benign images: {np.count_nonzero(y_test == 0)}",
    f"  Malignant images: {np.count_nonzero(y_test == 1)}",
    sep="\n",
)

In [None]:
# Shape of a single training image and the overall pixel value range.
print(
    "\nImage specifications:",
    f"  Image shape: {X_train[0].shape}",
    f"  Pixel value range: [{np.min(X_train):.3f}, {np.max(X_train):.3f}]",
    sep="\n",
)