C:\Dataset\Herlev Dataset\train

In [10]:
!pip install opencv-python-headless numpy


Collecting opencv-python-headless
  Downloading opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl (39.4 MB)
   ---------------------------------------- 0.0/39.4 MB ? eta -:--:--
   ---------------------------------------- 0.3/39.4 MB ? eta -:--:--
   - -------------------------------------- 1.6/39.4 MB 4.7 MB/s eta 0:00:09
   --- ------------------------------------ 3.1/39.4 MB 5.8 MB/s eta 0:00:07
   ---- ----------------------------------- 4.7/39.4 MB 6.3 MB/s eta 0:00:06
   ----- ---------------------------------- 5.8/39.4 MB 6.3 MB/s eta 0:00:06
   ------ --------------------------------- 6.3/39.4 MB 6.0 MB/s eta 0:00:06
   ------ --------------------------------- 6.6/39.4 MB 5.1 MB/s eta 0:00:07
   ------- -------------------------------- 7.1/39.4 MB 4.5 MB/s eta 0:00:08
   ------- -------------------------------- 7.3/39.4 MB 4.2 MB/s eta 0:00:08
   ------- --------------------------------

In [12]:
pip install opencv-python-headless numpy


Note: you may need to restart the kernel to use updated packages.


In [14]:
import cv2        # For image processing tasks
import os         # For file and directory handling
import numpy as np  # For handling image arrays


In [16]:
def apply_gaussian_filter(image, kernel_size=5):
    """
    Apply Gaussian filter to reduce noise in the image.
    :param image: Input image (grayscale or RGB).
    :param kernel_size: Size of the Gaussian kernel.
    :return: Noise-reduced image.
    """
    return cv2.GaussianBlur(image, (kernel_size, kernel_size), 0)

def process_images(dataset_path, classes, img_size=(256, 256)):
    """
    Load, apply Gaussian filter, and resize images from the dataset.
    :param dataset_path: Path to the Herlev dataset.
    :param classes: List of class directories.
    :param img_size: Desired size for resizing the images.
    :return: Processed images and their corresponding labels.
    """
    images = []
    labels = []
    image_names = []

    for label, class_dir in enumerate(classes):
        class_path = os.path.join(dataset_path, class_dir)
        for file_name in os.listdir(class_path):
            file_path = os.path.join(class_path, file_name)
            # Read the image
            image = cv2.imread(file_path, cv2.IMREAD_COLOR)
            if image is not None:
                # Resize the image
                image = cv2.resize(image, img_size)
                # Apply Gaussian filter
                filtered_image = apply_gaussian_filter(image)
                # Store the filtered image, label, and image name
                images.append(filtered_image)
                labels.append(label)
                image_names.append(file_name)
   
    return np.array(images), np.array(labels), image_names

def segment_image(image):
    """
    Perform basic segmentation to isolate nuclei in the image.
    :param image: Input image (assumed to be grayscale or RGB).
    :return: Binary mask of the segmented region.
    """
    # Convert to grayscale if the image is in RGB
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Apply Otsu's thresholding
    _, binary_mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary_mask

def apply_segmentation(images):
    """
    Apply segmentation to a list of images.
    :param images: List of images to segment.
    :return: List of segmented masks.
    """
    segmented_images = [segment_image(img) for img in images]
    return np.array(segmented_images)

In [20]:

def calculate_nc_ratio(segmented_image):
    """
    Calculate the Nucleus-to-Cytoplasm (N/C) ratio.
    :param segmented_image: Binary mask where the nucleus is white (255) and the rest is black (0).
    :return: N/C ratio.
    """
    # Total area of the nucleus (white pixels)
    nucleus_area = np.sum(segmented_image == 255)
   
    # Total area of the cell (entire mask)
    total_area = segmented_image.size  # Total number of pixels in the image
   
    # Cytoplasm area = Total area - Nucleus area
    cytoplasm_area = total_area - nucleus_area
   
    # Avoid division by zero
    if cytoplasm_area == 0:
        return 0  # Return 0 if cytoplasm is not detected
   
    # Calculate and return the N/C ratio
    nc_ratio = nucleus_area / cytoplasm_area
    return nc_ratio

def save_images_and_ratios(images, segmented_images, labels, image_names, nc_ratios, output_dir):
    """
    Save the filtered images, segmented images, and N/C ratios to disk and a CSV file.
    :param images: List of filtered images.
    :param segmented_images: List of segmented binary masks.
    :param labels: List of class labels corresponding to the images.
    :param image_names: Original image names from the dataset.
    :param nc_ratios: List of N/C ratios for each segmented image.
    :param output_dir: Directory to save the images and CSV file.
    """
    # Create directories for saving filtered and segmented images
    filtered_dir = os.path.join(output_dir, "filtered_images")
    segmented_dir = os.path.join(output_dir, "segmented_images")
    os.makedirs(filtered_dir, exist_ok=True)
    os.makedirs(segmented_dir, exist_ok=True)

    # Path for the CSV file
    csv_file_path = os.path.join(output_dir, "image_data.csv")
   
    # Write data to the CSV file
    with open(csv_file_path, mode="w", newline="") as csv_file:
        csv_writer = csv.writer(csv_file)
        # Write the header
        csv_writer.writerow(["Image Name", "Class", "N/C Ratio"])
       
        # Save images and write their metadata to the CSV file
        for i, (image, segmented_image, label, nc_ratio, img_name) in enumerate(zip(images, segmented_images, labels, nc_ratios, image_names)):
            # Generate filenames
            filtered_filename = f"{img_name}"
            segmented_filename = f"{img_name}"

            # Save filtered image
            cv2.imwrite(os.path.join(filtered_dir, filtered_filename), image)

            # Save segmented image
            cv2.imwrite(os.path.join(segmented_dir, segmented_filename), segmented_image)

            # Write to CSV
            csv_writer.writerow([img_name, classes[label], nc_ratio])

    print(f"Filtered images, segmented images, and CSV file saved to {output_dir}")

In [26]:
import cv2
import os
import numpy as np
import csv

# Define the input dataset directory
herlev_dataset_path = "C:/Dataset/Herlev Dataset/train"  # Change this to the actual path of your dataset


classes = ['carcinoma_in_situ', 'light_dysplastic','moderate_dysplastic','normal_columnar','normal_intermediate','normal_superficiel','severe_dysplastic']  # Replace with actual class names or directory names

# Load and process the images
print("Processing images...")
images, labels, image_names = process_images(herlev_dataset_path, classes)

# Apply segmentation
print("Applying segmentation...")
segmented_images = apply_segmentation(images)

# Calculate N/C ratios
print("Calculating N/C ratios...")
nc_ratios = [calculate_nc_ratio(segmented_image) for segmented_image in segmented_images]

# Define the output directory for saving the processed images
output_directory = "C:/Dataset/Herlev Dataset/train/Processed_Data"  # Change to the desired output directory

# Save the images and ratios
print("Saving images and data...")
save_images_and_ratios(images, segmented_images, labels, image_names, nc_ratios, output_directory)

print("Processing complete!")


Processing images...
Applying segmentation...
Calculating N/C ratios...
Saving images and data...
Filtered images, segmented images, and CSV file saved to C:/Dataset/Herlev Dataset/train/Processed_Data
Processing complete!


In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Define a custom Dataset class
class CervicalCancerDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB

        if self.transform:
            image = self.transform(image)

        return image, label

# Define data directories and classes
data_dir = "C:/Dataset/Herlev Dataset/train/Processed_Data/filtered_images"  # Update path
csv_file = "C:/Dataset/Herlev Dataset/train/Processed_Data/image_data.csv"  # Update path

# Load image paths and labels
import pandas as pd
data = pd.read_csv(csv_file)
image_paths = [os.path.join(data_dir, name) for name in data["Image Name"]]
labels = data["Class"].values

# Encode labels
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Train-test split
train_paths, val_paths, train_labels, val_labels = train_test_split(
    image_paths, labels_encoded, test_size=0.2, random_state=42
)

# Define image transformations
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # SqueezeNet normalization
])

# Create datasets and dataloaders
train_dataset = CervicalCancerDataset(train_paths, train_labels, transform=transform)
val_dataset = CervicalCancerDataset(val_paths, val_labels, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Load SqueezeNet model
model = models.squeezenet1_1(pretrained=True)
model.classifier[1] = nn.Conv2d(512, len(label_encoder.classes_), kernel_size=(1, 1))  # Adjust classifier for your dataset
model.num_classes = len(label_encoder.classes_)
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to("cuda"), labels.to("cuda")

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {train_loss / len(train_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")

# Validation loop
model.eval()
val_loss = 0
correct = 0
total = 0

with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to("cuda"), labels.to("cuda")
        outputs = model(images)
        loss = criterion(outputs, labels)

        val_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

print(f"Validation Loss: {val_loss / len(val_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")

# Save the trained model
torch.save(model.state_dict(), "squeezenet_cervical_cancer.pth")

# Prediction function
def predict(image_path):
    model.eval()
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = transform(image).unsqueeze(0).to("cuda" if torch.cuda.is_available() else "cpu")

    with torch.no_grad():
        output = model(image)
        _, predicted = output.max(1)
        return label_encoder.inverse_transform(predicted.cpu().numpy())[0]

# Example usage
example_image_path = "C:/Dataset/Herlev Dataset/test/example_image.jpg"  # Update path
predicted_class = predict(example_image_path)
print(f"Predicted Class: {predicted_class}")


ModuleNotFoundError: No module named 'torch'

In [32]:
!pip install torch torchvision torchaudio



Collecting torch
  Downloading torch-2.5.1-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting torchvision
  Downloading torchvision-0.20.1-cp312-cp312-win_amd64.whl.metadata (6.2 kB)
Collecting torchaudio
  Downloading torchaudio-2.5.1-cp312-cp312-win_amd64.whl.metadata (6.5 kB)
Collecting sympy==1.13.1 (from torch)
  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Downloading torch-2.5.1-cp312-cp312-win_amd64.whl (203.0 MB)
   ---------------------------------------- 0.0/203.0 MB ? eta -:--:--
   ---------------------------------------- 1.0/203.0 MB 7.2 MB/s eta 0:00:28
   ---------------------------------------- 2.4/203.0 MB 6.1 MB/s eta 0:00:33
    --------------------------------------- 3.7/203.0 MB 5.9 MB/s eta 0:00:34
    --------------------------------------- 4.5/203.0 MB 5.6 MB/s eta 0:00:36
   - -------------------------------------- 5.2/203.0 MB 5.1 MB/s eta 0:00:39
   - -------------------------------------- 6.3/203.0 MB 5.0 MB/s eta 0:00:40
   - -------------

In [37]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Define a custom Dataset class
class CervicalCancerDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB

        if self.transform:
            image = self.transform(image)

        return image, label



In [43]:
# Define data directories and classes
data_dir = "C:/Dataset/Herlev Dataset/train/Processed_Data/filtered_images"  # Update path
csv_file = "C:/Dataset/Herlev Dataset/train/Processed_Data/image_data.csv"  # Update path

# Load image paths and labels
import pandas as pd
data = pd.read_csv(csv_file)
image_paths = [os.path.join(data_dir, name) for name in data["Image Name"]]
labels = data["Class"].values





In [45]:
# Encode labels
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Train-test split
train_paths, val_paths, train_labels, val_labels = train_test_split(
    image_paths, labels_encoded, test_size=0.2, random_state=42
)

# Define image transformations
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # SqueezeNet normalization
])

# Create datasets and dataloaders
train_dataset = CervicalCancerDataset(train_paths, train_labels, transform=transform)
val_dataset = CervicalCancerDataset(val_paths, val_labels, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)




In [49]:
import torch
print(torch.cuda.is_available())  # Should return True if CUDA is available
print(torch.cuda.get_device_name(0))  # Name of the GPU if available


False


AssertionError: Torch not compiled with CUDA enabled

In [51]:
device = torch.device("cpu")


In [53]:
for images, labels in train_loader:
    images, labels = images.to(device), labels.to(device)
    optimizer.zero_grad()
    outputs = model(images)
    ...


In [60]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torchvision.models import squeezenet1_1, SqueezeNet1_1_Weights

# Set device
device = torch.device("cpu")

# Load model
model = squeezenet1_1(weights=SqueezeNet1_1_Weights.DEFAULT)
model = model.to(device)

# Define optimizer and loss
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(num_epochs):
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}")


RuntimeError: expected scalar type Long but found Int

In [62]:
for epoch in range(num_epochs):
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Ensure labels are in Long format
        labels = labels.long()

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}")


Epoch 1/10, Loss: 1.5615
Epoch 2/10, Loss: 1.6234
Epoch 3/10, Loss: 1.8305
Epoch 4/10, Loss: 0.4969
Epoch 5/10, Loss: 1.4279
Epoch 6/10, Loss: 1.1359
Epoch 7/10, Loss: 1.9260
Epoch 8/10, Loss: 0.6427
Epoch 9/10, Loss: 0.7513
Epoch 10/10, Loss: 0.6880


In [66]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device).long()  # Convert labels to LongTensor

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader):.4f}")


Epoch 1/10, Loss: 3.5563
Epoch 2/10, Loss: 2.0871
Epoch 3/10, Loss: 1.6817
Epoch 4/10, Loss: 1.5203
Epoch 5/10, Loss: 1.5968
Epoch 6/10, Loss: 1.5228
Epoch 7/10, Loss: 1.3099
Epoch 8/10, Loss: 1.3348
Epoch 9/10, Loss: 1.3056
Epoch 10/10, Loss: 1.1935


In [70]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device).long()
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Validation Accuracy: {accuracy:.2f}%")


Validation Accuracy: 46.51%


In [72]:
def predict_image(image_path, model, transform):
    model.eval()
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)

    return predicted.item()

# Example usage:
sample_image_path = "C:/Dataset/Herlev Dataset/test/carcinoma_in_situ/149315671-149315740-001.BMP"
predicted_class = predict_image(sample_image_path, model, transform)
print(f"Predicted Class: {predicted_class}")



Predicted Class: 0


In [76]:
# Save the trained model
torch.save(model.state_dict(), "squeezenet_cervical_cancer.pth")

# Prediction function
def predict(image_path):
    model.eval()
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = transform(image).unsqueeze(0).to("cuda" if torch.cuda.is_available() else "cpu")

    with torch.no_grad():
        output = model(image)
        _, predicted = output.max(1)
        return label_encoder.inverse_transform(predicted.cpu().numpy())[0]

# Example usage
example_image_path = "C:/Dataset/Herlev Dataset/test/severe_dysplastic/153826963-153826970-001.BMP"  # Update path
predicted_class = predict(example_image_path)
print(f"Predicted Class: {predicted_class}")

Predicted Class: moderate_dysplastic
