In [None]:

# Install dependencies with the %pip magic (rather than !pip) so the packages
# are installed into the environment of the running kernel.
%pip install torch torchvision
%pip install pennylane

In [None]:
import torch
import torchvision
from torchvision import transforms, datasets
from torchvision.transforms import ToTensor
import torch.optim as optim
import torch.nn as nn
import pennylane as qml
from pennylane import numpy as np

# Pick the compute device once: CUDA when available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
import os
import time
import shutil
import pathlib
import itertools
from PIL import Image
import cv2
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Root of the image set. The walk below expects the layout
#   <data_dir>/<image set folder>/<class folder>/<image file>
data_dir = '/kaggle/input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set'

# Class folder name -> human-readable label used throughout the notebook.
class_names = {
    'colon_aca': 'Colon Adenocarcinoma',
    'colon_n': 'Colon Benign Tissue',
    'lung_aca': 'Lung Adenocarcinoma',
    'lung_n': 'Lung Benign Tissue',
    'lung_scc': 'Lung Squamous Cell Carcinoma',
}

filepaths = []
labels = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and therefore the seeded train/test split) reproducible across runs.
for fold in sorted(os.listdir(data_dir)):
    foldpath = os.path.join(data_dir, fold)
    if not os.path.isdir(foldpath):
        continue  # stray file next to the image-set folders

    for f in sorted(os.listdir(foldpath)):
        f_path = os.path.join(foldpath, f)
        # Skip anything that is not a known class folder. A path is only
        # recorded together with its label, so the two lists cannot drift
        # out of alignment.
        if f not in class_names or not os.path.isdir(f_path):
            continue

        for file in sorted(os.listdir(f_path)):
            filepaths.append(os.path.join(f_path, file))
            labels.append(class_names[f])

assert len(filepaths) == len(labels), "every image path must have exactly one label"

# filepaths + labels in one DataFrame
df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})
print(df['labels'].value_counts())

In [None]:
# Show the combined filepaths/labels DataFrame as the cell output.
df

In [None]:
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
# Train / validation / test split, stratified on the class label.
# First cut: 80% train, 20% held out (dummy_df).
strat = df['labels']
train_df, dummy_df = train_test_split(
    df,
    train_size=0.8,
    shuffle=True,
    random_state=42,
    stratify=strat,
)

# Second cut: the held-out part is halved into validation and test.
strat = dummy_df['labels']
valid_df, test_df = train_test_split(
    dummy_df,
    train_size=0.5,
    shuffle=True,
    random_state=42,
    stratify=strat,
)

In [None]:
# Image geometry and batch settings.
height = 224
width = 224
channels = 3
batch_size = 40
img_shape = (height, width, channels)
img_size = (height, width)

# Choose the largest batch size of at most 80 that divides the test set
# evenly, so the test set is covered in a whole number of steps.
length = len(test_df)
test_batch_size = sorted(
    (length // n for n in range(1, length + 1) if length % n == 0 and length / n <= 80),
    reverse=True,
)[0]
test_steps = length // test_batch_size
print('test batch size: ', test_batch_size, '  test steps: ', test_steps)
def scalar(img):
    """Rescale pixel values to lie between -1 and +1.

    Computes ``img / 127.5 - 1``; this lands in [-1, 1] assuming the input
    is in the 0..255 range.
    """
    scaled = img / 127.5
    return scaled - 1

In [None]:
# Show the training split as the cell output.
train_df

In [None]:
import pandas as pd



# Extract the class names in sorted order. Sorting makes the label -> index
# mapping deterministic instead of depending on which class happens to
# appear first in the shuffled split.
unique_labels = sorted(train_df['labels'].unique())

# Create a mapping from string labels to numerical labels
label_map = {label: i for i, label in enumerate(unique_labels)}

# Add the numerical labels as a new column. `assign` builds a new frame, which
# avoids writing into a frame derived from another DataFrame
# (SettingWithCopyWarning) and keeps the cell safe to re-run.
train_df = train_df.assign(numerical_labels=train_df['labels'].map(label_map))

# Display the first few rows of the updated DataFrame
print(train_df.head())

In [None]:
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

# Custom PyTorch dataset backed by a DataFrame of image paths and labels
class CustomDataset(Dataset):
    """Dataset that loads images lazily from the paths listed in a DataFrame.

    The DataFrame must provide a 'filepaths' column (path of the image file)
    and a 'numerical_labels' column (the label returned with each image).
    """

    def __init__(self, df, transform=None):
        """Store the DataFrame and the optional per-image transform."""
        self.transform = transform
        self.df = df

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        The image is opened from disk and converted to RGB; the transform,
        when one was given, is applied to it before returning.
        """
        row = self.df.iloc[idx]
        filepath, label = row['filepaths'], row['numerical_labels']

        image = Image.open(filepath).convert('RGB')
        if self.transform:
            image = self.transform(image)

        return image, label

# Step 1: Convert DataFrame to Custom Dataset
# Preprocessing applied to every image when it is loaded by the dataset.
transform = transforms.Compose([
    #transforms.ToPILImage(),  # (disabled) Convert numpy array to PIL Image
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),         # Convert the image to a tensor
    #transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # (disabled) Normalize pixel values
])

# Training dataset: rows of train_df, loaded through the transform above.
train_data = CustomDataset(train_df, transform=transform)

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import pennylane as qml

# Quantum circuit definition (PennyLane)
n_qubits = 5
dev = qml.device("default.qubit", wires=n_qubits)


@qml.qnode(dev)
def qnode(inputs, weights):
    """Variational circuit on ``n_qubits`` wires.

    ``inputs`` are angle-embedded on all wires, ``weights`` parameterise the
    basic entangler layers, and the Pauli-Z expectation value of every wire
    is returned (one value per qubit).
    """
    wires = range(n_qubits)
    qml.AngleEmbedding(inputs, wires=wires)
    qml.BasicEntanglerLayers(weights, wires=wires)
    return [qml.expval(qml.PauliZ(wires=wire)) for wire in wires]


# Trainable-weight shapes for the quantum layer: one rotation angle per qubit
# for each of the n_layers entangler layers.
n_layers = 3
weight_shapes = {"weights": (n_layers, n_qubits)}


# Define a hybrid CNN + quantum-layer architecture
class Net(nn.Module):
    """Small CNN whose 20 penultimate features pass through four quantum layers.

    Expects 3-channel 224x224 inputs: both convolutions preserve the spatial
    size (5x5 kernel, padding 2) and each of the two 2x2 poolings halves it,
    giving the 16 x 56 x 56 feature map that ``fc1`` is sized for.

    Args:
        num_classes: number of output logits. Defaults to 10, the value that
            was previously hard-coded; pass the real number of classes to
            avoid unused logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Convolutional layer 1: 3 input channels (RGB), 4 output channels, 5x5 kernel
        self.conv1 = nn.Conv2d(3, 4, 5, stride=1, padding=2)
        # Batch normalization after convolutional layer 1
        self.bn1 = nn.BatchNorm2d(4)
        # Max pooling layer with a 2x2 window
        self.pool = nn.MaxPool2d(2, 2)
        # Convolutional layer 2: 4 input channels, 16 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(4, 16, 5, stride=1, padding=2)
        # Batch normalization after convolutional layer 2
        self.bn2 = nn.BatchNorm2d(16)
        # Quantum layers: four independent copies of the same circuit, each
        # processing one 5-feature slice of the 20 features from fc2
        self.qlayer1 = qml.qnn.TorchLayer(qnode, weight_shapes)
        self.qlayer2 = qml.qnn.TorchLayer(qnode, weight_shapes)
        self.qlayer3 = qml.qnn.TorchLayer(qnode, weight_shapes)
        self.qlayer4 = qml.qnn.TorchLayer(qnode, weight_shapes)

        # Fully connected layers
        self.fc1 = nn.Linear(16 * 56 * 56, 120)
        self.fc2 = nn.Linear(120, 20)
        self.fc3 = nn.Linear(20, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch of images."""
        # Propagate the input through the CNN layers
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16 * 56 * 56), this can never fold several samples into one
        # row when the input size is unexpected; fc1 raises a shape error instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Split the 20 features into four groups of 5 and pass each group
        # through its own quantum layer
        x_1, x_2, x_3, x_4 = torch.split(x, 5, dim=1)
        x_1 = self.qlayer1(x_1)
        x_2 = self.qlayer2(x_2)
        x_3 = self.qlayer3(x_3)
        x_4 = self.qlayer4(x_4)
        x = torch.cat([x_1, x_2, x_3, x_4], dim=1)
        x = self.fc3(x)
        return x
     

In [None]:
import datetime
dataset  = train_data

# Initialize the CNN model and move it to the selected device
cnn = Net()
cnn.to(device)
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for classification
optimizer = torch.optim.SGD(cnn.parameters(), lr=0.001, momentum=0.9)  # Stochastic Gradient Descent optimizer
train_loader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True)

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    ct = datetime.datetime.now()
    print(f"{epoch=}, {ct}")
    # Make sure training-mode behaviour (e.g. batch-norm statistics) is active
    # even if the model was switched to eval mode by a previously run cell.
    cnn.train()
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    num_samples = 0
    # The batch labels are named `targets` so the global `labels` list built
    # while indexing the dataset is not overwritten.
    for inputs, targets in train_loader:
        inputs = inputs.to(device)  # Move inputs to the appropriate device
        targets = targets.to(device)
        optimizer.zero_grad()  # Zero the parameter gradients to avoid accumulation
        outputs = cnn(inputs)  # Forward pass
        loss = criterion(outputs, targets)  # Compute the loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update the model parameters
        # criterion returns the batch mean; weight it by the batch size so the
        # epoch average is correct even when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)
        num_samples += inputs.size(0)
    print(f"epoch {epoch}: mean training loss = {running_loss / max(num_samples, 1):.4f}")
print('Finished Training')

In [None]:
# Extract unique labels present in the held-out split
unique_labels = dummy_df['labels'].unique()

# Reuse the label -> index mapping that was applied to the training split.
# Building a fresh mapping from this split's own unique() order would give the
# same class a different index than the one the model is trained on.
label_map = (
    train_df.drop_duplicates('labels')
    .set_index('labels')['numerical_labels']
    .to_dict()
)

unseen = set(unique_labels) - set(label_map)
if unseen:
    raise ValueError(f"labels missing from the training split: {sorted(unseen)}")

# Add the numerical labels as a new column (assign returns a new frame, so the
# cell is safe to re-run and does not write into a derived frame)
dummy_df = dummy_df.assign(numerical_labels=dummy_df['labels'].map(label_map))

# Display the first few rows of the updated DataFrame
print(dummy_df.head())

In [None]:
# Extract unique labels present in the test split
unique_labels = test_df['labels'].unique()

# Reuse the label -> index mapping that was applied to the training split.
# Building a fresh mapping from the test split's own unique() order would give
# the same class a different index than the one the model was trained on,
# which makes the measured accuracy meaningless.
label_map = (
    train_df.drop_duplicates('labels')
    .set_index('labels')['numerical_labels']
    .to_dict()
)

unseen = set(unique_labels) - set(label_map)
if unseen:
    raise ValueError(f"labels missing from the training split: {sorted(unseen)}")

# Add the numerical labels as a new column (assign returns a new frame, so the
# cell is safe to re-run and does not write into a derived frame)
test_df = test_df.assign(numerical_labels=test_df['labels'].map(label_map))

# Display the first few rows of the updated DataFrame
print(test_df.head())

In [None]:
# Test dataset: rows of test_df, loaded with the same transform object that is
# passed to the training dataset.
test_data = CustomDataset(test_df, transform=transform)

In [None]:
correct = 0
total = 0
# Set the model to evaluation mode
cnn.eval()
# NOTE: the loader keeps its original name `val_loader`, but it iterates over
# the held-out *test* split (test_data).
val_loader = torch.utils.data.DataLoader(test_data, batch_size=4, shuffle=False)
with torch.no_grad():  # no gradients are needed for evaluation
    for images, targets in val_loader:
        images = images.to(device)  # Move inputs to the appropriate device
        targets = targets.to(device)
        outputs = cnn(images)
        # Predicted class = index of the largest logit
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

# Guard against an empty loader instead of dividing by zero
accuracy = 100 * correct / total if total else float('nan')
print(f'Accuracy on the test set: {accuracy:.2f}%')