In [1]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.preprocessing import LabelEncoder

# Read the CSV file; the 'image_name' and 'diagnosis' columns are used below.
data = pd.read_csv("train.csv")

# Parallel lists: images[i] is the decoded picture, labels[i] its diagnosis.
images = []
labels = []

# Walk the two columns in lockstep (cheaper than DataFrame.iterrows(), which
# builds a Series per row).
for image_name, diagnosis in zip(data['image_name'], data['diagnosis']):
    # Skip files without a named label or with label 'unknown'
    if pd.isna(diagnosis) or diagnosis == 'unknown':
        continue

    # Images are stored as <root>/<diagnosis>/<image_name>.jpg
    file_path = os.path.join(
        "C:/Developer/Cancer/train", diagnosis, image_name + '.jpg'
    )

    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        continue

    # Image.open() is lazy and keeps the file open until the pixel data is
    # read. Decoding inside the `with` block (convert() returns a loaded,
    # independent image) releases each handle immediately instead of holding
    # one open file per dataset entry.
    with Image.open(file_path) as image:
        images.append(image.convert('RGB'))
    labels.append(diagnosis)

# Use LabelEncoder to encode labels into numerical values
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

class ImageDataset(Dataset):
    """Map-style dataset over parallel sequences of images and labels.

    Args:
        images: Indexable collection of samples.
        labels: Indexable collection of targets, aligned with ``images``.
        transform: Optional callable applied to a sample when it is fetched.
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels, self.transform = images, labels, transform

    def __len__(self):
        # The dataset size is defined by the number of images.
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, transforming the image if configured."""
        sample, target = self.images[idx], self.labels[idx]

        # No transform configured: hand the stored sample back untouched.
        if not self.transform:
            return sample, target

        return self.transform(sample), target

# Per-sample preprocessing: resize to 224x224, apply a random horizontal flip
# as augmentation, then convert the PIL image to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)

# Wrap the loaded images/labels and serve them in shuffled batches of 5.
dataset = ImageDataset(images=images, labels=labels, transform=transform)
dataloader = DataLoader(dataset=dataset, batch_size=5, shuffle=True)

# Define the CNN model
class CNNModel(nn.Module):
    """Small three-stage convolutional classifier for 3-channel images.

    Each stage is conv(3x3, padding=1) -> ReLU -> 2x2 max-pool, followed by
    two fully connected layers that produce one raw score (logit) per class.

    Args:
        num_classes: Number of output classes.
        input_size: Height/width of the square input images. Defaults to 224,
            which reproduces the original ``128 * 28 * 28`` feature size.

    Raises:
        ValueError: If ``input_size`` is too small to survive three poolings.
    """

    def __init__(self, num_classes, input_size=224):
        super().__init__()
        if input_size < 8:
            raise ValueError(
                f"input_size must be at least 8, got {input_size}"
            )

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        # The padded 3x3 convolutions keep the spatial size; each of the three
        # 2x2 poolings halves it (rounding down), so the side shrinks by 8.
        feature_side = input_size // 8
        self.fc1 = nn.Linear(128 * feature_side * feature_side, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, <fixed size>), this can never fold spatial features into
        # the batch axis: a wrongly sized input fails loudly in fc1 instead
        # of silently producing extra output rows.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize the model
num_classes = len(set(labels))  # Number of unique classes
model = CNNModel(num_classes)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the model
num_epochs = 5
log_every = 10  # Report the mean loss once per this many batches
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    # Use loop-local names for the batch so the module-level `data` DataFrame
    # and the encoded `labels` array are not overwritten by the last batch.
    for i, (inputs, targets) in enumerate(dataloader):
        # CrossEntropyLoss takes class indices as int64 tensors; the cast is
        # a no-op when the collated labels already have that dtype.
        targets = targets.long()

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % log_every == log_every - 1:
            print(f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / log_every:.3f}")
            running_loss = 0.0

print('Finished Training')

# Save the model if needed
# torch.save(model.state_dict(), 'model.pth')


[1,    10] loss: 1.168
[1,    20] loss: 0.603
[1,    30] loss: 0.585
[1,    40] loss: 0.459
[1,    50] loss: 0.596
[1,    60] loss: 0.745
[1,    70] loss: 0.664
[1,    80] loss: 0.724
[1,    90] loss: 0.459
[1,   100] loss: 0.571
[1,   110] loss: 0.672
[1,   120] loss: 0.643
[1,   130] loss: 0.478
[1,   140] loss: 0.520
[1,   150] loss: 0.663
[1,   160] loss: 0.517
[1,   170] loss: 0.281
[1,   180] loss: 0.436
[1,   190] loss: 0.779
[1,   200] loss: 0.617
[1,   210] loss: 0.414
[1,   220] loss: 0.459
[1,   230] loss: 0.409
[1,   240] loss: 0.488
[1,   250] loss: 0.471
[1,   260] loss: 0.503
[1,   270] loss: 0.447
[1,   280] loss: 0.250
[1,   290] loss: 0.533
[1,   300] loss: 0.345
[1,   310] loss: 0.215
[1,   320] loss: 0.757
[1,   330] loss: 0.523
[1,   340] loss: 0.409
[1,   350] loss: 0.834
[1,   360] loss: 0.667
[1,   370] loss: 0.409
[1,   380] loss: 0.599
[1,   390] loss: 0.295
[1,   400] loss: 0.352
[1,   410] loss: 0.606
[1,   420] loss: 0.413
[1,   430] loss: 0.507
[1,   440] 