In [1]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from tqdm import tqdm  # Import tqdm library

# Select the compute device. Tensors are moved onto it batch by batch inside
# the training loop, so the images loaded below stay in host memory.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Dataset locations, kept together so they are easy to change.
TRAIN_CSV = "train.csv"
TRAIN_DIR = "C:/Developer/Cancer/train"

# CNNModel's first fully connected layer takes 128 * 28 * 28 features, which
# corresponds to a 224x224 input after three 2x2 poolings. Resizing here also
# gives every tensor the same shape, which the DataLoader needs to build batches.
IMAGE_SIZE = (224, 224)
preprocess = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
])

# Read the CSV file
data = pd.read_csv(TRAIN_CSV)

# Parallel lists: images[i] is the tensor for the sample labelled labels[i]
images = []
labels = []

# Walk the (image_name, diagnosis) pairs of the DataFrame
for image_id, diagnosis in zip(data['image_name'], data['diagnosis']):
    image_name = image_id + '.jpg'  # Add '.jpg' extension

    # Skip files without a named label or with label 'unknown'
    if pd.isna(diagnosis) or diagnosis == 'unknown':
        continue

    # Images are stored in one sub-directory per diagnosis
    file_path = os.path.join(TRAIN_DIR, diagnosis, image_name)

    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        continue

    # The context manager closes the file handle as soon as the pixels have
    # been converted, instead of leaving thousands of files open.
    with Image.open(file_path) as img:
        image = preprocess(img.convert('RGB'))

    images.append(image)
    labels.append(diagnosis)




cpu


In [None]:
# Use LabelEncoder to encode labels into numerical values.
# The fitted encoder is kept in `label_encoder`; `labels` is rebound from the
# list of diagnosis strings to the encoded values returned by fit_transform.
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

class ImageDataset(Dataset):
    """In-memory dataset pairing pre-loaded images with their labels.

    Args:
        images: Indexable collection of samples.
        labels: Indexable collection of labels, aligned with ``images`` so
            that ``labels[i]`` belongs to ``images[i]``.

    Raises:
        ValueError: If ``images`` and ``labels`` differ in length.
    """

    def __init__(self, images, labels):
        # Fail at construction time: a length mismatch would otherwise only
        # show up as an IndexError mid-epoch, or as silently unused labels.
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        self.images = images
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.images[idx], self.labels[idx]

# Bundle the loaded image tensors with their encoded labels for the DataLoader
dataset = ImageDataset(images=images, labels=labels)



In [None]:
# Define the CNN model
class CNNModel(nn.Module):
    """Three-stage convolutional classifier for 3-channel images.

    Each stage is a 3x3 convolution (padding 1), a ReLU and a 2x2 max-pool,
    so height and width are halved three times. The first fully connected
    layer takes 128 * 28 * 28 features, which corresponds to 224x224 inputs.

    Args:
        num_classes: Number of output logits produced per sample.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 28 * 28, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Compute class logits for ``x``.

        Args:
            x: Image tensor whose spatial size yields ``fc1.in_features``
                values per sample after the three conv/pool stages.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding raw logits.

        Raises:
            ValueError: If the per-sample feature count after the conv stack
                does not match ``fc1.in_features``.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        # A bare view(-1, N) would silently fold surplus spatial features
        # into the batch dimension for wrongly sized inputs (one 448x448
        # image would come out as a "batch" of four), so check first.
        expected = self.fc1.in_features
        per_sample = x.shape[-3:].numel()
        if per_sample != expected:
            raise ValueError(
                f"Expected {expected} features per sample after the "
                f"convolutional stack, got {per_sample} "
                f"(feature map shape {tuple(x.shape[-3:])}); "
                f"check the input image size."
            )
        x = x.view(-1, expected)

        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize the model
# The class count comes from the fitted encoder rather than from `labels`,
# so it stays correct no matter how often this cell is re-run.
num_classes = len(label_encoder.classes_)  # Number of unique classes
model = CNNModel(num_classes).to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)
num_epochs = 5
model.train()  # make training mode explicit
for epoch in range(num_epochs):
    running_loss = 0.0
    progress = tqdm(
        enumerate(dataloader),
        total=len(dataloader),
        desc=f"Epoch {epoch+1}/{num_epochs}",
        unit="batch",
    )
    # Batch-local names on purpose: reusing `data` / `labels` here would
    # overwrite the notebook-level DataFrame and the encoded label array.
    for i, (batch_images, batch_labels) in progress:
        inputs = batch_images.to(device)
        # CrossEntropyLoss takes class indices as integer (long) targets
        targets = batch_labels.to(device=device, dtype=torch.long)

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Report the mean loss over each window of 10 batches
        running_loss += loss.item()
        if i % 10 == 9:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 10))
            running_loss = 0.0

print('Finished Training')