In [1]:
import pandas as pd

In [2]:
data = pd.read_csv("/projectnb/cs640grp/materials/UBC-OCEAN_CS640/test.csv")

In [3]:
data.head()

Unnamed: 0,image_id,label
0,0,
1,1,
2,2,
3,3,
4,4,


In [4]:
# data = pd.("/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train_images_compressed_80/10077.jpg")

In [5]:
from PIL import Image
import matplotlib.pyplot as plt

In [6]:
Image.MAX_IMAGE_PIXELS = None 

In [7]:
img = Image.open("/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train_images_compressed_80/10077.jpg")

In [8]:
# img

In [9]:
# plt.imshow(img)
# plt.axis('off') # to turn off axis
# plt.show()

In [10]:
import numpy as np

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from PIL import Image
import os
from tqdm import tqdm
import timm
from sklearn.metrics import f1_score
import glob

# Check for GPU availability
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

class CancerSlideDataset(Dataset):
    def __init__(self, annotations, root_dir, transform=None):
        self.annotations = annotations
        self.root_dir = root_dir
        self.transform = transform
        self.label_mapping = {'CC': 0, 'MC': 1, 'HGSC': 2, 'LGSC': 3, 'EC': 4}

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_name = os.path.join(self.root_dir, str(self.annotations.iloc[index, 0])) + ".jpg"
        image = Image.open(img_name)
        label_str = self.annotations.iloc[index, 1]
        if self.transform:
            image = self.transform(image)
        label = self.label_mapping[label_str]
        label = torch.tensor(label, dtype=torch.long)
        return image, label

class CancerClassifier(nn.Module):
    def __init__(self, num_classes=5):
        super(CancerClassifier, self).__init__()
        self.model = timm.create_model('efficientnet_b4', pretrained=True)
        # Freeze all layers in the network
        for param in self.model.parameters():
            param.requires_grad = False
        # Replace the classifier layer with an unfrozen new one
        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.model(x)

def create_datasets(csv_file, root_dir, transform, train_ratio=0.8, limit=None):
    annotations = pd.read_csv(csv_file)
    if limit is not None:
        annotations = annotations.sample(frac=1).reset_index(drop=True)[:limit]
    train_size = int(len(annotations) * train_ratio)
    train_annotations = annotations[:train_size]
    val_annotations = annotations[train_size:]
    train_dataset = CancerSlideDataset(train_annotations, root_dir, transform)
    val_dataset = CancerSlideDataset(val_annotations, root_dir, transform)
    return train_dataset, val_dataset

transform = transforms.Compose([
    transforms.Resize(380),
    transforms.CenterCrop(380),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset, val_dataset = create_datasets(
    csv_file='/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train.csv', 
    root_dir='/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train_images_compressed_80', 
    transform=transform,
    train_ratio=0.8  # 80% of the data will be used for training
)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)

model = CancerClassifier()
model.to(device)

# Define a directory where to save the models
model_save_dir = '/projectnb/cs640grp/students/gaurav57/model_saves/'
os.makedirs(model_save_dir, exist_ok=True)

# Check for the latest saved model in the model_save_dir and load it
model_files = glob.glob(model_save_dir + '/*.pth')
latest_model_file = max(model_files, key=os.path.getctime) if model_files else None

if latest_model_file:
    print(f"Loading weights from {latest_model_file}")
    model.load_state_dict(torch.load(latest_model_file, map_location=device))

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in tqdm(range(14)):
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

    training_accuracy = 100 * correct_predictions / total_predictions

    # Validation phase
    model.eval()
    val_running_loss = 0.0
    val_true = []
    val_pred = []

    with torch.no_grad():
        for i, data in enumerate(val_loader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            val_true.extend(labels.cpu().numpy())
            val_pred.extend(predicted.cpu().numpy())

    # Calculate total predictions for accuracy
    val_total_predictions = len(val_true)

    val_f1_score = f1_score(val_true, val_pred, average='weighted')
    val_accuracy = 100 * (sum(np.array(val_pred) == np.array(val_true)) / val_total_predictions) if val_total_predictions else 0
    print(f'Epoch {epoch + 1}, Training Loss: {running_loss / len(train_loader)}, Training Accuracy: {training_accuracy}%, Validation Loss: {val_running_loss / len(val_loader)}, Validation Accuracy: {val_accuracy}%, F1 Score: {val_f1_score}')

    # Save the model's state_dict after every epoch
    torch.save(model.state_dict(), os.path.join(model_save_dir, f'cancer_classifier_epoch_{epoch}.pth'))

print('Finished Training')


Using device: cuda:0
Loading weights from /projectnb/cs640grp/students/gaurav57/model_saves/cancer_classifier_epoch_1.pth


  0%|          | 0/14 [00:00<?, ?it/s]

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# ... [rest of your code]

# After validation phase in your training loop

# Calculate the confusion matrix
cm = confusion_matrix(val_true, val_pred)
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]  # Normalize the confusion matrix

# Plot the confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm_normalized, annot=True, fmt=".2f", cmap="Blues", 
            xticklabels=train_dataset.label_mapping.keys(), 
            yticklabels=train_dataset.label_mapping.keys())

plt.title('Normalized Confusion Matrix')
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.show()

In [11]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from PIL import Image
import os
from tqdm import tqdm
import timm
from sklearn.metrics import f1_score
import glob
import numpy as np

# Check for GPU availability
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

class CancerSlideDataset(Dataset):
    def __init__(self, annotations, root_dir, transform=None):
        self.annotations = annotations
        self.root_dir = root_dir
        self.transform = transform
        self.label_mapping = {'CC': 0, 'MC': 1, 'HGSC': 2, 'LGSC': 3, 'EC': 4}

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_name = os.path.join(self.root_dir, str(self.annotations.iloc[index, 0])) + ".jpg"
        image = Image.open(img_name)
        label_str = self.annotations.iloc[index, 1]
        if self.transform:
            image = self.transform(image)
        label = self.label_mapping[label_str]
        label = torch.tensor(label, dtype=torch.long)
        return image, label

class CancerClassifier(nn.Module):
    def __init__(self, num_classes=5):
        super(CancerClassifier, self).__init__()
        self.model = timm.create_model('efficientnet_b4', pretrained=True)
        for param in self.model.parameters():
            param.requires_grad = False
        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.model(x)

def create_datasets(csv_file, root_dir, transform, train_ratio=0.8, limit=None):
    annotations = pd.read_csv(csv_file)
    if limit is not None:
        annotations = annotations.sample(frac=1).reset_index(drop=True)[:limit]
    train_size = int(len(annotations) * train_ratio)
    train_annotations = annotations[:train_size]
    val_annotations = annotations[train_size:]
    train_dataset = CancerSlideDataset(train_annotations, root_dir, transform)
    val_dataset = CancerSlideDataset(val_annotations, root_dir, transform)
    return train_dataset, val_dataset

transform = transforms.Compose([
    transforms.Resize(380),
    transforms.CenterCrop(380),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset, val_dataset = create_datasets(
    csv_file='/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train.csv', 
    root_dir='/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train_images_compressed_80', 
    transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)

model = CancerClassifier()
model.to(device)

model_save_dir = '/projectnb/cs640grp/students/gaurav57/model_saves/'
os.makedirs(model_save_dir, exist_ok=True)

model_files = glob.glob(model_save_dir + '/*.pth')
latest_model_file = max(model_files, key=os.path.getctime) if model_files else None

if latest_model_file:
    print(f"Loading weights from {latest_model_file}")
    model.load_state_dict(torch.load(latest_model_file, map_location=device))

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in tqdm(range(14)):
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

    training_accuracy = 100 * correct_predictions / total_predictions

    model.eval()
    val_running_loss = 0.0
    val_true = []
    val_pred = []

    with torch.no_grad():
        for i, data in enumerate(val_loader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            val_true.extend(labels.cpu().numpy())
            val_pred.extend(predicted.cpu().numpy())

    val_total_predictions = len(val_true)

    val_f1_score = f1_score(val_true, val_pred, average='weighted')
    val_accuracy = 100 * sum(np.array(val_pred) == np.array(val_true)) / val_total_predictions if val_total_predictions else 0
    print(f'Epoch {epoch + 1}, Training Loss: {running_loss / len(train_loader)}, Training Accuracy: {training_accuracy}%, Validation Loss: {val_running_loss / len(val_loader)}, Validation Accuracy: {val_accuracy}%, F1 Score: {val_f1_score}')

    torch.save(model.state_dict(), os.path.join(model_save_dir, f'cancer_classifier_epoch_{epoch}.pth'))

print('Finished Training')


Using device: cuda:0
Loading weights from /projectnb/cs640grp/students/gaurav57/model_saves/cancer_classifier_epoch_1.pth


  0%|          | 0/14 [07:45<?, ?it/s]


KeyboardInterrupt: 

In [29]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from PIL import Image
import os
from tqdm import tqdm
import timm
from sklearn.metrics import f1_score
import glob
import numpy as np

# Check for GPU availability
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

class CancerSlideDataset(Dataset):
    def __init__(self, annotations, root_dir, transform=None):
        self.annotations = annotations
        self.root_dir = root_dir
        self.transform = transform
        self.label_mapping = {'CC': 0, 'MC': 1, 'HGSC': 2, 'LGSC': 3, 'EC': 4}

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_name = os.path.join(self.root_dir, str(self.annotations.iloc[index, 0])) + ".jpg"
        image = Image.open(img_name)
        label_str = self.annotations.iloc[index, 1]
        if self.transform:
            image = self.transform(image)
        label = self.label_mapping[label_str]
        label = torch.tensor(label, dtype=torch.long)
        return image, label

class CancerClassifier(nn.Module):
    def __init__(self, num_classes=5):
        super(CancerClassifier, self).__init__()
        self.model = timm.create_model('efficientnet_b4', pretrained=True)
        for param in self.model.parameters():
            param.requires_grad = False
            
        in_features = self.model.classifier.in_features
        
        self.additional_layer = nn.Linear(in_features, 128)
        
        # Add a ReLU activation function
        self.relu = nn.ReLU()
        
        # Modify the existing classifier
        self.model.classifier = nn.Sequential(
            self.additional_layer,
            self.relu,
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        return self.model(x)

def create_datasets(csv_file, root_dir, transform, train_ratio=0.8, limit=None):
    annotations = pd.read_csv(csv_file)
    if limit is not None:
        annotations = annotations.sample(frac=1).reset_index(drop=True)[:limit]
    train_size = int(len(annotations) * train_ratio)
    train_annotations = annotations[:train_size]
    val_annotations = annotations[train_size:]
    train_dataset = CancerSlideDataset(train_annotations, root_dir, transform)
    val_dataset = CancerSlideDataset(val_annotations, root_dir, transform)
    return train_dataset, val_dataset

transform = transforms.Compose([
    transforms.Resize(380),
    transforms.CenterCrop(380),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

limit=420
train_dataset, val_dataset = create_datasets(
    csv_file='/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train.csv',
    root_dir='/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train_images_compressed_80',
    transform=transform,limit=limit
)

# Use multi-threaded DataLoader for improved data loading performance
train_loader = DataLoader(train_dataset, batch_size=12, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=12, shuffle=False, num_workers=4, pin_memory=True)

# Use DataParallel to leverage multiple GPUs
model = CancerClassifier()
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs for training.")
    model = nn.DataParallel(model)
model.to(device)

model_save_dir = '/projectnb/cs640grp/students/gaurav57/model_saves/'
os.makedirs(model_save_dir, exist_ok=True)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in tqdm(range(14)):
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

    training_accuracy = 100 * correct_predictions / total_predictions

    model.eval()
    val_running_loss = 0.0
    val_true = []
    val_pred = []

    with torch.no_grad():
        for i, data in enumerate(val_loader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            val_true.extend(labels.cpu().numpy())
            val_pred.extend(predicted.cpu().numpy())

    val_total_predictions = len(val_true)

    val_f1_score = f1_score(val_true, val_pred, average='weighted')
    val_accuracy = 100 * sum(np.array(val_pred) == np.array(val_true)) / val_total_predictions if val_total_predictions else 0
    print(f'Epoch {epoch + 1}, Training Loss: {running_loss / len(train_loader)}, Training Accuracy: {training_accuracy}%, Validation Loss: {val_running_loss / len(val_loader)}, Validation Accuracy: {val_accuracy}%, F1 Score: {val_f1_score}')

    torch.save(model.state_dict(), os.path.join(model_save_dir, f'cancer_classifier_epoch_{epoch}.pth'))

print('Finished Training')


Using device: cuda:0
Using 4 GPUs for training.


  0%|          | 0/14 [00:00<?, ?it/s]

Epoch 1, Training Loss: 1.4976695775985718, Training Accuracy: 38.98809523809524%, Validation Loss: 1.4089620113372803, Validation Accuracy: 48.80952380952381%, F1 Score: 0.3332644533223125


  7%|▋         | 1/14 [04:01<52:25, 241.97s/it]

Epoch 2, Training Loss: 1.4166774451732635, Training Accuracy: 38.392857142857146%, Validation Loss: 1.319058861051287, Validation Accuracy: 48.80952380952381%, F1 Score: 0.3332644533223125


 14%|█▍        | 2/14 [08:15<49:43, 248.67s/it]

Epoch 3, Training Loss: 1.3220444364207131, Training Accuracy: 43.154761904761905%, Validation Loss: 1.2628597361700875, Validation Accuracy: 57.142857142857146%, F1 Score: 0.5048206037168331


 21%|██▏       | 3/14 [12:16<44:58, 245.34s/it]

Epoch 4, Training Loss: 1.287743717432022, Training Accuracy: 46.42857142857143%, Validation Loss: 1.2613974128450667, Validation Accuracy: 52.38095238095238%, F1 Score: 0.42844060701203557


 29%|██▊       | 4/14 [16:22<40:56, 245.63s/it]

Epoch 5, Training Loss: 1.267365193792752, Training Accuracy: 46.13095238095238%, Validation Loss: 1.216684980051858, Validation Accuracy: 52.38095238095238%, F1 Score: 0.48756105006105005


 36%|███▌      | 5/14 [20:29<36:55, 246.20s/it]

Epoch 6, Training Loss: 1.1773614627974374, Training Accuracy: 52.976190476190474%, Validation Loss: 1.1999859384128027, Validation Accuracy: 51.19047619047619%, F1 Score: 0.40716312483695544


 43%|████▎     | 6/14 [24:44<32:59, 247.46s/it]


KeyboardInterrupt: 

In [12]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from PIL import Image
import os
from tqdm import tqdm
import timm
from sklearn.metrics import f1_score
import glob
import numpy as np

# Check for GPU availability
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

class CancerSlideDataset(Dataset):
    def __init__(self, annotations, root_dir, transform=None):
        self.annotations = annotations
        self.root_dir = root_dir
        self.transform = transform
        self.label_mapping = {'CC': 0, 'MC': 1, 'HGSC': 2, 'LGSC': 3, 'EC': 4}

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_name = os.path.join(self.root_dir, str(self.annotations.iloc[index, 0])) + ".jpg"
        image = Image.open(img_name)
        label_str = self.annotations.iloc[index, 1]
        if self.transform:
            image = self.transform(image)
        label = self.label_mapping[label_str]
        label = torch.tensor(label, dtype=torch.long)
        return image, label

class CancerClassifier(nn.Module):
    def __init__(self, num_classes=5):
        super(CancerClassifier, self).__init__()
        self.model = timm.create_model('efficientnet_b4', pretrained=True)
        for param in self.model.parameters():
            param.requires_grad = False
            
        in_features = self.model.classifier.in_features
        
        # Add an additional layer after EfficientNet
        self.additional_layer1 = nn.Linear(in_features, 256)
        
        # Add a ReLU activation function
        self.relu1 = nn.ReLU()
        
        # Add another layer
        self.additional_layer2 = nn.Linear(256, 128)
        
        # Add another ReLU activation function
        self.relu2 = nn.ReLU()
        
        # Modify the existing classifier
        self.model.classifier = nn.Sequential(
            self.additional_layer1,
            self.relu1,
            self.additional_layer2,
            self.relu2,
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        return self.model(x)

def create_datasets(csv_file, root_dir, transform, train_ratio=0.8, limit=None):
    annotations = pd.read_csv(csv_file)
    if limit is not None:
        annotations = annotations.sample(frac=1).reset_index(drop=True)[:limit]
    train_size = int(len(annotations) * train_ratio)
    train_annotations = annotations[:train_size]
    val_annotations = annotations[train_size:]
    train_dataset = CancerSlideDataset(train_annotations, root_dir, transform)
    val_dataset = CancerSlideDataset(val_annotations, root_dir, transform)
    return train_dataset, val_dataset

transform = transforms.Compose([
    transforms.Resize(760),
    transforms.CenterCrop(760),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

limit=420
train_dataset, val_dataset = create_datasets(
    csv_file='/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train.csv',
    root_dir='/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train_images_compressed_80',
    transform=transform,limit=limit
)

# Use multi-threaded DataLoader for improved data loading performance
train_loader = DataLoader(train_dataset, batch_size=12, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=12, shuffle=False, num_workers=4, pin_memory=True)

# Use DataParallel to leverage multiple GPUs
model = CancerClassifier()
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs for training.")
    model = nn.DataParallel(model)
model.to(device)

model_save_dir = '/projectnb/cs640grp/students/gaurav57/model_saves/'
os.makedirs(model_save_dir, exist_ok=True)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in tqdm(range(14)):
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

    training_accuracy = 100 * correct_predictions / total_predictions

    model.eval()
    val_running_loss = 0.0
    val_true = []
    val_pred = []

    with torch.no_grad():
        for i, data in enumerate(val_loader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            val_true.extend(labels.cpu().numpy())
            val_pred.extend(predicted.cpu().numpy())

    val_total_predictions = len(val_true)

    val_f1_score = f1_score(val_true, val_pred, average='weighted')
    val_accuracy = 100 * sum(np.array(val_pred) == np.array(val_true)) / val_total_predictions if val_total_predictions else 0
    print(f'Epoch {epoch + 1}, Training Loss: {running_loss / len(train_loader)}, Training Accuracy: {training_accuracy}%, Validation Loss: {val_running_loss / len(val_loader)}, Validation Accuracy: {val_accuracy}%, F1 Score: {val_f1_score}')

    torch.save(model.state_dict(), os.path.join(model_save_dir, f'cancer_classifier_epoch_{epoch}.pth'))

print('Finished Training')

Using device: cuda:0
Using 4 GPUs for training.


  7%|▋         | 1/14 [03:28<45:12, 208.66s/it]

Epoch 1, Training Loss: 1.4985384345054626, Training Accuracy: 40.17857142857143%, Validation Loss: 1.510535478591919, Validation Accuracy: 32.142857142857146%, F1 Score: 0.15637065637065636


 14%|█▍        | 2/14 [06:55<41:32, 207.71s/it]

Epoch 2, Training Loss: 1.4591884655611855, Training Accuracy: 42.55952380952381%, Validation Loss: 1.4434660843440466, Validation Accuracy: 32.142857142857146%, F1 Score: 0.15637065637065636


 21%|██▏       | 3/14 [10:19<37:45, 205.93s/it]

Epoch 3, Training Loss: 1.4529614022799902, Training Accuracy: 42.55952380952381%, Validation Loss: 1.4928434576307024, Validation Accuracy: 32.142857142857146%, F1 Score: 0.15637065637065636


 29%|██▊       | 4/14 [13:39<33:55, 203.58s/it]

Epoch 4, Training Loss: 1.4447604843548365, Training Accuracy: 42.55952380952381%, Validation Loss: 1.4586164440427507, Validation Accuracy: 32.142857142857146%, F1 Score: 0.15637065637065636


 36%|███▌      | 5/14 [17:06<30:44, 204.97s/it]

Epoch 5, Training Loss: 1.4400973277432578, Training Accuracy: 42.55952380952381%, Validation Loss: 1.4628713130950928, Validation Accuracy: 32.142857142857146%, F1 Score: 0.15637065637065636


 43%|████▎     | 6/14 [20:27<27:08, 203.54s/it]

Epoch 6, Training Loss: 1.4169709427016122, Training Accuracy: 42.55952380952381%, Validation Loss: 1.4352761847632272, Validation Accuracy: 32.142857142857146%, F1 Score: 0.15637065637065636


 43%|████▎     | 6/14 [20:33<27:25, 205.63s/it]


KeyboardInterrupt: 

In [13]:
import torch
import torch.nn as nn
import torch.distributed as dist
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data import DataLoader, DistributedSampler
import torch.optim as optim
from sklearn.metrics import f1_score
import pandas as pd
from PIL import Image
import os
import glob
from tqdm import tqdm
import timm
import numpy as np
from torchvision import transforms

# Check for GPU availability
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

class CancerSlideDataset(torch.utils.data.Dataset):
    def __init__(self, annotations, root_dir, transform=None):
        self.annotations = annotations
        self.root_dir = root_dir
        self.transform = transform
        self.label_mapping = {'CC': 0, 'MC': 1, 'HGSC': 2, 'LGSC': 3, 'EC': 4}

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_name = os.path.join(self.root_dir, str(self.annotations.iloc[index, 0])) + ".jpg"
        image = Image.open(img_name)
        label_str = self.annotations.iloc[index, 1]
        if self.transform:
            image = self.transform(image)
        label = self.label_mapping[label_str]
        label = torch.tensor(label, dtype=torch.long)
        return image, label

class CancerClassifier(nn.Module):
    def __init__(self, num_classes=5):
        super(CancerClassifier, self).__init__()
        self.model = timm.create_model('efficientnet_b4', pretrained=True)
        # Freeze all layers in the network
        for param in self.model.parameters():
            param.requires_grad = False
        # Replace the classifier layer with an unfrozen new one
        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.model(x)

def create_datasets(csv_file, root_dir, transform, train_ratio=0.8, limit=None):
    annotations = pd.read_csv(csv_file)
    if limit is not None:
        annotations = annotations.sample(frac=1).reset_index(drop=True)[:limit]
    train_size = int(len(annotations) * train_ratio)
    train_annotations = annotations[:train_size]
    val_annotations = annotations[train_size:]
    train_dataset = CancerSlideDataset(train_annotations, root_dir, transform)
    val_dataset = CancerSlideDataset(val_annotations, root_dir, transform)
    return train_dataset, val_dataset

def train(rank, world_size):
    try:
        # Initialize distributed training
        dist.init_process_group("nccl", rank=rank, world_size=world_size)

        # Create datasets
        transform = transforms.Compose([
            transforms.Resize(380),
            transforms.CenterCrop(380),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        train_dataset, val_dataset = create_datasets(
            csv_file='/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train.csv',
            root_dir='/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train_images_compressed_80',
            transform=transform,
            train_ratio=0.8  # 80% of the data will be used for training
        )

        # Use DistributedSampler for loading the data in a distributed manner
        train_sampler = DistributedSampler(train_dataset, num_replicas=world_size, rank=rank)
        train_loader = DataLoader(train_dataset, batch_size=4, shuffle=False, sampler=train_sampler)
        val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)

        # Initialize the model
        model = CancerClassifier()
        model.to(rank)
        model = DDP(model, device_ids=[rank])

        # Define loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        # Training loop
        for epoch in tqdm(range(14)):
            model.train()
            running_loss = 0.0
            correct_predictions = 0
            total_predictions = 0

            for i, data in enumerate(train_loader, 0):
                inputs, labels = data[0].to(rank), data[1].to(rank)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                total_predictions += labels.size(0)
                correct_predictions += (predicted == labels).sum().item()

            training_accuracy = 100 * correct_predictions / total_predictions

            # Validation phase
            model.eval()
            val_running_loss = 0.0
            val_true = []
            val_pred = []

            with torch.no_grad():
                for i, data in enumerate(val_loader, 0):
                    inputs, labels = data[0].to(rank), data[1].to(rank)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    val_running_loss += loss.item()

                    _, predicted = torch.max(outputs.data, 1)
                    val_true.extend(labels.cpu().numpy())
                    val_pred.extend(predicted.cpu().numpy())

            # Calculate total predictions for accuracy
            val_total_predictions = len(val_true)

            val_f1_score = f1_score(val_true, val_pred, average='weighted')
            val_accuracy = 100 * (sum(np.array(val_pred) == np.array(val_true)) / val_total_predictions) if val_total_predictions else 0
            print(f'Epoch {epoch + 1}, Training Loss: {running_loss / len(train_loader)}, Training Accuracy: {training_accuracy}%, Validation Loss: {val_running_loss / len(val_loader)}, Validation Accuracy: {val_accuracy}%, F1 Score: {val_f1_score}')

        # Cleanup after training
        dist.destroy_process_group()
        
    except Exception as e:
        print(f"Error in process {rank}: {e}")
        raise e

def main():
    world_size = 4  # Number of GPUs
    mp.spawn(train, args=(world_size,), nprocs=world_size, join=True)

if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"Error in main process: {e}")
        raise e


Using device: cuda:0


Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/share/pkg.8/python3/3.10.12/install/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/share/pkg.8/python3/3.10.12/install/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'train' on <module '__main__' (built-in)>
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/share/pkg.8/python3/3.10.12/install/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/share/pkg.8/python3/3.10.12/install/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'train' on <module '__main__' (built-in)>
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/sha

Error in main process: process 2 terminated with exit code 1


ProcessExitedException: process 2 terminated with exit code 1

In [20]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from PIL import Image
import os
from tqdm import tqdm
import timm
from sklearn.metrics import f1_score
import glob
import torch.multiprocessing as mp
import numpy as np

# Check for GPU availability
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Set the start method to 'spawn' for CUDA compatibility
try:
    mp.get_start_method()
except RuntimeError:
    mp.set_start_method('spawn')

class CancerSlideDataset(Dataset):
    def __init__(self, annotations, root_dir, transform=None):
        self.annotations = annotations
        self.root_dir = root_dir
        self.transform = transform
        self.label_mapping = {'CC': 0, 'MC': 1, 'HGSC': 2, 'LGSC': 3, 'EC': 4}

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_name = os.path.join(self.root_dir, str(self.annotations.iloc[index, 0])) + ".jpg"
        image = Image.open(img_name)
        label_str = self.annotations.iloc[index, 1]
        if self.transform:
            image = self.transform(image)
        label = self.label_mapping[label_str]
        label = torch.tensor(label, dtype=torch.long)
        return image, label

class CancerClassifier(nn.Module):
    def __init__(self, num_classes=5):
        super(CancerClassifier, self).__init__()
        self.model = timm.create_model('efficientnet_b4', pretrained=True)
        # Freeze all layers in the network
        for param in self.model.parameters():
            param.requires_grad = False
        # Replace the classifier layer with an unfrozen new one
        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.model(x)

def create_datasets(csv_file, root_dir, transform, train_ratio=0.8, limit=None):
    annotations = pd.read_csv(csv_file)
    if limit is not None:
        annotations = annotations.sample(frac=1).reset_index(drop=True)[:limit]
    train_size = int(len(annotations) * train_ratio)
    train_annotations = annotations[:train_size]
    val_annotations = annotations[train_size:]
    train_dataset = CancerSlideDataset(train_annotations, root_dir, transform)
    val_dataset = CancerSlideDataset(val_annotations, root_dir, transform)
    return train_dataset, val_dataset

transform = transforms.Compose([
    transforms.Resize(380),
    transforms.CenterCrop(380),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset, val_dataset = create_datasets(
    csv_file='/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train.csv', 
    root_dir='/projectnb/cs640grp/materials/UBC-OCEAN_CS640/train_images_compressed_80', 
    transform=transform,
    train_ratio=0.8  # 80% of the data will be used for training
)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)

model = CancerClassifier()
model.to(device)

# Define a directory where to save the models
model_save_dir = '/projectnb/cs640grp/students/gaurav57/model_saves/'
os.makedirs(model_save_dir, exist_ok=True)

# Check for the latest saved model in the model_save_dir and load it
model_files = glob.glob(model_save_dir + '/*.pth')
latest_model_file = max(model_files, key=os.path.getctime) if model_files else None

if latest_model_file:
    print(f"Loading weights from {latest_model_file}")
    model.load_state_dict(torch.load(latest_model_file, map_location=device))

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train(rank, world_size):
    try:
        mp.get_start_method()
    except RuntimeError:
        mp.set_start_method('spawn')
        
    model = CancerClassifier()
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in tqdm(range(14)):
        model.train()
        running_loss = 0.0
        correct_predictions = 0
        total_predictions = 0

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

        training_accuracy = 100 * correct_predictions / total_predictions

        model.eval()
        val_running_loss = 0.0
        val_true = []
        val_pred = []

        with torch.no_grad():
            for i, data in enumerate(val_loader, 0):
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_running_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                val_true.extend(labels.cpu().numpy())
                val_pred.extend(predicted.cpu().numpy())

        val_total_predictions = len(val_true)

        val_f1_score = f1_score(val_true, val_pred, average='weighted')
        val_accuracy = 100 * (sum(np.array(val_pred) == np.array(val_true)) / val_total_predictions) if val_total_predictions else 0
        print(f'Epoch {epoch + 1}, Training Loss: {running_loss / len(train_loader)}, Training Accuracy: {training_accuracy}%, Validation Loss: {val_running_loss / len(val_loader)}, Validation Accuracy: {val_accuracy}%, F1 Score: {val_f1_score}')

        torch.save(model.state_dict(), os.path.join(model_save_dir, f'cancer_classifier_epoch_{epoch}_rank_{rank}.pth'))

    print(f'Finished Training on GPU {rank}')

# def main():
#     try:
#         mp.get_start_method()
#     except RuntimeError:
#         mp.set_start_method('spawn')  # Set the start method to 'spawn' for CUDA compatibility

#     world_size = 4  # Number of GPUs
#     processes = []

#     for rank in range(world_size):
#         p = mp.Process(target=train, args=(rank, world_size))
#         p.start()
#         processes.append(p)

#     for p in processes:
#         p.join()

# if __name__ == "__main__":
#     main()
    
    
def main():
    world_size = 4  # Number of GPUs
    processes = []

    for rank in range(world_size):
        p = mp.Process(target=train, args=(rank, world_size))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

if __name__ == "__main__":
    main()


Using device: cuda:0
Loading weights from /projectnb/cs640grp/students/gaurav57/model_saves/cancer_classifier_epoch_2.pth


Process Process-27:
Process Process-28:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/share/pkg.8/python3/3.10.12/install/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/share/pkg.8/python3/3.10.12/install/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/share/pkg.8/python3/3.10.12/install/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/share/pkg.8/python3/3.10.12/install/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/scratch/2834813.1.csgpu/ipykernel_82799/399626504.py", line 113, in train
    model.to(device)
  File "/scratch/2834813.1.csgpu/ipykernel_82799/399626504.py", line 113, in train
    model.to(device)
  File "/usr4/cs640/gaurav57/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1160, in to
    return 

In [15]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from PIL import Image
import os
from tqdm import tqdm
import timm
from sklearn.metrics import f1_score
import glob
import torch.multiprocessing as mp
import numpy as np

# Check for GPU availability
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

class CancerSlideDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None, limit=None):
        # Load the CSV file with slide images and labels
        self.slide_labels = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.limit = limit

    def __len__(self):
        # Return the number of slides in the dataset
        return len(self.slide_labels) if self.limit is None else min(len(self.slide_labels), self.limit)

    def __getitem__(self, idx):
        # Get the image and label for a specific index
        img_name = os.path.join(self.root_dir, self.slide_labels.iloc[idx, 0])
        image = Image.open(img_name)
        label = int(self.slide_labels.iloc[idx, 1])

        # Apply transformations if specified
        if self.transform:
            image = self.transform(image)

        return image, label

class CancerClassifier(nn.Module):
    def __init__(self):
        super(CancerClassifier, self).__init__()
        # Use a pre-trained ResNet model from timm library
        self.base_model = timm.create_model("resnet34", pretrained=True)
        in_features = self.base_model.fc.in_features
        # Modify the final fully connected layer for binary classification
        self.base_model.fc = nn.Linear(in_features, 2)

    def forward(self, x):
        # Forward pass through the model
        return self.base_model(x)

def create_datasets(csv_file, root_dir, transform, train_ratio=0.8, limit=None):
    # Load the dataset
    dataset = CancerSlideDataset(csv_file=csv_file, root_dir=root_dir, transform=transform, limit=limit)

    # Calculate the split index for training and validation
    split_idx = int(train_ratio * len(dataset))

    # Split the dataset into training and validation sets
    train_dataset = dataset[:split_idx]
    val_dataset = dataset[split_idx:]

    return train_dataset, val_dataset

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset, val_dataset = create_datasets(
    csv_file='/projectnb/cs640grp/students/gaurav57/train_labels.csv',
    root_dir='/projectnb/cs640grp/students/gaurav57/slides',
    transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)

model = CancerClassifier()
model.to(device)

# Define a directory where to save the models
model_save_dir = '/projectnb/cs640grp/students/gaurav57/model_saves/'
os.makedirs(model_save_dir, exist_ok=True)

# Check for the latest saved model in the model_save_dir and load it
model_files = glob.glob(model_save_dir + '/*.pth')
latest_model_file = max(model_files, key=os.path.getctime) if model_files else None

if latest_model_file:
    print(f"Loading weights from {latest_model_file}")
    model.load_state_dict(torch.load(latest_model_file, map_location=device))

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train(rank, world_size):
    # Set the device for the current process
    device = f'cuda:{rank}' if torch.cuda.is_available() else 'cpu'
    print(f"Training on device: {device}")

    # Initialize model, criterion, and optimizer on each process
    model = CancerClassifier()
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Load the latest saved model if available
    model_files = glob.glob(model_save_dir + '/*.pth')
    latest_model_file = max(model_files, key=os.path.getctime) if model_files else None
    if latest_model_file:
        print(f"Loading weights from {latest_model_file}")
        model.load_state_dict(torch.load(latest_model_file, map_location=device))

    # Define number of epochs and train the model
    num_epochs = 5
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0

        # Iterate over the training dataset
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # Calculate average training loss for the epoch
        avg_train_loss = train_loss / len(train_loader)

        # Print training statistics
        print(f"Process {rank}, Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_train_loss}")

        # Save the model at the end of each epoch
        torch.save(model.state_dict(), f'{model_save_dir}/cancer_classifier_epoch_{epoch}.pth')

    print(f"Process {rank} finished training")

def main():
    world_size = 4  # Number of GPUs
    mp.set_start_method('spawn')  # Set start method to 'spawn'
    processes = []

    for rank in range(world_size):
        p = mp.Process(target=train, args=(rank, world_size))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

if __name__ == "__main__":
    main()


Using device: cuda:0


FileNotFoundError: [Errno 2] No such file or directory: '/projectnb/cs640grp/students/gaurav57/train_labels.csv'