In [2]:
import os
import torch
import pandas as pd
from glob import glob
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms
from transformers import ViTForImageClassification, ViTConfig, TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, f1_score, classification_report, roc_curve, auc
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm
Client is not authorized to connect to Server

In [2]:
os.environ['CUDA_VISIBLE_DEVICES'] = '3'
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # "cuda:0" will refer to GPU 3
print(f"Using device: {device}")

file_path = 'train_df.csv'
train_df_main = pd.read_csv(file_path)

# Map image paths to the DataFrame
all_image_paths = {os.path.basename(x): x for x in glob(os.path.join('images*', '*', '*.png'))}
train_df_main["FilePath"] = train_df_main["Image Index"].map(all_image_paths)

# Drop 'No Finding' column
train_df_main.drop(['No Finding'], axis=1, inplace=True)

# Selected labels
selected_labels = ['Effusion', 'Infiltration', 'Mass', 'Nodule', 'Atelectasis', 'Pneumothorax']

# Filter for selected labels
train_df_main = train_df_main[['Image Index', 'FilePath'] + selected_labels]

# Calculate the sum of diseases per patient
train_df_main['Total Diseases'] = train_df_main[selected_labels].sum(axis=1)

# Filter patients with exactly one disease
one_disease_df = train_df_main[train_df_main['Total Diseases'] == 1]

# Calculate the frequency of each disease
disease_counts = one_disease_df[selected_labels].sum()

# Filter diseases with at least 2000 samples
frequent_diseases = disease_counts[disease_counts >= 2000].index.tolist()

# Filter the DataFrame for these frequent diseases
train_df_main = one_disease_df[one_disease_df[frequent_diseases].eq(1).any(axis=1)]

# Check if the subset was created correctly
print(train_df_main.head())
print(f"Total images in dataset: {len(train_df_main)}")

# Specify the directory where you want to save the file
output_directory = './output_directory'

# Ensure the directory exists
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

# Define the output file path
output_file_path = os.path.join(output_directory, 'train_df_main_full.csv')

# Save to a new CSV (optional)
try:
    train_df_main.to_csv(output_file_path, index=False)
    print(f"Dataset saved to {output_file_path}")
except PermissionError as e:
    print(f"PermissionError: {e}. Could not save the file at {output_file_path}")

# Define the output file path
output_file_path = os.path.join(output_directory, 'subset_df_new.csv')

# Define custom dataset for ViT
class CustomImageDataset(Dataset):
    def __init__(self, df, labels, transform=None):
        self.df = df
        self.labels = labels
        self.transform = transform
        self.image_paths = df['FilePath'].values
        self.label_values = df[labels].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        label = torch.tensor(self.label_values[idx], dtype=torch.float32)
        return {"pixel_values": image, "labels": label}

# Custom transform function
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create train-test split
train_df, test_df = train_test_split(train_df_main, test_size=0.2, random_state=42)

# Create datasets and dataloaders
train_dataset = CustomImageDataset(train_df, frequent_diseases, transform=transform)
test_dataset = CustomImageDataset(test_df, frequent_diseases, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)  # Increased batch size
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize the model from scratch
config = ViTConfig(
    hidden_size=768,
    num_attention_heads=12,
    num_hidden_layers=12,
    image_size=224,
    patch_size=16,
    num_labels=len(frequent_diseases)
)
model = ViTForImageClassification(config).to(device)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=20,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    warmup_steps=100,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    fp16=True,  
)

# Define Trainer
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels").to(device)
        outputs = model(**inputs)
        loss = torch.nn.BCEWithLogitsLoss()(outputs.logits, labels)
        return (loss, outputs) if return_outputs else loss

    def compute_metrics(self, p):
        preds = torch.sigmoid(p.predictions).cpu().numpy()
        labels = p.label_ids
        preds = (preds > 0.5).astype(int)
        accuracy = (preds == labels).mean()
        return {"accuracy": accuracy}

trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Train the model
trainer.train()

# Save the model
model.save_pretrained('./trained_model')

# # Evaluation on test set
# def evaluate_model(trainer, dataset):
#     trainer.model.eval()  # Set the model to evaluation mode
#     predictions, labels = [], []
#     for batch in DataLoader(dataset, batch_size=32):
#         inputs = {"pixel_values": batch["pixel_values"].to(device)}
#         with torch.no_grad():
#             outputs = trainer.model(**inputs)
#         logits = outputs.logits.detach().cpu().numpy()
#         predictions.append(logits)
#         labels.append(batch["labels"].numpy())
#     predictions = np.concatenate(predictions, axis=0)
#     labels = np.concatenate(labels, axis=0)
#     predictions = (predictions > 0.5).astype(int)
#     acc = accuracy_score(labels, predictions)
#     f1 = f1_score(labels, predictions, average='macro')
#     return predictions, labels, acc, f1

# # Evaluate the model
# predictions, labels, accuracy, f1 = evaluate_model(trainer, test_dataset)
# print(f"Test Accuracy: {accuracy:.4f}")
# print(f"Test F1 Score: {f1:.4f}")

# # Classification report
# print("Classification Report:")
# print(classification_report(labels, predictions, target_names=frequent_diseases))

# # Plot all ROC curves in one graph
# plt.figure(figsize=(10, 8))
# for i, label in enumerate(frequent_diseases):
#     fpr, tpr, _ = roc_curve(labels[:, i], predictions[:, i])
#     roc_auc = auc(fpr, tpr)
#     plt.plot(fpr, tpr, lw=2, label=f'ROC curve of {label} (area = {roc_auc:.2f})')

# plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.05])
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('Receiver Operating Characteristic for all labels')
# plt.legend(loc="lower right")
# plt.show()

Using device: cuda:0
         Image Index                            FilePath  Effusion  \
2   00000001_002.png  images_001/images/00000001_002.png         1   
7   00000003_003.png  images_001/images/00000003_003.png         0   
19  00000005_006.png  images_001/images/00000005_006.png         0   
25  00000008_002.png  images_001/images/00000008_002.png         0   
27  00000010_000.png  images_001/images/00000010_000.png         0   

    Infiltration  Mass  Nodule  Atelectasis  Pneumothorax  Total Diseases  
2              0     0       0            0             0               1  
7              1     0       0            0             0               1  
19             1     0       0            0             0               1  
25             0     0       1            0             0               1  
27             1     0       0            0             0               1  
Total images in dataset: 31085
Dataset saved to ./output_directory/train_df_main_full.csv


Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Epoch,Training Loss,Validation Loss
1,0.4004,0.410731
2,0.4079,0.407516
3,0.4207,0.409602
4,0.4042,0.40903
5,0.4078,0.405485
6,0.4008,0.401431
7,0.3854,0.403665
8,0.3946,0.398229
9,0.392,0.397405
10,0.401,0.402467


# Developed Model with pre-trained on ImageNet-21k (14 million images, 21,843 classes) at resolution 224x224. It was introduced in the paper An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale by Dosovitskiy et al. and then trained again on out data set to achieve better metrics

In [3]:
os.environ['CUDA_VISIBLE_DEVICES'] = '3'
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # "cuda:0" will refer to GPU 3
print(f"Using device: {device}")

file_path = 'train_df.csv'
train_df_main = pd.read_csv(file_path)

# Map image paths to the DataFrame
all_image_paths = {os.path.basename(x): x for x in glob(os.path.join('images*', '*', '*.png'))}
train_df_main["FilePath"] = train_df_main["Image Index"].map(all_image_paths)

# Drop 'No Finding' column
train_df_main.drop(['No Finding'], axis=1, inplace=True)

# Selected labels
selected_labels = ['Effusion', 'Infiltration', 'Mass', 'Nodule', 'Atelectasis', 'Pneumothorax']

# Filter for selected labels
train_df_main = train_df_main[['Image Index', 'FilePath'] + selected_labels]

# Calculate the sum of diseases per patient
train_df_main['Total Diseases'] = train_df_main[selected_labels].sum(axis=1)

# Filter patients with exactly one disease
one_disease_df = train_df_main[train_df_main['Total Diseases'] == 1]

# Calculate the frequency of each disease
disease_counts = one_disease_df[selected_labels].sum()

# Filter diseases with at least 2000 samples
frequent_diseases = disease_counts[disease_counts >= 2000].index.tolist()

# Filter the DataFrame for these frequent diseases
train_df_main = one_disease_df[one_disease_df[frequent_diseases].eq(1).any(axis=1)]

print(train_df_main.head())
print(f"Total images in dataset: {len(train_df_main)}")

output_directory = './output_directory'

if not os.path.exists(output_directory):
    os.makedirs(output_directory)

output_file_path = os.path.join(output_directory, 'train_df_main_full.csv')

try:
    train_df_main.to_csv(output_file_path, index=False)
    print(f"Dataset saved to {output_file_path}")
except PermissionError as e:
    print(f"PermissionError: {e}. Could not save the file at {output_file_path}")

# Define custom dataset for ViT
class CustomImageDataset(Dataset):
    def __init__(self, df, labels, transform=None):
        self.df = df
        self.labels = labels
        self.transform = transform
        self.image_paths = df['FilePath'].values
        self.label_values = df[labels].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        label = torch.tensor(self.label_values[idx], dtype=torch.float32)
        return {"pixel_values": image, "labels": label}

# Custom transform function with data augmentation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create train-test split
train_df, test_df = train_test_split(train_df_main, test_size=0.2, random_state=42)

# Create datasets and dataloaders
train_dataset = CustomImageDataset(train_df, frequent_diseases, transform=transform)
test_dataset = CustomImageDataset(test_df, frequent_diseases, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)  
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize the model with pre-trained weights
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k', num_labels=len(frequent_diseases)).to(device)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=20,  
    per_device_train_batch_size=64,
    learning_rate=2e-5,  
    warmup_steps=1000,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    eval_steps=10,
    load_best_model_at_end=True, )

# Define Trainer

class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels").to(device)
        outputs = model(**inputs)
        loss = torch.nn.BCEWithLogitsLoss()(outputs.logits, labels)
        return (loss, outputs) if return_outputs else loss

    def compute_metrics(self, p):
        preds = torch.sigmoid(p.predictions).cpu().numpy()
        labels = p.label_ids
        preds = (preds > 0.5).astype(int)
        accuracy = accuracy_score(labels, preds)
        f1 = f1_score(labels, preds, average='macro')
        return {"accuracy": accuracy, "f1": f1}

trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Train the model
trainer.train()

# Save the model
model.save_pretrained('./trained_model')

# # Evaluation on test set
# def evaluate_model(trainer, dataset):
#     trainer.model.eval()  
#     predictions, labels = [], []
#     for batch in DataLoader(dataset, batch_size=64):  # Match batch size with training
#         inputs = {"pixel_values": batch["pixel_values"].to(device)}
#         with torch.no_grad():
#             outputs = trainer.model(**inputs)
#         logits = outputs.logits.detach().cpu().numpy()
#         predictions.append(logits)
#         labels.append(batch["labels"].numpy())
#     predictions = np.concatenate(predictions, axis=0)
#     labels = np.concatenate(labels, axis=0)
#     predictions = (predictions > 0.5).astype(int)
#     acc = accuracy_score(labels, predictions)
#     f1 = f1_score(labels, predictions, average='macro')
#     return predictions, labels, acc, f1

# # Evaluate the model
# predictions, labels, accuracy, f1 = evaluate_model(trainer, test_dataset)
# print(f"Test Accuracy: {accuracy:.4f}")
# print(f"Test F1 Score: {f1:.4f}")

# # Classification report
# print("Classification Report:")
# print(classification_report(labels, predictions, target_names=frequent_diseases))

# # Plot all ROC curves in one graph
# plt.figure(figsize=(10, 8))
# for i, label in enumerate(frequent_diseases):
#     fpr, tpr, _ = roc_curve(labels[:, i], predictions[:, i])
#     roc_auc = auc(fpr, tpr)
#     plt.plot(fpr, tpr, lw=2, label=f'ROC curve of {label} (area = {roc_auc:.2f})')

# plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.05])
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('Receiver Operating Characteristic for all labels')
# plt.legend(loc="lower right")
# plt.show()


Using device: cuda:0
         Image Index                            FilePath  Effusion  \
2   00000001_002.png  images_001/images/00000001_002.png         1   
7   00000003_003.png  images_001/images/00000003_003.png         0   
19  00000005_006.png  images_001/images/00000005_006.png         0   
25  00000008_002.png  images_001/images/00000008_002.png         0   
27  00000010_000.png  images_001/images/00000010_000.png         0   

    Infiltration  Mass  Nodule  Atelectasis  Pneumothorax  Total Diseases  
2              0     0       0            0             0               1  
7              1     0       0            0             0               1  
19             1     0       0            0             0               1  
25             0     0       1            0             0               1  
27             1     0       0            0             0               1  
Total images in dataset: 31085
Dataset saved to ./output_directory/train_df_main_full.csv


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Epoch,Training Loss,Validation Loss
1,0.4244,0.42927
2,0.3934,0.394242
3,0.3807,0.367892


KeyboardInterrupt: 