In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
#         print(os.path.join(dirname, filename))
        pass

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from transformers import ViTForImageClassification, ViTFeatureExtractor, AdamW
from transformers import TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report
import numpy as np



In [4]:
# Define data directories
train_dir = '/kaggle/input/chest-ctscan-images/Data/train'
valid_dir = '/kaggle/input/chest-ctscan-images/Data/valid'
test_dir = '/kaggle/input/chest-ctscan-images/Data/test'

In [5]:
# Image transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

In [6]:
# Load datasets
train_dataset = datasets.ImageFolder(train_dir, transform=transform)
valid_dataset = datasets.ImageFolder(valid_dir, transform=transform)
test_dataset = datasets.ImageFolder(test_dir, transform=transform)

In [7]:
# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)


In [8]:
# Load pre-trained ViT model
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k', num_labels=4)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Training settings
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=7

In [10]:
optimizer = AdamW(model.parameters(), lr=1e-4)

# Define loss function
loss_fn = torch.nn.CrossEntropyLoss()



In [11]:
# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = loss_fn(outputs.logits, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

    # Validation loop
    model.eval()
    total_correct = 0
    total_samples = 0
    with torch.no_grad():
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.logits, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)
    accuracy = total_correct / total_samples
    print(f"Validation Accuracy: {accuracy:.4f}")

Epoch [1/50], Loss: 1.0965
Validation Accuracy: 0.4722
Epoch [2/50], Loss: 0.6617
Validation Accuracy: 0.7361
Epoch [3/50], Loss: 0.3389
Validation Accuracy: 0.6389
Epoch [4/50], Loss: 0.2995
Validation Accuracy: 0.8611
Epoch [5/50], Loss: 0.1181
Validation Accuracy: 0.7361
Epoch [6/50], Loss: 0.0746
Validation Accuracy: 0.8194
Epoch [7/50], Loss: 0.0592
Validation Accuracy: 0.8750
Epoch [8/50], Loss: 0.0647
Validation Accuracy: 0.8611
Epoch [9/50], Loss: 0.0498
Validation Accuracy: 0.6667
Epoch [10/50], Loss: 0.0813
Validation Accuracy: 0.7222
Epoch [11/50], Loss: 0.0597
Validation Accuracy: 0.7778
Epoch [12/50], Loss: 0.0295
Validation Accuracy: 0.8611
Epoch [13/50], Loss: 0.0262
Validation Accuracy: 0.7917
Epoch [14/50], Loss: 0.0224
Validation Accuracy: 0.8750
Epoch [15/50], Loss: 0.0201
Validation Accuracy: 0.8750
Epoch [16/50], Loss: 0.0185
Validation Accuracy: 0.8750
Epoch [17/50], Loss: 0.0170
Validation Accuracy: 0.8750
Epoch [18/50], Loss: 0.0159
Validation Accuracy: 0.8750
E

In [12]:
# Save the model
model_save_path = 'vit_model.pth'
torch.save(model.state_dict(), model_save_path)

In [13]:
# Test loop and metrics calculation
model.eval()
all_labels = []
all_predictions = []
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.logits, 1)
        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())

# Calculate metrics
accuracy = accuracy_score(all_labels, all_predictions)
precision = precision_score(all_labels, all_predictions, average='macro')
recall = recall_score(all_labels, all_predictions, average='macro')
f1 = f1_score(all_labels, all_predictions, average='macro')

print(f"Test Metrics:\nAccuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

print("Training complete. Model saved to", model_save_path)

Test Metrics:
Accuracy: 0.9175, Precision: 0.9288, Recall: 0.9251, F1 Score: 0.9252
Training complete. Model saved to vit_model.pth
