In [None]:
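# Uncomment and run once on a fresh runtime to install the dependencies
# (%pip installs into the environment of the running kernel):
# %pip install timm torch torchvision transformers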

In [None]:
import torch
import timm
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from PIL import Image

In [None]:
# Mount Google Drive at /content/drive; the dataset directories used below live
# under that mount point. The google.colab module is supplied by the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Dataset locations on the mounted Drive: one directory for the training split
# and one for the test split. Both are passed to ImageFolder as dataset roots.
train_dir = "/content/drive/MyDrive/colab datasets/kidney/train"
test_dir = "/content/drive/MyDrive/colab datasets/kidney/test"

In [None]:
# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalize each of the three channels with mean 0.5 and std 0.5.
# NOTE(review): presumably chosen to match the pretrained checkpoint's expected
# input statistics — confirm against the timm model's data config.
_resize_step = transforms.Resize((224, 224))
_tensor_step = transforms.ToTensor()
_normalize_step = transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3)

transform = transforms.Compose([_resize_step, _tensor_step, _normalize_step])

In [None]:
# Build one ImageFolder dataset per split, sharing the same preprocessing.
train_data, test_data = (
    ImageFolder(root=split_root, transform=transform)
    for split_root in (train_dir, test_dir)
)

# Batches of 32 for both splits; only the training batches are shuffled.
train_loader = DataLoader(train_data, shuffle=True, batch_size=32)
test_loader = DataLoader(test_data, shuffle=False, batch_size=32)

In [None]:
# Use the GPU when CUDA is available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained "vit_large_patch16_224" from timm, created with num_classes=4 and
# moved onto the selected device.
model = timm.create_model(
    "vit_large_patch16_224",
    pretrained=True,
    num_classes=4,
).to(device)

# Cross-entropy loss, and Adam over all model parameters with lr = 1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

In [None]:
# Fine-tune for a fixed number of epochs. After each epoch, report the mean of
# the per-batch training losses and the accuracy measured on test_loader.
num_epochs = 10
for epoch in range(num_epochs):
    # ---- optimisation pass over the training batches ----
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the batch losses seen this epoch
    avg_loss = total_loss / len(train_loader)

    # ---- evaluation pass: eval mode, gradient tracking disabled ----
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Index of the largest logit per sample is the predicted class
            predicted = outputs.argmax(dim=1)
            total += labels.shape[0]
            correct += predicted.eq(labels).sum().item()

    # Percentage of test samples whose prediction equals the label
    test_accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")


Epoch 1/10, Loss: 0.6640, Test Accuracy: 86.25%
Epoch 2/10, Loss: 0.0233, Test Accuracy: 86.25%
Epoch 3/10, Loss: 0.0028, Test Accuracy: 91.98%
Epoch 4/10, Loss: 0.0000, Test Accuracy: 92.08%
Epoch 5/10, Loss: 0.0000, Test Accuracy: 91.98%
Epoch 6/10, Loss: 0.0000, Test Accuracy: 91.98%
Epoch 7/10, Loss: 0.0000, Test Accuracy: 91.56%
Epoch 8/10, Loss: 0.0000, Test Accuracy: 89.58%
Epoch 9/10, Loss: 0.0000, Test Accuracy: 90.31%
Epoch 10/10, Loss: 0.0000, Test Accuracy: 89.79%


In [None]:
# Save only the model's state dict (its parameters/buffers, not the module object)
# to "vit_kidney.pth", a path relative to the current working directory. This is
# the default model_path that classify_kidney_image loads from.
torch.save(model.state_dict(), "vit_kidney.pth")

In [None]:
# Function to classify a new image
def classify_kidney_image(image_path, model_path="vit_kidney.pth", class_names=None):
    """Classify a single image with a saved ViT checkpoint and return its label.

    The checkpoint is loaded onto the CPU and inference runs on the CPU, so the
    function works on machines without a GPU.

    Args:
        image_path: Path to the image file. It is opened with PIL and converted
            to RGB before preprocessing.
        model_path: Path to a state dict written by ``torch.save`` for a
            ``vit_large_patch16_224`` timm model whose number of output classes
            equals ``len(class_names)``.
        class_names: Optional sequence of labels, indexed by predicted class id.
            Defaults to ``["cyst", "normal", "stone", "tumor"]``.
            NOTE(review): the order must match the label ids used in training
            (e.g. the training dataset's ``class_to_idx``) — confirm against the
            dataset folder names.

    Returns:
        The entry of ``class_names`` at the index of the highest model output.

    Raises:
        ValueError: If ``class_names`` is given but empty.
    """
    if class_names is None:
        class_names = ["cyst", "normal", "stone", "tumor"]
    else:
        class_names = list(class_names)
        if not class_names:
            raise ValueError("class_names must contain at least one label")

    # Size the classification head from the label list so the two cannot drift
    # apart; load_state_dict below fails loudly if the checkpoint disagrees.
    model = timm.create_model(
        "vit_large_patch16_224", pretrained=False, num_classes=len(class_names)
    )

    # weights_only=True restricts unpickling to tensors and plain containers, so
    # loading a checkpoint file cannot execute arbitrary pickled code.
    state_dict = torch.load(
        model_path, map_location=torch.device("cpu"), weights_only=True
    )
    model.load_state_dict(state_dict)
    model.eval()

    # Same preprocessing as the notebook's training pipeline: 224x224 resize,
    # tensor conversion, per-channel normalization with mean 0.5 / std 0.5.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])

    # Load the image, force three channels, and add a leading batch dimension.
    image = Image.open(image_path).convert("RGB")
    image = transform(image).unsqueeze(0)

    # Forward pass without gradient tracking; pick the highest-scoring class.
    with torch.no_grad():
        output = model(image)
        predicted_class = torch.argmax(output, dim=1).item()

    return class_names[predicted_class]

In [None]:
# Example usage: classify one image taken from the test split's "cyst" folder,
# loading the checkpoint from the function's default model_path.
image_path = "/content/drive/MyDrive/colab datasets/kidney/test/cyst/cyst (1).jpg"
result = classify_kidney_image(image_path)

  model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))


In [None]:
# Show the label returned by classify_kidney_image for the example image.
print(f"Predicted Class: {result}")

Predicted Class: cyst
