In [1]:
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms

from DIPutils import DIPTransform
from visual_transformer import ConvNeXtViT


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Set dataset path.
# The location can be overridden with the KIDNEY_STONE_DATASET_DIR environment
# variable so the notebook runs on other machines; when the variable is unset
# the original local path is used unchanged.
dataset_path = os.environ.get(
    "KIDNEY_STONE_DATASET_DIR",
    r"C:\Users\parth\Downloads\Kindey-Stone-Dataset\Kindey-Stone-Dataset\Augmented",
)

In [3]:
# Preprocessing applied to every image, in order:
#   1. resize to the 224x224 resolution used as the model input size
#   2. convert to a tensor
# The DIP pipeline (DIPTransform(target_size=500) followed by
# transforms.ToPILImage()) is currently disabled and not part of this chain.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

In [4]:
# Load the dataset from dataset_path, applying `transform` to every image
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

# Define split sizes
total_size = len(dataset)
val_size = int(0.15 * total_size)  # 15% for validation
test_size = int(0.15 * total_size)  # 15% for testing
train_size = total_size - (val_size + test_size)  # Remainder (~70%) for training

# Split dataset with a fixed seed so the train/val/test membership is the same
# on every run. Without this, re-running the notebook (e.g. to evaluate a saved
# checkpoint) would draw a different split and could test on training images.
split_seed = 42
train_ds, val_ds, test_ds = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Create data loaders; only the training loader is shuffled
batch_size = 64
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

print(f"Train size: {len(train_ds)}, Validation size: {len(val_ds)}, Test size: {len(test_ds)}")
# also print the number of classes
num_classes = len(dataset.classes)
print(f"Number of classes: {num_classes}")

Train size: 24821, Validation size: 5318, Test size: 5318
Number of classes: 2


In [5]:
# Define model.
# Pass the class count discovered from the dataset (num_classes, computed when
# the dataset was loaded) instead of a hard-coded 2, so the model and the data
# cannot silently disagree if the dataset folder changes.
model = ConvNeXtViT(input_shape=(224, 224, 3), num_classes=num_classes)
print(model)

ConvNeXtViT(
  (convnext): ConvNeXt(
    (stem): Sequential(
      (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
    )
    (stages): Sequential(
      (0): ConvNeXtStage(
        (downsample): Identity()
        (blocks): Sequential(
          (0): ConvNeXtBlock(
            (conv_dw): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
            (norm): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
            (mlp): Mlp(
              (fc1): Linear(in_features=96, out_features=384, bias=True)
              (act): GELU()
              (drop1): Dropout(p=0.0, inplace=False)
              (norm): Identity()
              (fc2): Linear(in_features=384, out_features=96, bias=True)
              (drop2): Dropout(p=0.0, inplace=False)
            )
            (shortcut): Identity()
            (drop_path): Identity()
          )
          (1): ConvNeXtBlock(
            (conv_dw): Co

In [6]:
# Loss function and optimizer used by the training loop.
# NOTE(review): the logged losses plateau around 0.33 while accuracy is ~98%.
# ln(1 + e^-1) ~= 0.313 is the floor CrossEntropyLoss reaches when its inputs
# are already probabilities in [0, 1], so this would be consistent with
# ConvNeXtViT applying softmax internally -- TODO confirm the model returns
# raw logits.
learning_rate = 1e-4
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
def compute_accuracy(outputs, labels):
    """Return the fraction of rows whose highest-scoring column equals the label.

    Args:
        outputs: Tensor of per-class scores; the prediction for each row is the
            index of its largest value along dim 1.
        labels: Tensor of target class indices, compared element-wise with the
            predictions.

    Returns:
        The mean of the element-wise matches as a Python float.
    """
    predicted_classes = outputs.argmax(dim=1)
    matches = predicted_classes.eq(labels).float()
    return matches.mean().item()


In [7]:
# Train for a fixed number of epochs, evaluating on the validation split after
# each one. Losses are accumulated per sample (batch mean * batch size) and
# divided by the number of samples, so a smaller final batch is not
# over-weighted in the reported epoch loss.
num_epochs = 20  # Set number of epochs
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model.to(device)

for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct, total = 0, 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()  # Zero gradients
        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Mean loss over this batch
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count  # Undo the batch mean
        correct += (torch.argmax(outputs, dim=1) == labels).sum().item()
        total += batch_count

    train_loss = running_loss / total
    train_accuracy = correct / total

    # ---- Validation phase ----
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    val_correct, val_total = 0, 0

    with torch.no_grad():  # No need to track gradients for validation
        for val_inputs, val_labels in val_loader:
            val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)
            val_outputs = model(val_inputs)
            loss = criterion(val_outputs, val_labels)

            val_batch_count = val_labels.size(0)
            val_loss += loss.item() * val_batch_count
            val_correct += (torch.argmax(val_outputs, dim=1) == val_labels).sum().item()
            val_total += val_batch_count

    val_loss /= val_total
    val_accuracy = val_correct / val_total

    print(f"Epoch [{epoch+1}/{num_epochs}]")
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}\n")

Using device: cuda
Epoch [1/20]
Train Loss: 0.4504, Train Accuracy: 0.8556
Val Loss: 0.3751, Val Accuracy: 0.9359

Epoch [2/20]
Train Loss: 0.3741, Train Accuracy: 0.9376
Val Loss: 0.3582, Val Accuracy: 0.9543

Epoch [3/20]
Train Loss: 0.3610, Train Accuracy: 0.9515
Val Loss: 0.3535, Val Accuracy: 0.9590

Epoch [4/20]
Train Loss: 0.3459, Train Accuracy: 0.9661
Val Loss: 0.3518, Val Accuracy: 0.9603

Epoch [5/20]
Train Loss: 0.3518, Train Accuracy: 0.9608
Val Loss: 0.3449, Val Accuracy: 0.9671

Epoch [6/20]
Train Loss: 0.3446, Train Accuracy: 0.9680
Val Loss: 0.3419, Val Accuracy: 0.9716

Epoch [7/20]
Train Loss: 0.3368, Train Accuracy: 0.9759
Val Loss: 0.3333, Val Accuracy: 0.9795

Epoch [8/20]
Train Loss: 0.3312, Train Accuracy: 0.9815
Val Loss: 0.3328, Val Accuracy: 0.9799

Epoch [9/20]
Train Loss: 0.3392, Train Accuracy: 0.9739
Val Loss: 0.3887, Val Accuracy: 0.9252

Epoch [10/20]
Train Loss: 0.3556, Train Accuracy: 0.9569
Val Loss: 0.3338, Val Accuracy: 0.9788

Epoch [11/20]
Train 

In [8]:
# Save the model's weights (state_dict only) to disk
checkpoint_path = 'model20.pth'
trained_weights = model.state_dict()
torch.save(trained_weights, checkpoint_path)

In [9]:
# Test the model: report loss and accuracy on the test split.
# The loss is accumulated per sample (batch mean * batch size) and divided by
# the number of samples, so a smaller final batch is not over-weighted.
model.eval()  # Set model to evaluation mode
correct, total = 0, 0
running_loss = 0.0
criterion = nn.CrossEntropyLoss()

with torch.no_grad():  # No need to track gradients for evaluation
    for test_inputs, test_labels in test_loader:
        test_inputs, test_labels = test_inputs.to(device), test_labels.to(device)
        test_outputs = model(test_inputs)
        loss = criterion(test_outputs, test_labels)  # Mean loss over this batch

        batch_count = test_labels.size(0)
        running_loss += loss.item() * batch_count  # Undo the batch mean
        correct += (torch.argmax(test_outputs, dim=1) == test_labels).sum().item()
        total += batch_count
test_loss = running_loss / total
test_accuracy = correct / total
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")


Test Loss: 0.3353, Test Accuracy: 0.9771
