In [1]:
import torch.optim as optim
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models
from torch.autograd import Variable

In [2]:
# setting global parameters
learning_rate = 1e-4
BATCH_SIZE = 32
EPOCHS = 20
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# data preprocessing
 
transform_train = transforms.Compose([
   transforms.Resize((224, 224)),  # Resize to 224x224 to match ViT input size
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

In [4]:
# loading data
dataset_train = datasets.ImageFolder('output/train', transform_train)
print(dataset_train.imgs)
# label
print(dataset_train.class_to_idx)
dataset_val = datasets.ImageFolder('output/val', transform_test)

print(dataset_val.class_to_idx)
dataset_test = datasets.ImageFolder('output/test', transform_test)

print(dataset_test.class_to_idx)
 
# loading data
train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=False)
test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

[('output/train\\Kirmizi_Pistachio\\kirmizi (1).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (10).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (11).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (12).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (13).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (14).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (15).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (16).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (17).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (18).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (19).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (2).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (22).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (23).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (24).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (25).jpg', 0), ('output/train\\Kirmizi_Pistachio\\kirmizi (26).jpg', 0), ('output/train\

<h1>Vision Transformer</h1>

In [5]:
from timm import create_model

# define Vision Transformer model
class ViTClassifier(nn.Module):
    def __init__(self):
        super(ViTClassifier, self).__init__()
        # loading Vision Transformer 
        self.vit = create_model('vit_base_patch16_224', pretrained=True)
        self.vit.head = nn.Linear(self.vit.head.in_features, 2)  # modify output for binary classification

    def forward(self, x):
        return self.vit(x)

# initial model、loss function and optimizer
model = ViTClassifier().cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

# train model
def train():
    best_val_acc = 0.0
    
    for epoch in range(EPOCHS):
        model.train()
        running_loss = 0.0
        corrects = 0
        total = 0
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs.data, 1)
            corrects += torch.sum(preds == labels)
            total += labels.size(0)
        
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = corrects.double() / total
        
        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_acc.item())
        
        print(f"Epoch {epoch}/{EPOCHS - 1}")
        print(f"Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

        
        model.eval()
        val_loss = 0.0
        val_corrects = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = Variable(inputs).to(DEVICE), Variable(labels).to(DEVICE)
                
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs.data, 1)
                val_corrects += torch.sum(preds == labels)
                val_total += labels.size(0)
        
        val_loss /= len(val_loader.dataset)
        val_acc = val_corrects.double() / val_total
        
        val_losses.append(val_loss)
        val_accuracies.append(val_acc.item())
        
        print(f"Validation Loss: {val_loss:.4f} Validation Acc: {val_acc:.4f}")

        # Save the best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'ViT_best_model_pretrained.pth')

train()


LocalEntryNotFoundError: An error happened while trying to locate the file on the Hub and we cannot find the requested files in the local cache. Please check your connection and try again or make sure your Internet connection is on.

In [None]:
import matplotlib.pyplot as plt
# Plotting the curves
epochs_range = range(EPOCHS)
plt.figure(figsize=(12, 5))

# Plot loss
plt.subplot(1, 2, 1)
plt.plot(epochs_range, train_losses, label='Train Loss')
plt.plot(epochs_range, val_losses, label='Validation Loss')
plt.title('Loss Curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.xticks(range(0, EPOCHS + 1, 2)) 

# Plot accuracy
plt.subplot(1, 2, 2)
plt.plot(epochs_range, train_accuracies, label='Train Accuracy')
plt.plot(epochs_range, val_accuracies, label='Validation Accuracy')
plt.title('Accuracy Curve')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.xticks(range(0, EPOCHS + 1, 2)) 

plt.tight_layout()
plt.show()

<h1>Evaluation</h1>

In [None]:
import pandas as pd
import os
from sklearn.metrics import classification_report
import numpy as np

# Load the best model
model.load_state_dict(torch.load('ViT_best_model_pretrained.pth'))

# Evaluation
model.eval()
y_true, y_pred = [], []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = Variable(inputs).to(DEVICE), Variable(labels).to(DEVICE)
        outputs = model(inputs)
        _, preds = torch.max(outputs.data, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

print(classification_report(np.array(y_true), np.array(y_pred), target_names=dataset_test.classes))

# Save results to DataFrame
filenames = [os.path.basename(x[0]) for x in test_loader.dataset.imgs]
class_indices = test_loader.dataset.class_to_idx
indices = {v: k for k, v in class_indices.items()}

val_df = pd.DataFrame({
    'filename': filenames,
    'actual': [indices[x] for x in y_true],
    'predicted': [indices[x] for x in y_pred]
})
val_df['Same'] = val_df['actual'] == val_df['predicted']
val_df = val_df.sample(frac=1).reset_index(drop=True)

print(val_df.head(10))


In [None]:
import timm
print(timm.list_models())
