In [2]:
!pip install torch torchvision pandas scikit-learn

Collecting pandas
  Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.8/60.8 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from torchvision.models import vit_b_16, ViT_B_16_Weights
import pandas as pd
import os
from PIL import Image
from sklearn.metrics import f1_score
from tqdm import tqdm

print("Current working directory:", os.getcwd())

# 1. Load the CSV file
csv_path = 'train.csv'
print(f"Attempting to load CSV from: {os.path.abspath(csv_path)}")
df = pd.read_csv(csv_path)
print(f"Successfully loaded CSV with {len(df)} entries")

# 2. Custom Dataset Class
class ImageDataset(Dataset):
    def __init__(self, df, image_dir, transform=None):
        self.df = df
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        try:
            img_name = os.path.join(self.image_dir, self.df.iloc[idx, 0] + '.jpg')
            image = Image.open(img_name).convert('RGB')
            label = self.df.iloc[idx, 1]
            if self.transform:
                image = self.transform(image)
            return image, label
        except FileNotFoundError:
            print(f"Error: File not found: {img_name}")
            raise
        except Exception as e:
            print(f"Error loading image {img_name}: {str(e)}")
            raise

# 3. Enhanced Data Transformations with Augmentation
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 4. Create Dataset and DataLoader
image_dir = 'Images'
print(f"Image directory: {os.path.abspath(image_dir)}")
full_dataset = ImageDataset(df, image_dir, transform=None)  # We'll apply transforms later
train_size = int(0.8 * len(full_dataset))
val_size = int(0.1 * len(full_dataset))
test_size = len(full_dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(full_dataset, [train_size, val_size, test_size])

# Apply transforms
train_dataset.dataset.transform = train_transform
val_dataset.dataset.transform = val_test_transform
test_dataset.dataset.transform = val_test_transform

print(f"Dataset split - Train: {train_size}, Validation: {val_size}, Test: {test_size}")

# 5. Model (Vision Transformer)
def create_model(num_classes):
    model = vit_b_16(weights=ViT_B_16_Weights.IMAGENET1K_V1)
    
    # Replace the last layer of the classifier
    num_ftrs = model.heads[-1].in_features
    model.heads[-1] = nn.Linear(num_ftrs, num_classes)
    
    return model

# 6. Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, device):
    best_val_f1 = 0
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        
        # Validation phase
        model.eval()
        val_loss = 0
        val_preds = []
        val_labels = []
        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, preds = torch.max(outputs, 1)
                val_preds.extend(preds.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())
        
        val_f1 = f1_score(val_labels, val_preds, average='weighted')
        
        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss/len(train_loader):.4f}")
        print(f"Val Loss: {val_loss/len(val_loader):.4f}")
        print(f"Val F1-Score: {val_f1:.4f}")
        
        scheduler.step(val_loss)
        
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), 'best_model_vit.pth')

    return model

if __name__ == '__main__':
    # DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

    # Verify data loading
    print("Verifying data loading...")
    for images, labels in train_loader:
        print("Batch shape:", images.shape)
        print("Labels:", labels)
        break  # Just check the first batch

    # Device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Create model
    num_classes = 42  # Assuming 42 classes as in the original code
    model = create_model(num_classes)
    model.to(device)

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=0.05)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.1)

    # Train model
    num_epochs = 100
    print(f"Starting training for {num_epochs} epochs...")
    model = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, device)

    # 8. Evaluation
    print("Loading best model for evaluation...")
    model.load_state_dict(torch.load('best_model_vit.pth'))
    model.eval()
    test_preds = []
    test_labels = []
    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc="Testing"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            test_preds.extend(preds.cpu().numpy())
            test_labels.extend(labels.cpu().numpy())

    # 9. Calculate F1-Score
    test_f1 = f1_score(test_labels, test_preds, average='weighted')
    print('Test F1-Score:', test_f1)

Current working directory: /workspace/hackathon-online-agriculture-classification
Attempting to load CSV from: /workspace/hackathon-online-agriculture-classification/train.csv
Successfully loaded CSV with 38511 entries
Image directory: /workspace/hackathon-online-agriculture-classification/Images
Dataset split - Train: 30808, Validation: 3851, Test: 3852
Verifying data loading...
Batch shape: torch.Size([32, 3, 224, 224])
Labels: tensor([19, 38, 32, 26, 39, 18, 27, 37, 19, 19, 19, 41, 39, 33, 29,  2, 39, 19,
         0, 14,  2, 32, 41, 39, 41, 29,  3, 39, 23, 26, 41, 22])
Using device: cuda
Starting training for 100 epochs...


Epoch 1/100 - Training: 100%|██████████| 963/963 [02:21<00:00,  6.82it/s]
Epoch 1/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.70it/s]


Epoch 1/100
Train Loss: 0.2050
Val Loss: 0.0542
Val F1-Score: 0.9810


Epoch 2/100 - Training: 100%|██████████| 963/963 [02:22<00:00,  6.75it/s]
Epoch 2/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.63it/s]


Epoch 2/100
Train Loss: 0.0538
Val Loss: 0.0799
Val F1-Score: 0.9777


Epoch 3/100 - Training: 100%|██████████| 963/963 [02:23<00:00,  6.72it/s]
Epoch 3/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 18.04it/s]


Epoch 3/100
Train Loss: 0.0476
Val Loss: 0.0460
Val F1-Score: 0.9846


Epoch 4/100 - Training: 100%|██████████| 963/963 [02:22<00:00,  6.74it/s]
Epoch 4/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 18.67it/s]


Epoch 4/100
Train Loss: 0.0354
Val Loss: 0.1045
Val F1-Score: 0.9718


Epoch 5/100 - Training: 100%|██████████| 963/963 [02:21<00:00,  6.82it/s]
Epoch 5/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.72it/s]


Epoch 5/100
Train Loss: 0.0362
Val Loss: 0.1055
Val F1-Score: 0.9665


Epoch 6/100 - Training: 100%|██████████| 963/963 [02:24<00:00,  6.69it/s]
Epoch 6/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.88it/s]


Epoch 6/100
Train Loss: 0.0291
Val Loss: 0.1505
Val F1-Score: 0.9654


Epoch 7/100 - Training: 100%|██████████| 963/963 [02:22<00:00,  6.77it/s]
Epoch 7/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.69it/s]


Epoch 7/100
Train Loss: 0.0290
Val Loss: 0.0392
Val F1-Score: 0.9875


Epoch 8/100 - Training: 100%|██████████| 963/963 [02:22<00:00,  6.74it/s]
Epoch 8/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.52it/s]


Epoch 8/100
Train Loss: 0.0246
Val Loss: 0.0889
Val F1-Score: 0.9732


Epoch 9/100 - Training: 100%|██████████| 963/963 [02:22<00:00,  6.74it/s]
Epoch 9/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.79it/s]


Epoch 9/100
Train Loss: 0.0164
Val Loss: 0.0515
Val F1-Score: 0.9811


Epoch 10/100 - Training: 100%|██████████| 963/963 [02:21<00:00,  6.82it/s]
Epoch 10/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 18.08it/s]


Epoch 10/100
Train Loss: 0.0323
Val Loss: 0.0714
Val F1-Score: 0.9778


Epoch 11/100 - Training: 100%|██████████| 963/963 [02:22<00:00,  6.75it/s]
Epoch 11/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.39it/s]


Epoch 11/100
Train Loss: 0.0222
Val Loss: 0.0456
Val F1-Score: 0.9848


Epoch 12/100 - Training: 100%|██████████| 963/963 [02:22<00:00,  6.74it/s]
Epoch 12/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.68it/s]


Epoch 12/100
Train Loss: 0.0026
Val Loss: 0.0153
Val F1-Score: 0.9951


Epoch 13/100 - Training: 100%|██████████| 963/963 [02:23<00:00,  6.72it/s]
Epoch 13/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.84it/s]


Epoch 13/100
Train Loss: 0.0006
Val Loss: 0.0129
Val F1-Score: 0.9961


Epoch 14/100 - Training: 100%|██████████| 963/963 [02:21<00:00,  6.82it/s]
Epoch 14/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.59it/s]


Epoch 14/100
Train Loss: 0.0004
Val Loss: 0.0129
Val F1-Score: 0.9956


Epoch 15/100 - Training: 100%|██████████| 963/963 [02:23<00:00,  6.73it/s]
Epoch 15/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.57it/s]


Epoch 15/100
Train Loss: 0.0003
Val Loss: 0.0125
Val F1-Score: 0.9961


Epoch 16/100 - Training: 100%|██████████| 963/963 [02:22<00:00,  6.74it/s]
Epoch 16/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 18.01it/s]


Epoch 16/100
Train Loss: 0.0002
Val Loss: 0.0120
Val F1-Score: 0.9961


Epoch 17/100 - Training: 100%|██████████| 963/963 [02:21<00:00,  6.83it/s]
Epoch 17/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.77it/s]


Epoch 17/100
Train Loss: 0.0001
Val Loss: 0.0127
Val F1-Score: 0.9964


Epoch 18/100 - Training: 100%|██████████| 963/963 [02:23<00:00,  6.72it/s]
Epoch 18/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.96it/s]


Epoch 18/100
Train Loss: 0.0001
Val Loss: 0.0126
Val F1-Score: 0.9961


Epoch 19/100 - Training: 100%|██████████| 963/963 [02:23<00:00,  6.73it/s]
Epoch 19/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.73it/s]


Epoch 19/100
Train Loss: 0.0001
Val Loss: 0.0132
Val F1-Score: 0.9958


Epoch 20/100 - Training: 100%|██████████| 963/963 [02:22<00:00,  6.75it/s]
Epoch 20/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.40it/s]


Epoch 20/100
Train Loss: 0.0001
Val Loss: 0.0130
Val F1-Score: 0.9961


Epoch 21/100 - Training: 100%|██████████| 963/963 [02:23<00:00,  6.70it/s]
Epoch 21/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.96it/s]


Epoch 21/100
Train Loss: 0.0000
Val Loss: 0.0132
Val F1-Score: 0.9961


Epoch 22/100 - Training: 100%|██████████| 963/963 [02:23<00:00,  6.70it/s]
Epoch 22/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.31it/s]


Epoch 22/100
Train Loss: 0.0000
Val Loss: 0.0133
Val F1-Score: 0.9961


Epoch 23/100 - Training: 100%|██████████| 963/963 [02:23<00:00,  6.73it/s]
Epoch 23/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.45it/s]


Epoch 23/100
Train Loss: 0.0000
Val Loss: 0.0134
Val F1-Score: 0.9961


Epoch 24/100 - Training: 100%|██████████| 963/963 [02:23<00:00,  6.71it/s]
Epoch 24/100 - Validation: 100%|██████████| 121/121 [00:06<00:00, 17.37it/s]


Epoch 24/100
Train Loss: 0.0000
Val Loss: 0.0135
Val F1-Score: 0.9961


Epoch 25/100 - Training:  91%|█████████▏| 879/963 [02:11<00:12,  6.82it/s]

In [1]:
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.models import vit_b_16
from PIL import Image
import os
import csv
import pandas as pd
from tqdm import tqdm
from torchvision.models import swin_t, Swin_T_Weights

# Set up the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Function to create the model (same as in the training script)
def create_model(num_classes):
    model = vit_b_16()
    
    # Replace the last layer of the classifier
    num_ftrs = model.heads[-1].in_features
    model.heads[-1] = nn.Linear(num_ftrs, num_classes)
    
    return model

# Create the model
num_classes = 42  # Make sure this matches the number of classes in your trained model
model = create_model(num_classes)

# Load the state dictionary
state_dict = torch.load('best_model_vit_h14.pth', map_location=device)
model.load_state_dict(state_dict)
model.to(device)
model.eval()

# Set up the image transform
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Function to predict the class of a single image
def predict_image(image_path, model, transform):
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model(image)
        _, predicted = torch.max(outputs, 1)
    return predicted.item()

# Read the test.csv file
test_csv_path = 'test.csv'
df = pd.read_csv(test_csv_path)

# Ensure the first column contains image names
image_column = df.columns[0]
image_names = df[image_column].tolist()

# Set the path to the test folder containing the images
test_folder = 'Images'  # Replace with your test folder path if different

# Predict classes for all images and store results
results = []
for image_name in tqdm(image_names, desc="Predicting"):
    # Add .jpg extension to the image name
    image_file = image_name + '.jpg'
    image_path = os.path.join(test_folder, image_file)
    
    # Check if the image file exists
    if os.path.exists(image_path):
        predicted_class = predict_image(image_path, model, transform)
        results.append((image_name, predicted_class))
    else:
        print(f"Warning: Image {image_file} not found in {test_folder}")
        results.append((image_name, "Not Found"))

# Write results to CSV file
output_csv = 'predictions2.csv'
with open(output_csv, 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(['Image Name', 'Predicted Class'])  # Write header
    csv_writer.writerows(results)

print(f"Predictions completed. Results saved to {output_csv}")

RuntimeError: Error(s) in loading state_dict for SwinTransformer:
	Missing key(s) in state_dict: "features.0.0.weight", "features.0.0.bias", "features.0.2.weight", "features.0.2.bias", "features.1.0.norm1.weight", "features.1.0.norm1.bias", "features.1.0.attn.relative_position_bias_table", "features.1.0.attn.relative_position_index", "features.1.0.attn.qkv.weight", "features.1.0.attn.qkv.bias", "features.1.0.attn.proj.weight", "features.1.0.attn.proj.bias", "features.1.0.norm2.weight", "features.1.0.norm2.bias", "features.1.0.mlp.0.weight", "features.1.0.mlp.0.bias", "features.1.0.mlp.3.weight", "features.1.0.mlp.3.bias", "features.1.1.norm1.weight", "features.1.1.norm1.bias", "features.1.1.attn.relative_position_bias_table", "features.1.1.attn.relative_position_index", "features.1.1.attn.qkv.weight", "features.1.1.attn.qkv.bias", "features.1.1.attn.proj.weight", "features.1.1.attn.proj.bias", "features.1.1.norm2.weight", "features.1.1.norm2.bias", "features.1.1.mlp.0.weight", "features.1.1.mlp.0.bias", "features.1.1.mlp.3.weight", "features.1.1.mlp.3.bias", "features.2.reduction.weight", "features.2.norm.weight", "features.2.norm.bias", "features.3.0.norm1.weight", "features.3.0.norm1.bias", "features.3.0.attn.relative_position_bias_table", "features.3.0.attn.relative_position_index", "features.3.0.attn.qkv.weight", "features.3.0.attn.qkv.bias", "features.3.0.attn.proj.weight", "features.3.0.attn.proj.bias", "features.3.0.norm2.weight", "features.3.0.norm2.bias", "features.3.0.mlp.0.weight", "features.3.0.mlp.0.bias", "features.3.0.mlp.3.weight", "features.3.0.mlp.3.bias", "features.3.1.norm1.weight", "features.3.1.norm1.bias", "features.3.1.attn.relative_position_bias_table", "features.3.1.attn.relative_position_index", "features.3.1.attn.qkv.weight", "features.3.1.attn.qkv.bias", "features.3.1.attn.proj.weight", "features.3.1.attn.proj.bias", "features.3.1.norm2.weight", "features.3.1.norm2.bias", "features.3.1.mlp.0.weight", "features.3.1.mlp.0.bias", "features.3.1.mlp.3.weight", "features.3.1.mlp.3.bias", "features.4.reduction.weight", "features.4.norm.weight", "features.4.norm.bias", "features.5.0.norm1.weight", "features.5.0.norm1.bias", "features.5.0.attn.relative_position_bias_table", "features.5.0.attn.relative_position_index", "features.5.0.attn.qkv.weight", "features.5.0.attn.qkv.bias", "features.5.0.attn.proj.weight", "features.5.0.attn.proj.bias", "features.5.0.norm2.weight", "features.5.0.norm2.bias", "features.5.0.mlp.0.weight", "features.5.0.mlp.0.bias", "features.5.0.mlp.3.weight", "features.5.0.mlp.3.bias", "features.5.1.norm1.weight", "features.5.1.norm1.bias", "features.5.1.attn.relative_position_bias_table", "features.5.1.attn.relative_position_index", "features.5.1.attn.qkv.weight", "features.5.1.attn.qkv.bias", "features.5.1.attn.proj.weight", "features.5.1.attn.proj.bias", "features.5.1.norm2.weight", "features.5.1.norm2.bias", "features.5.1.mlp.0.weight", "features.5.1.mlp.0.bias", "features.5.1.mlp.3.weight", "features.5.1.mlp.3.bias", "features.5.2.norm1.weight", "features.5.2.norm1.bias", "features.5.2.attn.relative_position_bias_table", "features.5.2.attn.relative_position_index", "features.5.2.attn.qkv.weight", "features.5.2.attn.qkv.bias", "features.5.2.attn.proj.weight", "features.5.2.attn.proj.bias", "features.5.2.norm2.weight", "features.5.2.norm2.bias", "features.5.2.mlp.0.weight", "features.5.2.mlp.0.bias", "features.5.2.mlp.3.weight", "features.5.2.mlp.3.bias", "features.5.3.norm1.weight", "features.5.3.norm1.bias", "features.5.3.attn.relative_position_bias_table", "features.5.3.attn.relative_position_index", "features.5.3.attn.qkv.weight", "features.5.3.attn.qkv.bias", "features.5.3.attn.proj.weight", "features.5.3.attn.proj.bias", "features.5.3.norm2.weight", "features.5.3.norm2.bias", "features.5.3.mlp.0.weight", "features.5.3.mlp.0.bias", "features.5.3.mlp.3.weight", "features.5.3.mlp.3.bias", "features.5.4.norm1.weight", "features.5.4.norm1.bias", "features.5.4.attn.relative_position_bias_table", "features.5.4.attn.relative_position_index", "features.5.4.attn.qkv.weight", "features.5.4.attn.qkv.bias", "features.5.4.attn.proj.weight", "features.5.4.attn.proj.bias", "features.5.4.norm2.weight", "features.5.4.norm2.bias", "features.5.4.mlp.0.weight", "features.5.4.mlp.0.bias", "features.5.4.mlp.3.weight", "features.5.4.mlp.3.bias", "features.5.5.norm1.weight", "features.5.5.norm1.bias", "features.5.5.attn.relative_position_bias_table", "features.5.5.attn.relative_position_index", "features.5.5.attn.qkv.weight", "features.5.5.attn.qkv.bias", "features.5.5.attn.proj.weight", "features.5.5.attn.proj.bias", "features.5.5.norm2.weight", "features.5.5.norm2.bias", "features.5.5.mlp.0.weight", "features.5.5.mlp.0.bias", "features.5.5.mlp.3.weight", "features.5.5.mlp.3.bias", "features.6.reduction.weight", "features.6.norm.weight", "features.6.norm.bias", "features.7.0.norm1.weight", "features.7.0.norm1.bias", "features.7.0.attn.relative_position_bias_table", "features.7.0.attn.relative_position_index", "features.7.0.attn.qkv.weight", "features.7.0.attn.qkv.bias", "features.7.0.attn.proj.weight", "features.7.0.attn.proj.bias", "features.7.0.norm2.weight", "features.7.0.norm2.bias", "features.7.0.mlp.0.weight", "features.7.0.mlp.0.bias", "features.7.0.mlp.3.weight", "features.7.0.mlp.3.bias", "features.7.1.norm1.weight", "features.7.1.norm1.bias", "features.7.1.attn.relative_position_bias_table", "features.7.1.attn.relative_position_index", "features.7.1.attn.qkv.weight", "features.7.1.attn.qkv.bias", "features.7.1.attn.proj.weight", "features.7.1.attn.proj.bias", "features.7.1.norm2.weight", "features.7.1.norm2.bias", "features.7.1.mlp.0.weight", "features.7.1.mlp.0.bias", "features.7.1.mlp.3.weight", "features.7.1.mlp.3.bias", "norm.weight", "norm.bias", "head.weight", "head.bias". 
	Unexpected key(s) in state_dict: "class_token", "conv_proj.weight", "conv_proj.bias", "encoder.pos_embedding", "encoder.layers.encoder_layer_0.ln_1.weight", "encoder.layers.encoder_layer_0.ln_1.bias", "encoder.layers.encoder_layer_0.self_attention.in_proj_weight", "encoder.layers.encoder_layer_0.self_attention.in_proj_bias", "encoder.layers.encoder_layer_0.self_attention.out_proj.weight", "encoder.layers.encoder_layer_0.self_attention.out_proj.bias", "encoder.layers.encoder_layer_0.ln_2.weight", "encoder.layers.encoder_layer_0.ln_2.bias", "encoder.layers.encoder_layer_0.mlp.0.weight", "encoder.layers.encoder_layer_0.mlp.0.bias", "encoder.layers.encoder_layer_0.mlp.3.weight", "encoder.layers.encoder_layer_0.mlp.3.bias", "encoder.layers.encoder_layer_1.ln_1.weight", "encoder.layers.encoder_layer_1.ln_1.bias", "encoder.layers.encoder_layer_1.self_attention.in_proj_weight", "encoder.layers.encoder_layer_1.self_attention.in_proj_bias", "encoder.layers.encoder_layer_1.self_attention.out_proj.weight", "encoder.layers.encoder_layer_1.self_attention.out_proj.bias", "encoder.layers.encoder_layer_1.ln_2.weight", "encoder.layers.encoder_layer_1.ln_2.bias", "encoder.layers.encoder_layer_1.mlp.0.weight", "encoder.layers.encoder_layer_1.mlp.0.bias", "encoder.layers.encoder_layer_1.mlp.3.weight", "encoder.layers.encoder_layer_1.mlp.3.bias", "encoder.layers.encoder_layer_2.ln_1.weight", "encoder.layers.encoder_layer_2.ln_1.bias", "encoder.layers.encoder_layer_2.self_attention.in_proj_weight", "encoder.layers.encoder_layer_2.self_attention.in_proj_bias", "encoder.layers.encoder_layer_2.self_attention.out_proj.weight", "encoder.layers.encoder_layer_2.self_attention.out_proj.bias", "encoder.layers.encoder_layer_2.ln_2.weight", "encoder.layers.encoder_layer_2.ln_2.bias", "encoder.layers.encoder_layer_2.mlp.0.weight", "encoder.layers.encoder_layer_2.mlp.0.bias", "encoder.layers.encoder_layer_2.mlp.3.weight", "encoder.layers.encoder_layer_2.mlp.3.bias", "encoder.layers.encoder_layer_3.ln_1.weight", "encoder.layers.encoder_layer_3.ln_1.bias", "encoder.layers.encoder_layer_3.self_attention.in_proj_weight", "encoder.layers.encoder_layer_3.self_attention.in_proj_bias", "encoder.layers.encoder_layer_3.self_attention.out_proj.weight", "encoder.layers.encoder_layer_3.self_attention.out_proj.bias", "encoder.layers.encoder_layer_3.ln_2.weight", "encoder.layers.encoder_layer_3.ln_2.bias", "encoder.layers.encoder_layer_3.mlp.0.weight", "encoder.layers.encoder_layer_3.mlp.0.bias", "encoder.layers.encoder_layer_3.mlp.3.weight", "encoder.layers.encoder_layer_3.mlp.3.bias", "encoder.layers.encoder_layer_4.ln_1.weight", "encoder.layers.encoder_layer_4.ln_1.bias", "encoder.layers.encoder_layer_4.self_attention.in_proj_weight", "encoder.layers.encoder_layer_4.self_attention.in_proj_bias", "encoder.layers.encoder_layer_4.self_attention.out_proj.weight", "encoder.layers.encoder_layer_4.self_attention.out_proj.bias", "encoder.layers.encoder_layer_4.ln_2.weight", "encoder.layers.encoder_layer_4.ln_2.bias", "encoder.layers.encoder_layer_4.mlp.0.weight", "encoder.layers.encoder_layer_4.mlp.0.bias", "encoder.layers.encoder_layer_4.mlp.3.weight", "encoder.layers.encoder_layer_4.mlp.3.bias", "encoder.layers.encoder_layer_5.ln_1.weight", "encoder.layers.encoder_layer_5.ln_1.bias", "encoder.layers.encoder_layer_5.self_attention.in_proj_weight", "encoder.layers.encoder_layer_5.self_attention.in_proj_bias", "encoder.layers.encoder_layer_5.self_attention.out_proj.weight", "encoder.layers.encoder_layer_5.self_attention.out_proj.bias", "encoder.layers.encoder_layer_5.ln_2.weight", "encoder.layers.encoder_layer_5.ln_2.bias", "encoder.layers.encoder_layer_5.mlp.0.weight", "encoder.layers.encoder_layer_5.mlp.0.bias", "encoder.layers.encoder_layer_5.mlp.3.weight", "encoder.layers.encoder_layer_5.mlp.3.bias", "encoder.layers.encoder_layer_6.ln_1.weight", "encoder.layers.encoder_layer_6.ln_1.bias", "encoder.layers.encoder_layer_6.self_attention.in_proj_weight", "encoder.layers.encoder_layer_6.self_attention.in_proj_bias", "encoder.layers.encoder_layer_6.self_attention.out_proj.weight", "encoder.layers.encoder_layer_6.self_attention.out_proj.bias", "encoder.layers.encoder_layer_6.ln_2.weight", "encoder.layers.encoder_layer_6.ln_2.bias", "encoder.layers.encoder_layer_6.mlp.0.weight", "encoder.layers.encoder_layer_6.mlp.0.bias", "encoder.layers.encoder_layer_6.mlp.3.weight", "encoder.layers.encoder_layer_6.mlp.3.bias", "encoder.layers.encoder_layer_7.ln_1.weight", "encoder.layers.encoder_layer_7.ln_1.bias", "encoder.layers.encoder_layer_7.self_attention.in_proj_weight", "encoder.layers.encoder_layer_7.self_attention.in_proj_bias", "encoder.layers.encoder_layer_7.self_attention.out_proj.weight", "encoder.layers.encoder_layer_7.self_attention.out_proj.bias", "encoder.layers.encoder_layer_7.ln_2.weight", "encoder.layers.encoder_layer_7.ln_2.bias", "encoder.layers.encoder_layer_7.mlp.0.weight", "encoder.layers.encoder_layer_7.mlp.0.bias", "encoder.layers.encoder_layer_7.mlp.3.weight", "encoder.layers.encoder_layer_7.mlp.3.bias", "encoder.layers.encoder_layer_8.ln_1.weight", "encoder.layers.encoder_layer_8.ln_1.bias", "encoder.layers.encoder_layer_8.self_attention.in_proj_weight", "encoder.layers.encoder_layer_8.self_attention.in_proj_bias", "encoder.layers.encoder_layer_8.self_attention.out_proj.weight", "encoder.layers.encoder_layer_8.self_attention.out_proj.bias", "encoder.layers.encoder_layer_8.ln_2.weight", "encoder.layers.encoder_layer_8.ln_2.bias", "encoder.layers.encoder_layer_8.mlp.0.weight", "encoder.layers.encoder_layer_8.mlp.0.bias", "encoder.layers.encoder_layer_8.mlp.3.weight", "encoder.layers.encoder_layer_8.mlp.3.bias", "encoder.layers.encoder_layer_9.ln_1.weight", "encoder.layers.encoder_layer_9.ln_1.bias", "encoder.layers.encoder_layer_9.self_attention.in_proj_weight", "encoder.layers.encoder_layer_9.self_attention.in_proj_bias", "encoder.layers.encoder_layer_9.self_attention.out_proj.weight", "encoder.layers.encoder_layer_9.self_attention.out_proj.bias", "encoder.layers.encoder_layer_9.ln_2.weight", "encoder.layers.encoder_layer_9.ln_2.bias", "encoder.layers.encoder_layer_9.mlp.0.weight", "encoder.layers.encoder_layer_9.mlp.0.bias", "encoder.layers.encoder_layer_9.mlp.3.weight", "encoder.layers.encoder_layer_9.mlp.3.bias", "encoder.layers.encoder_layer_10.ln_1.weight", "encoder.layers.encoder_layer_10.ln_1.bias", "encoder.layers.encoder_layer_10.self_attention.in_proj_weight", "encoder.layers.encoder_layer_10.self_attention.in_proj_bias", "encoder.layers.encoder_layer_10.self_attention.out_proj.weight", "encoder.layers.encoder_layer_10.self_attention.out_proj.bias", "encoder.layers.encoder_layer_10.ln_2.weight", "encoder.layers.encoder_layer_10.ln_2.bias", "encoder.layers.encoder_layer_10.mlp.0.weight", "encoder.layers.encoder_layer_10.mlp.0.bias", "encoder.layers.encoder_layer_10.mlp.3.weight", "encoder.layers.encoder_layer_10.mlp.3.bias", "encoder.layers.encoder_layer_11.ln_1.weight", "encoder.layers.encoder_layer_11.ln_1.bias", "encoder.layers.encoder_layer_11.self_attention.in_proj_weight", "encoder.layers.encoder_layer_11.self_attention.in_proj_bias", "encoder.layers.encoder_layer_11.self_attention.out_proj.weight", "encoder.layers.encoder_layer_11.self_attention.out_proj.bias", "encoder.layers.encoder_layer_11.ln_2.weight", "encoder.layers.encoder_layer_11.ln_2.bias", "encoder.layers.encoder_layer_11.mlp.0.weight", "encoder.layers.encoder_layer_11.mlp.0.bias", "encoder.layers.encoder_layer_11.mlp.3.weight", "encoder.layers.encoder_layer_11.mlp.3.bias", "encoder.ln.weight", "encoder.ln.bias", "heads.head.weight", "heads.head.bias". 