In [1]:
!pip install timm

Collecting timm
  Obtaining dependency information for timm from https://files.pythonhosted.org/packages/68/99/2018622d268f6017ddfa5ee71f070bad5d07590374793166baa102849d17/timm-0.9.16-py3-none-any.whl.metadata
  Downloading timm-0.9.16-py3-none-any.whl.metadata (38 kB)
Collecting torch (from timm)
  Obtaining dependency information for torch from https://files.pythonhosted.org/packages/96/23/18b9c16c18a77755e7f15173821c7100f11e6b3b7717bea8d729bdeb92c0/torch-2.2.2-cp311-none-macosx_11_0_arm64.whl.metadata
  Downloading torch-2.2.2-cp311-none-macosx_11_0_arm64.whl.metadata (25 kB)
Collecting torchvision (from timm)
  Obtaining dependency information for torchvision from https://files.pythonhosted.org/packages/36/15/c48f74f8f8d382677ef016b65f09969028a1549b8a518c18894deb95b544/torchvision-0.17.2-cp311-cp311-macosx_11_0_arm64.whl.metadata
  Downloading torchvision-0.17.2-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.6 kB)
Collecting typing-extensions>=3.7.4.3 (from huggingface_hub->timm)
  

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import cv2

import timm
import numpy as np

from PIL import Image, ImageFilter
import io
import random
import numpy.random as npr
from skimage import data
from scipy.ndimage import rotate
import torchvision
import os
from torchvision.transforms.functional import to_pil_image
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from transformers import Swinv2ForImageClassification, SwinConfig
from torch.optim import AdamW
from torchvision import transforms, datasets
from preprocessing import smash_n_reconstruct, apply_high_pass_filter



ModuleNotFoundError: No module named 'cv2'

In [6]:
from torchvision.transforms import Compose, Resize, ToTensor, Normalize

# Define the image transformations
transformations = Compose([
    Resize((224, 224)),  # EfficientNet typically uses 224x224 inputs
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def preprocess_image(image):
    # Convert image to PIL if it's not already
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Apply smash and reconstruct and high pass filter
    rich, poor = smash_n_reconstruct(image)
    rich = apply_high_pass_filter(rich)
    poor = apply_high_pass_filter(poor)

    # Apply the resize, to tensor, and normalization transforms
    rich = transformations(rich)
    poor = transformations(poor)

    return rich, poor


Preprocessing and Data loader


In [8]:

def preprocess_image(image):
    rich, poor = smash_n_reconstruct(image)
    rich = apply_high_pass_filter(rich)
    poor = apply_high_pass_filter(poor)
    return rich, poor


class DatasetAI(Dataset):
    def __init__(self, root_dir, transform=None, split='train'):
        self.root_dir = root_dir
        self.transform = transform
        self.split = split  # This can be 'train', 'val', or 'test'
        self.samples = []
        self.label_count = {'ai': 0, 'nature': 0}

        for model in sorted(os.listdir(root_dir)):
            model_path = os.path.join(root_dir, model)
            if os.path.isdir(model_path):
                # Depending on the split, choose the appropriate subdirectory
                split_folder = 'train' if split == 'train' else 'val'
                data_dir = os.path.join(model_path, f'imagenet_{model.split("_")[0]}', split_folder)
                for class_label in ['ai', 'nature']:
                    class_path = os.path.join(data_dir, class_label)
                    if os.path.exists(class_path):
                        for img_name in os.listdir(class_path):
                            if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                                img_path = os.path.join(class_path, img_name)
                                self.samples.append((img_path, class_label))
                                self.label_count[class_label] += 1

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, class_label = self.samples[idx]
        image = Image.open(img_path).convert('RGB')
        rich, poor = preprocess_image(image)  # Assume this function is defined elsewhere
        if self.transform:
            rich = self.transform(rich)
            poor = self.transform(poor)
        label = 0 if class_label == 'ai' else 1
        return rich, poor, label

def split_val_test_train(dataset_test_valid, dataset_train, train_size, val_size, test_size, seed=42):
    rng = npr.default_rng(seed)
    total_size_test_valid = len(dataset_test_valid)
    total_size_train = len(dataset_train)

    indices_test_valid = np.arange(total_size_test_valid)
    indices_train = np.arange(total_size_train)

    rng.shuffle(indices_test_valid)
    rng.shuffle(indices_train)

    if val_size + test_size > total_size_test_valid:
        raise ValueError("Requested sizes for validation and test exceed available data")
    if train_size > total_size_train:
        raise ValueError("Requested size for train exceeds available data")

    val_indices = indices_test_valid[:val_size]
    test_indices = indices_test_valid[val_size:val_size + test_size]
    train_indices = indices_train[:train_size]

    val_subset = Subset(dataset_test_valid, val_indices)
    test_subset = Subset(dataset_test_valid, test_indices)
    train_subset = Subset(dataset_train, train_indices)

    return train_subset, val_subset, test_subset



transform = transforms.Compose([
    Resize((224, 224)),  # EfficientNet typically 224x224 inputs
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# # Create dataset instances
# train_dataset = DatasetAI(root_dir='/mnt/d/GenImage', transform=transform, split='train')
# val_test_dataset = DatasetAI(root_dir='/mnt/d/GenImage', transform=transform, split='val')


# val_dataset, test_dataset ,train_dataset = split_val_test_train(val_test_dataset, train_dataset, 1000, 200, 200)

# # Create DataLoader for each dataset
# train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
# val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
# test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)



CNN Block

In [9]:

class CNNBlock(nn.Module):
   def __init__(self, num_input_channels):
       super(CNNBlock, self).__init__()
       self.conv = nn.Conv2d(num_input_channels, 3, kernel_size=3, padding=1)
       self.bn = nn.BatchNorm2d(3)
       self.relu = nn.ReLU()
   def forward(self, x):
       x = self.conv(x)
       x = self.bn(x)
       x = self.relu(x)

       return x

Model

In [14]:
class ImageClassificationModel_EfficientNet(nn.Module):
    def __init__(self, num_classes=2):
        super(ImageClassificationModel_EfficientNet, self).__init__()
        self.feature_combiner = CNNBlock(num_input_channels=3)
        self.feature_combiner2 = CNNBlock(num_input_channels=3)

        # Initialize EfficientNet with the desired number of output classes
        self.efficientnet = timm.create_model('efficientnet_b0', pretrained=True, num_classes=num_classes)

        # Replace the classifier with a dummy since we have our own classifier
        self.efficientnet.classifier = nn.Identity()

        # Custom classifier that will take the output from EfficientNet
        self.classifier = nn.Linear(self.efficientnet.get_classifier().in_features, num_classes)

    def forward(self, rich, poor):
        # Combine features from 'rich' and 'poor' textures using the CNN blocks
        rich_features = self.feature_combiner(rich)
        poor_features = self.feature_combiner2(poor)

        # Calculate the feature difference
        feature_difference = rich_features - poor_features

        # Flatten the feature difference
        feature_difference = feature_difference.view(feature_difference.size(0), -1)

        # Pass the feature difference through EfficientNet to get the features
        features = self.efficientnet(feature_difference)

        # Pass the features through the custom classifier to get the logits
        logits = self.classifier(features)
        return logits

In [15]:


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ImageClassificationModel_EfficientNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = AdamW([
    {'params': model.feature_combiner.parameters(), 'lr': 1e-5},
    {'params': model.feature_combiner2.parameters(), 'lr': 1e-5},
    {'params': model.efficientnet.parameters(), 'lr': 1e-5},  # To fine-tune EfficientNet
    {'params': model.classifier.parameters(), 'lr': 1e-4},
])
#freeze the transformer

# Initialize the best_val_accuracy variable
best_val_accuracy = 0.0
best_model_path = 'best_model.pth'

# Try to load previous best model and its best validation accuracy
try:
    checkpoint = torch.load(best_model_path)
    model.load_state_dict(checkpoint['model_state'])
    best_val_accuracy = checkpoint['best_val_accuracy']
    print("Loaded previous best model with accuracy:", best_val_accuracy)
except FileNotFoundError:
    best_val_accuracy = float('-inf')
    print("No saved model found. Starting fresh!")

def train_and_validate(model, train_loader, valid_loader, optimizer, device, num_epochs, best_val_accuracy):
    for epoch in range(num_epochs):
        # Training Phase
        model.train()
        total_train_loss, total_train, correct_train = 0, 0, 0
        for batch in train_loader:
            rich, poor, labels = batch
            rich = rich.to(device)
            poor = poor.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(rich, poor)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item() * labels.size(0)
            _, predicted = torch.max(outputs, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        train_loss = total_train_loss / total_train
        train_accuracy = correct_train / total_train

        # Validation Phase
        model.eval()
        total_val_loss, total_val, correct_val = 0, 0, 0
        with torch.no_grad():
            for rich, poor, labels in valid_loader:
                rich = rich.to(device)
                poor = poor.to(device)
                labels = labels.to(device)

                outputs = model(rich, poor)
                loss = criterion(outputs, labels)

                total_val_loss += loss.item() * labels.size(0)
                _, predicted = torch.max(outputs, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_loss = total_val_loss / total_val
        val_accuracy = correct_val / total_val

        print(f'Epoch {epoch+1}/{num_epochs}, '
              f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, '
              f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

        # Update the best model if current model is better
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save({'model_state': model.state_dict(),
                        'best_val_accuracy': best_val_accuracy},
                       best_model_path)
            print(f"Saved new best model with accuracy: {best_val_accuracy:.4f}")


#train_and_validate(model, train_loader, val_loader, optimizer, device, num_epochs=10, best_val_accuracy=best_val_accuracy)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/21.4M [00:00<?, ?B/s]

AttributeError: 'Identity' object has no attribute 'in_features'

Test

In [17]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ImageClassificationModel_EfficientNet().to(device)
def test(model, test_loader, device):
    #load the best model
    checkpoint = torch.load("best_model.pth")
    model.load_state_dict(checkpoint['model_state'])

    model.eval()
    total_test, correct_test = 0, 0
    with torch.no_grad():
        for rich, poor, labels in test_loader:
            rich = rich.to(device)
            poor = poor.to(device)
            labels = labels.to(device)

            outputs = model(rich, poor)
            _, predicted = torch.max(outputs, 1)
            total_test += labels.size(0)
            correct_test += (predicted == labels).sum().item()

    test_accuracy = correct_test / total_test
    print(f'Test Accuracy: {test_accuracy:.4f}')
#test(model, test_loader, device)

AttributeError: 'Identity' object has no attribute 'in_features'