In [27]:
# Mount Google Drive into the Colab filesystem at /content/drive so later cells
# can read from and write to paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import math
from torch.utils.data import DataLoader, random_split
from torchvision import transforms, datasets
from PIL import Image

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def make_divisible(value, divisor, min_value=None, round_down_protect=True):
    """Round ``value`` to a nearby multiple of ``divisor``.

    Args:
        value: number to round (may be a float).
        divisor: the result is built from multiples of this number.
        min_value: lower bound for the result; defaults to ``divisor``.
        round_down_protect: when true, a result below 90% of ``value`` is
            bumped up by one ``divisor``.

    Returns:
        The rounded value, never smaller than ``min_value``.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest_multiple = (int(value + divisor / 2) // divisor) * divisor
    rounded = max(lower_bound, nearest_multiple)
    # Rounding down must not lose more than 10% of the requested value.
    if round_down_protect and rounded < 0.9 * value:
        return rounded + divisor
    return rounded

In [None]:
import kagglehub

# Download latest version of the dataset into the local kagglehub cache.
# NOTE(review): `path` is only printed here and is not referenced again in this
# notebook; a later cell downloads the same dataset with the Kaggle CLI.
path = kagglehub.dataset_download("paultimothymooney/breast-histopathology-images")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/paultimothymooney/breast-histopathology-images?dataset_version_number=1...


100%|██████████| 3.10G/3.10G [01:34<00:00, 35.3MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/paultimothymooney/breast-histopathology-images/versions/1


In [None]:
# Open Colab's upload dialog.
# NOTE(review): the next cell copies `kaggle.json` from the working directory,
# so this upload is presumably the Kaggle API token -- confirm.
from google.colab import files
files.upload()


In [None]:
# Copy kaggle.json from the working directory into ~/.kaggle and make it
# readable and writable by the owner only (mode 600).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the dataset with the Kaggle CLI.
# NOTE(review): the unzip cell below expects the archive at
# /content/breast-histopathology-images.zip -- confirm the working directory.
!kaggle datasets download -d paultimothymooney/breast-histopathology-images

In [None]:
# Create a Histo folder on Google Drive.
# NOTE(review): the unzip cell extracts to /content/Histo (local disk), not to
# this Drive folder, and no other cell in this notebook references it.
!mkdir -p /content/drive/MyDrive/Histo

In [None]:
# Extract the downloaded archive into /content/Histo, which the merge cell reads.
!unzip /content/breast-histopathology-images.zip -d /content/Histo

In [None]:
import os
import shutil

# Flatten <source_dir>/<subfolder>/<label>/* into <destination_dir>/<label>/*
# so the result has the one-folder-per-class layout read by ImageFolder.
source_dir = '/content/Histo'            # directory holding the subfolders
destination_dir = '/content/Histopathology'  # merged output directory
class_labels = ['0', '1']

# Make sure both class folders exist before moving anything.
for label in class_labels:
    os.makedirs(os.path.join(destination_dir, label), exist_ok=True)

for subfolder in os.listdir(source_dir):
    subfolder_path = os.path.join(source_dir, subfolder)
    if not os.path.isdir(subfolder_path):
        continue  # skip stray files at the top level
    for label in class_labels:
        label_path = os.path.join(subfolder_path, label)
        if not os.path.exists(label_path):
            continue  # this subfolder has no images for this class
        target_dir = os.path.join(destination_dir, label)
        for file in os.listdir(label_path):
            shutil.move(os.path.join(label_path, file), os.path.join(target_dir, file))

print("Folders merged successfully!")


In [None]:
import torch
import torch.nn as nn
import math


# Public factory functions defined in this cell. Only names that are actually
# defined are exported; listing an undefined name in __all__ makes
# `from <module> import *` fail with AttributeError.
__all__ = ['mobilenetv4_conv_small', 'mobilenetv4_conv_medium', 'mobilenetv4_conv_large']


def make_divisible(value, divisor, min_value=None, round_down_protect=True):
    """Round ``value`` to a nearby multiple of ``divisor``.

    Args:
        value: number to round (may be a float).
        divisor: the result is built from multiples of this number.
        min_value: lower bound for the result; defaults to ``divisor``.
        round_down_protect: when true, a result below 90% of ``value`` is
            bumped up by one ``divisor``.

    Returns:
        The rounded value, never smaller than ``min_value``.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest_multiple = (int(value + divisor / 2) // divisor) * divisor
    rounded = max(lower_bound, nearest_multiple)
    # Make sure that round down does not go down by more than 10%.
    if round_down_protect and rounded < 0.9 * value:
        return rounded + divisor
    return rounded


class ConvBN(nn.Module):
    """Bias-free Conv2d followed by BatchNorm2d and an in-place ReLU.

    Padding is ``(kernel_size - 1) // 2``. The three layers live in
    ``self.block`` in that order.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1):
        super().__init__()
        padding = (kernel_size - 1) // 2
        conv = nn.Conv2d(in_channels, out_channels, kernel_size,
                         stride=stride, padding=padding, bias=False)
        norm = nn.BatchNorm2d(out_channels)
        activation = nn.ReLU(inplace=True)
        self.block = nn.Sequential(conv, norm, activation)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU to ``x``."""
        out = self.block(x)
        return out


class UniversalInvertedBottleneck(nn.Module):
    """Universal Inverted Bottleneck (UIB) block.

    Layer order in ``forward``:
      1. optional "start" depthwise conv + BN (no activation),
      2. 1x1 expansion conv + BN + ReLU,
      3. optional "middle" depthwise conv + BN + ReLU,
      4. 1x1 projection conv + BN,
      5. optional per-channel layer scale,
      6. residual add when ``stride == 1`` and ``in_channels == out_channels``.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the projection conv.
        expand_ratio: hidden width multiplier; the hidden width is
            ``make_divisible(in_channels * expand_ratio, 8)``.
        start_dw_kernel_size: kernel size of the start depthwise conv; a falsy
            value (0) disables that layer.
        middle_dw_kernel_size: kernel size of the middle depthwise conv; a
            falsy value (0) disables that layer.
        stride: spatial stride. It is applied by the middle depthwise conv when
            ``middle_dw_downsample`` is true, otherwise by the start depthwise
            conv. If the layer that should carry the stride is disabled, no
            layer applies it.
        middle_dw_downsample: selects which depthwise conv carries ``stride``.
        use_layer_scale: multiply the projected output by a learnable
            per-channel vector ``gamma``.
        layer_scale_init_value: initial value of every ``gamma`` entry.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 expand_ratio,
                 start_dw_kernel_size,
                 middle_dw_kernel_size,
                 stride,
                 middle_dw_downsample: bool = True,
                 use_layer_scale: bool = False,
                 layer_scale_init_value: float = 1e-5):
        super(UniversalInvertedBottleneck, self).__init__()
        self.start_dw_kernel_size = start_dw_kernel_size
        self.middle_dw_kernel_size = middle_dw_kernel_size

        if start_dw_kernel_size:
            self.start_dw_conv = nn.Conv2d(in_channels, in_channels, start_dw_kernel_size,
                                           stride if not middle_dw_downsample else 1,
                                           (start_dw_kernel_size - 1) // 2,
                                           groups=in_channels, bias=False)
            self.start_dw_norm = nn.BatchNorm2d(in_channels)

        expand_channels = make_divisible(in_channels * expand_ratio, 8)
        self.expand_conv = nn.Conv2d(in_channels, expand_channels, 1, 1, bias=False)
        self.expand_norm = nn.BatchNorm2d(expand_channels)
        self.expand_act = nn.ReLU(inplace=True)

        if middle_dw_kernel_size:
            self.middle_dw_conv = nn.Conv2d(expand_channels, expand_channels, middle_dw_kernel_size,
                                            stride if middle_dw_downsample else 1,
                                            (middle_dw_kernel_size - 1) // 2,
                                            groups=expand_channels, bias=False)
            self.middle_dw_norm = nn.BatchNorm2d(expand_channels)
            self.middle_dw_act = nn.ReLU(inplace=True)

        self.proj_conv = nn.Conv2d(expand_channels, out_channels, 1, 1, bias=False)
        self.proj_norm = nn.BatchNorm2d(out_channels)

        if use_layer_scale:
            # Stored as a flat (out_channels,) vector; reshaped for broadcasting in forward().
            self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((out_channels)), requires_grad=True)

        self.use_layer_scale = use_layer_scale
        # The shortcut is only valid when the block keeps both resolution and width.
        self.identity = stride == 1 and in_channels == out_channels

    def forward(self, x):
        """Run the block on an NCHW tensor and return the transformed tensor."""
        shortcut = x

        if self.start_dw_kernel_size:
            x = self.start_dw_conv(x)
            x = self.start_dw_norm(x)

        x = self.expand_conv(x)
        x = self.expand_norm(x)
        x = self.expand_act(x)

        if self.middle_dw_kernel_size:
            x = self.middle_dw_conv(x)
            x = self.middle_dw_norm(x)
            x = self.middle_dw_act(x)

        x = self.proj_conv(x)
        x = self.proj_norm(x)

        if self.use_layer_scale:
            # gamma is per-channel: align it with dim 1 of (N, C, H, W). A bare
            # `gamma * x` would broadcast against the last (width) axis instead.
            x = self.gamma.view(1, -1, 1, 1) * x

        return x + shortcut if self.identity else x


class MobileNetV4(nn.Module):
    """MobileNetV4 classifier assembled from a list of block specs.

    Each spec is a tuple whose first item selects the block type:
      * ``('conv_bn', kernel_size, stride, out_channels)``
      * ``('uib', start_dw_kernel_size, middle_dw_kernel_size, stride,
        out_channels, expand_ratio)``

    The network expects 3-channel input. ``forward`` runs the feature blocks,
    global average pooling, a 1x1 ConvBN to 1280 channels, and a linear
    classifier with ``num_classes`` outputs.

    Raises:
        NotImplementedError: for an unknown block type in ``block_specs``.
    """

    def __init__(self, block_specs, num_classes=1000):
        super().__init__()

        in_ch = 3
        stages = []
        for kind, *cfg in block_specs:
            if kind == 'conv_bn':
                kernel, stride, out_ch = cfg
                stage = ConvBN(in_ch, out_ch, kernel, stride)
            elif kind == 'uib':
                start_k, middle_k, stride, out_ch, expand = cfg
                stage = UniversalInvertedBottleneck(in_ch, out_ch, expand, start_k, middle_k, stride)
            else:
                raise NotImplementedError
            stages.append(stage)
            in_ch = out_ch  # the next block consumes this block's output width
        self.features = nn.Sequential(*stages)

        # Classification head: pool -> 1x1 conv to a wide hidden layer -> linear.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        hidden_channels = 1280
        self.conv = ConvBN(in_ch, hidden_channels, 1)
        self.classifier = nn.Linear(hidden_channels, num_classes)

        self._initialize_weights()

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        feats = self.features(x)
        pooled = self.conv(self.avgpool(feats))
        flat = pooled.view(pooled.size(0), -1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Initialise conv, batch-norm and linear layers in place."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                # Normal init with std sqrt(2 / (kernel_h * kernel_w * out_channels)).
                kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
                fan = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan))
                if module.bias is not None:
                    module.bias.data.zero_()
                continue
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
                continue
            if isinstance(module, nn.Linear):
                module.weight.data.normal_(0, 0.01)
                module.bias.data.zero_()


def mobilenetv4_conv_small(**kwargs):
    """
    Constructs a MobileNetV4-Conv-Small model.

    Spec tuple layouts:
        ('conv_bn', kernel_size, stride, out_channels)
        ('uib', start_dw_kernel_size, middle_dw_kernel_size, stride, out_channels, expand_ratio)

    Keyword arguments are forwarded unchanged to ``MobileNetV4``.
    """
    ib_96 = ('uib', 0, 3, 1, 96, 2.0)    # IB
    ib_128 = ('uib', 0, 3, 1, 128, 4.0)  # IB

    # Resolution notes kept from the original spec comments: 112px -> 56px -> 28px -> 14px -> 7px.
    stem_112px = [('conv_bn', 3, 2, 32)]
    stage_56px = [('conv_bn', 3, 2, 32), ('conv_bn', 1, 1, 32)]
    stage_28px = [('conv_bn', 3, 2, 96), ('conv_bn', 1, 1, 64)]
    stage_14px = (
        [('uib', 5, 5, 2, 96, 3.0)]      # ExtraDW
        + [ib_96] * 4
        + [('uib', 3, 0, 1, 96, 4.0)]    # ConvNext
    )
    stage_7px = [
        ('uib', 3, 3, 2, 128, 6.0),      # ExtraDW
        ('uib', 5, 5, 1, 128, 4.0),      # ExtraDW
        ('uib', 0, 5, 1, 128, 4.0),      # IB
        ('uib', 0, 5, 1, 128, 3.0),      # IB
    ] + [ib_128] * 2
    head = [('conv_bn', 1, 1, 960)]      # Conv

    block_specs = stem_112px + stage_56px + stage_28px + stage_14px + stage_7px + head
    return MobileNetV4(block_specs, **kwargs)


def mobilenetv4_conv_medium(**kwargs):
    """
    Constructs a MobileNetV4-Conv-Medium model.

    Keyword arguments are forwarded unchanged to ``MobileNetV4``.
    """
    def uib_stage(out_channels, rows):
        # rows hold (start_dw_kernel_size, middle_dw_kernel_size, stride, expand_ratio)
        return [('uib', start_k, middle_k, stride, out_channels, expand)
                for start_k, middle_k, stride, expand in rows]

    block_specs = [
        ('conv_bn', 3, 2, 32),
        ('conv_bn', 3, 2, 128),
        ('conv_bn', 1, 1, 48),
    ]
    # 3rd stage
    block_specs += uib_stage(80, [(3, 5, 2, 4.0), (3, 3, 1, 2.0)])
    # 4th stage
    block_specs += uib_stage(160, [
        (3, 5, 2, 6.0), (3, 3, 1, 4.0), (3, 3, 1, 4.0), (3, 5, 1, 4.0),
        (3, 3, 1, 4.0), (3, 0, 1, 4.0), (0, 0, 1, 2.0), (3, 0, 1, 4.0),
    ])
    # 5th stage
    block_specs += uib_stage(256, [
        (5, 5, 2, 6.0), (5, 5, 1, 4.0), (3, 5, 1, 4.0), (3, 5, 1, 4.0),
        (0, 0, 1, 4.0), (3, 0, 1, 4.0), (3, 5, 1, 2.0), (5, 5, 1, 4.0),
        (0, 0, 1, 4.0), (0, 0, 1, 4.0), (5, 0, 1, 2.0),
    ])
    # FC layers
    block_specs.append(('conv_bn', 1, 1, 960))

    return MobileNetV4(block_specs, **kwargs)


def mobilenetv4_conv_large(**kwargs):
    """
    Constructs a MobileNetV4-Conv-Large model.

    Every UIB block in this variant uses an expand ratio of 4.0.
    Keyword arguments are forwarded unchanged to ``MobileNetV4``.
    """
    expand = 4.0

    def uib_stage(out_channels, rows):
        # rows hold (start_dw_kernel_size, middle_dw_kernel_size, stride)
        return [('uib', start_k, middle_k, stride, out_channels, expand)
                for start_k, middle_k, stride in rows]

    block_specs = [
        ('conv_bn', 3, 2, 24),
        ('conv_bn', 3, 2, 96),
        ('conv_bn', 1, 1, 48),
    ]
    block_specs += uib_stage(96, [(3, 5, 2), (3, 3, 1)])
    block_specs += uib_stage(192, [
        (3, 5, 2), (3, 3, 1), (3, 3, 1), (3, 3, 1), (3, 5, 1),
        (5, 3, 1), (5, 3, 1), (5, 3, 1), (5, 3, 1), (5, 3, 1),
        (3, 0, 1),
    ])
    block_specs += uib_stage(512, [
        (5, 5, 2), (5, 5, 1), (5, 5, 1), (5, 5, 1), (5, 0, 1),
        (5, 3, 1), (5, 0, 1), (5, 0, 1), (5, 3, 1), (5, 5, 1),
        (5, 0, 1), (5, 0, 1), (5, 0, 1),
    ])
    block_specs.append(('conv_bn', 1, 1, 960))

    return MobileNetV4(block_specs, **kwargs)

In [None]:
# data_transforms = {
#     'train': transforms.Compose([
#         transforms.Resize((224, 224)),
#         transforms.RandomHorizontalFlip(),
#         transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#     ]),
#     'val': transforms.Compose([
#         transforms.Resize((224, 224)),
#         transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#     ]),
# }

# train_dataset = datasets.ImageFolder(root='/content/Histopathology', transform=data_transforms['train'])
# train_size = int(0.8 * len(train_dataset))
# val_size = len(train_dataset) - train_size
# train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import datasets, transforms

# Define data transformations
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=(-15, 15)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.RandomAffine(degrees=5, shear=(-5, 5)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((144, 144)),
        transforms.CenterCrop((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
}


class TransformedSubset(Dataset):
    """Apply a split-specific transform on top of an untransformed subset.

    Both subsets returned by ``random_split`` point at the same underlying
    dataset object, so setting ``subset.dataset.transform`` for one split
    silently changes the other split as well. Wrapping each subset keeps the
    train and validation transforms independent.
    """

    def __init__(self, subset, transform=None):
        self.subset = subset
        self.transform = transform

    def __len__(self):
        return len(self.subset)

    def __getitem__(self, idx):
        image, label = self.subset[idx]
        if self.transform is not None:
            image = self.transform(image)
        return image, label


# Load dataset without transformations (items are (PIL image, label) pairs)
dataset = datasets.ImageFolder(root='/content/Histopathology')

# Split dataset 80/20; the generator is seeded so the split is reproducible across runs
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_subset, val_subset = random_split(
    dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42)
)

# Apply transformations per split (augmentation for training only)
train_dataset = TransformedSubset(train_subset, data_transforms['train'])
val_dataset = TransformedSubset(val_subset, data_transforms['val'])

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=4)


In [None]:
# Rebuild the loaders with batch size 32 and default worker settings. This
# replaces the batch-size-8 / num_workers=4 loaders created in the previous cell.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [None]:
# Build MobileNetV4-Conv-Small with a 2-way classifier head and move it to `device`.
model = mobilenetv4_conv_small(num_classes=2).to(device)

In [None]:
# Cross-entropy loss over the class logits, optimised with Adam at lr=0.001.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
def _run_epoch(model, loader, criterion, run_device, optimizer=None):
    """Make one pass over ``loader``; update weights only when ``optimizer`` is given.

    Returns:
        ``(mean_loss, accuracy)`` averaged over the samples actually seen, or
        ``(nan, nan)`` when the loader yields no batches.
    """
    training = optimizer is not None
    total_loss, total_correct, total_seen = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(run_device), labels.to(run_device)
            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_size = images.size(0)
            # The criterion returns a batch mean; re-weight it by the batch size.
            total_loss += loss.item() * batch_size
            total_correct += (outputs.argmax(dim=1) == labels).sum().item()
            total_seen += batch_size
    if total_seen == 0:
        return float("nan"), float("nan")
    return total_loss / total_seen, total_correct / total_seen


def train_model(model, train_loader, val_loader, criterion, optimizer, epochs):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on a global ``device`` variable.

    Args:
        model: network to train; it is updated in place.
        train_loader: iterable of ``(images, labels)`` training batches.
        val_loader: iterable of ``(images, labels)`` validation batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimiser bound to ``model``'s parameters.
        epochs: number of passes over ``train_loader``.

    Returns:
        A dict with the keys ``train_loss``, ``train_acc``, ``val_loss`` and
        ``val_acc``; each maps to a list with one float per epoch.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")
    history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}

    for epoch in range(epochs):
        model.train()
        epoch_loss, epoch_acc = _run_epoch(model, train_loader, criterion, run_device, optimizer)
        print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {epoch_loss:.4f}, Training Accuracy: {epoch_acc:.4f}")

        # Validation Phase
        model.eval()
        val_epoch_loss, val_epoch_acc = _run_epoch(model, val_loader, criterion, run_device)
        print(f"Validation Loss: {val_epoch_loss:.4f}, Validation Accuracy: {val_epoch_acc:.4f}")

        history["train_loss"].append(epoch_loss)
        history["train_acc"].append(epoch_acc)
        history["val_loss"].append(val_epoch_loss)
        history["val_acc"].append(val_epoch_acc)

    return history

In [None]:
# Train for 50 epochs; per-epoch training and validation metrics are printed.
train_model(model, train_loader, val_loader, criterion, optimizer, epochs=50)

Epoch 1/50, Training Loss: 0.3502, Training Accuracy: 0.8503
Validation Loss: 0.3185, Validation Accuracy: 0.8644


In [None]:
image_path = '/content/drive/MyDrive/dataset/1/10254_idx5_x1651_y1201_class1.png'
# Decode as 3-channel RGB so the 3-channel Normalize in the transform applies to
# any PNG mode, and close the file handle once the pixels are loaded.
with Image.open(image_path) as image_file:
    img = image_file.convert('RGB')

# Apply the validation transforms to the image
img_transformed = data_transforms['val'](img)
img_transformed = img_transformed.unsqueeze(0)  # Add batch dimension
img_transformed = img_transformed.to(device)

# Make prediction (eval mode makes batch norm use its running statistics)
model.eval()
with torch.no_grad():
    output = model(img_transformed)
    _, predicted = torch.max(output, 1)
    print(f'Predicted class: {predicted.item()}')

In [None]:
import matplotlib.pyplot as plt

# Per-epoch accuracy values transcribed from a 30-epoch training run.
training_accuracy = [
    0.8659, 0.8777, 0.8876, 0.8957, 0.8890, 0.9093,
    0.9108, 0.9023, 0.9259, 0.9013, 0.9249, 0.9297,
    0.9268, 0.9396, 0.9410, 0.9514, 0.9320, 0.9490,
    0.9585, 0.9533, 0.9570, 0.9622, 0.9688, 0.9769,
    0.9539, 0.9660, 0.9717, 0.9674, 0.9769, 0.9660,
]
validation_accuracy = [
    0.8675, 0.8604, 0.8472, 0.8555, 0.8472, 0.8594,
    0.8830, 0.8962, 0.8815, 0.8868, 0.8906, 0.9075,
    0.9057, 0.9158, 0.9036, 0.8999, 0.9151, 0.9038,
    0.8925, 0.9151, 0.8887, 0.8943, 0.9019, 0.9000,
    0.9057, 0.8836, 0.8962, 0.9057, 0.8981, 0.9170,
]

# Epochs are numbered from 1 on the x axis.
epochs = list(range(1, len(training_accuracy) + 1))

# Plot both curves on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, training_accuracy, label='Training Accuracy', marker='o')
ax.plot(epochs, validation_accuracy, label='Validation Accuracy', marker='x')
ax.set_title('Training and Validation Accuracy vs. Epochs')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Install ptflops, which the next cell uses to count FLOPs and parameters.
!pip install ptflops


In [None]:
import torch
from ptflops import get_model_complexity_info

# Define the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Profile a separate, freshly built network so that the `model` variable from
# the training cells is not replaced by an untrained copy.
flops_model = mobilenetv4_conv_small(num_classes=2).to(device)

# Define the input size (e.g., for 224x224 RGB images)
# NOTE(review): the first training pipeline in this notebook feeds 128x128
# inputs; FLOPs reported for 224x224 do not describe that configuration.
input_size = (3, 224, 224)

# Calculate FLOPs and number of parameters
flops, params = get_model_complexity_info(flops_model, input_size, as_strings=True, print_per_layer_stat=True)

print(f"FLOPs: {flops}")
print(f"Parameters: {params}")


In [None]:
# Install imbalanced-learn, which provides the SMOTE oversampler used below.
!pip install imbalanced-learn


In [None]:
from imblearn.over_sampling import SMOTE


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import numpy as np
from imblearn.over_sampling import SMOTE
from collections import Counter
from PIL import Image
import os
import math

# Define data transformations
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
}

# Load the dataset as untransformed PIL images; per-split transforms are applied below.
image_folder = datasets.ImageFolder(root='/content/drive/MyDrive/dataset', transform=None)

# SMOTE works on fixed-length vectors, so every image is first brought to one
# common RGB size. Without this, stacking fails for mixed sizes and the reshape
# back to 224x224x3 is invalid for images of any other size.
SMOTE_IMAGE_SIZE = (224, 224)  # (width, height) as expected by PIL's resize

# Extract features and labels for SMOTE
features, labels = [], []
for img, label in image_folder:
    features.append(np.asarray(img.convert('RGB').resize(SMOTE_IMAGE_SIZE), dtype=np.uint8))
    labels.append(label)
features = np.stack(features)      # (num_images, height, width, 3)
labels = np.asarray(labels)
image_shape = features.shape[1:]   # used to undo the flattening after SMOTE

# Hold out the validation images BEFORE oversampling: synthetic samples are
# interpolated from real ones, so resampling first would leak validation
# information into training and put synthetic images into the validation set.
split_rng = np.random.default_rng(42)
shuffled_indices = split_rng.permutation(len(labels))
train_size = int(0.8 * len(labels))
val_size = len(labels) - train_size
train_idx, val_idx = shuffled_indices[:train_size], shuffled_indices[train_size:]

# Apply SMOTE on the flattened training features only. Floats are used so the
# interpolation between neighbours is not carried out in uint8 arithmetic.
smote = SMOTE(random_state=42)
features_resampled, labels_resampled = smote.fit_resample(
    features[train_idx].reshape(train_size, -1).astype(np.float32),
    labels[train_idx],
)

# Check the number of features after resampling
print(f"Resampled features shape: {features_resampled.shape}")

# Round back to valid pixel values and restore the image layout
features_resampled = (
    np.clip(np.rint(features_resampled), 0, 255).astype(np.uint8).reshape(-1, *image_shape)
)

# Verify the reshaped shape
print(f"Reshaped features shape: {features_resampled.shape}")

# Recreate a PyTorch Dataset
class ResampledDataset(torch.utils.data.Dataset):
    """In-memory image dataset backed by a uint8 array of shape (N, H, W, 3)."""

    def __init__(self, features, labels, transform=None):
        self.features = features
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.features[idx].astype('uint8')  # Ensure the image is in uint8 format for PIL
        image = Image.fromarray(image)  # Convert to PIL.Image
        label = int(self.labels[idx])
        if self.transform:
            image = self.transform(image)  # Apply transformations
        return image, label

# Oversampled training data uses the training transform; the untouched
# hold-out images use the validation transform.
train_resampled_dataset = ResampledDataset(features_resampled, labels_resampled, transform=data_transforms['train'])
train_dataset = train_resampled_dataset
val_dataset = ResampledDataset(features[val_idx], labels[val_idx], transform=data_transforms['val'])

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Define MobileNetV4 helper functions and architecture
def make_divisible(value, divisor, min_value=None, round_down_protect=True):
    """Round ``value`` to a nearby multiple of ``divisor``.

    Args:
        value: number to round (may be a float).
        divisor: the result is built from multiples of this number.
        min_value: lower bound for the result; defaults to ``divisor``.
        round_down_protect: when true, a result below 90% of ``value`` is
            bumped up by one ``divisor``.

    Returns:
        The rounded value, never smaller than ``min_value``.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest_multiple = (int(value + divisor / 2) // divisor) * divisor
    rounded = max(lower_bound, nearest_multiple)
    # Make sure that round down does not go down by more than 10%.
    if round_down_protect and rounded < 0.9 * value:
        return rounded + divisor
    return rounded

class ConvBN(nn.Module):
    """Bias-free Conv2d followed by BatchNorm2d and an in-place ReLU.

    Padding is ``(kernel_size - 1) // 2``. The three layers live in
    ``self.block`` in that order.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1):
        super().__init__()
        padding = (kernel_size - 1) // 2
        conv = nn.Conv2d(in_channels, out_channels, kernel_size,
                         stride=stride, padding=padding, bias=False)
        norm = nn.BatchNorm2d(out_channels)
        activation = nn.ReLU(inplace=True)
        self.block = nn.Sequential(conv, norm, activation)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU to ``x``."""
        out = self.block(x)
        return out

class UniversalInvertedBottleneck(nn.Module):
    """Universal Inverted Bottleneck (UIB) block.

    Layer order in ``forward``:
      1. optional "start" depthwise conv + BN (no activation),
      2. 1x1 expansion conv + BN + ReLU,
      3. optional "middle" depthwise conv + BN + ReLU,
      4. 1x1 projection conv + BN,
      5. optional per-channel layer scale,
      6. residual add when ``stride == 1`` and ``in_channels == out_channels``.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the projection conv.
        expand_ratio: hidden width multiplier; the hidden width is
            ``make_divisible(in_channels * expand_ratio, 8)``.
        start_dw_kernel_size: kernel size of the start depthwise conv; a falsy
            value (0) disables that layer.
        middle_dw_kernel_size: kernel size of the middle depthwise conv; a
            falsy value (0) disables that layer.
        stride: spatial stride. It is applied by the middle depthwise conv when
            ``middle_dw_downsample`` is true, otherwise by the start depthwise
            conv. If the layer that should carry the stride is disabled, no
            layer applies it.
        middle_dw_downsample: selects which depthwise conv carries ``stride``.
        use_layer_scale: multiply the projected output by a learnable
            per-channel vector ``gamma``.
        layer_scale_init_value: initial value of every ``gamma`` entry.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 expand_ratio,
                 start_dw_kernel_size,
                 middle_dw_kernel_size,
                 stride,
                 middle_dw_downsample: bool = True,
                 use_layer_scale: bool = False,
                 layer_scale_init_value: float = 1e-5):
        super(UniversalInvertedBottleneck, self).__init__()
        self.start_dw_kernel_size = start_dw_kernel_size
        self.middle_dw_kernel_size = middle_dw_kernel_size

        if start_dw_kernel_size:
            self.start_dw_conv = nn.Conv2d(in_channels, in_channels, start_dw_kernel_size,
                                           stride if not middle_dw_downsample else 1,
                                           (start_dw_kernel_size - 1) // 2,
                                           groups=in_channels, bias=False)
            self.start_dw_norm = nn.BatchNorm2d(in_channels)

        expand_channels = make_divisible(in_channels * expand_ratio, 8)
        self.expand_conv = nn.Conv2d(in_channels, expand_channels, 1, 1, bias=False)
        self.expand_norm = nn.BatchNorm2d(expand_channels)
        self.expand_act = nn.ReLU(inplace=True)

        if middle_dw_kernel_size:
            self.middle_dw_conv = nn.Conv2d(expand_channels, expand_channels, middle_dw_kernel_size,
                                            stride if middle_dw_downsample else 1,
                                            (middle_dw_kernel_size - 1) // 2,
                                            groups=expand_channels, bias=False)
            self.middle_dw_norm = nn.BatchNorm2d(expand_channels)
            self.middle_dw_act = nn.ReLU(inplace=True)

        self.proj_conv = nn.Conv2d(expand_channels, out_channels, 1, 1, bias=False)
        self.proj_norm = nn.BatchNorm2d(out_channels)

        if use_layer_scale:
            # Stored as a flat (out_channels,) vector; reshaped for broadcasting in forward().
            self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((out_channels)), requires_grad=True)

        self.use_layer_scale = use_layer_scale
        # The shortcut is only valid when the block keeps both resolution and width.
        self.identity = stride == 1 and in_channels == out_channels

    def forward(self, x):
        """Run the block on an NCHW tensor and return the transformed tensor."""
        shortcut = x

        if self.start_dw_kernel_size:
            x = self.start_dw_conv(x)
            x = self.start_dw_norm(x)

        x = self.expand_conv(x)
        x = self.expand_norm(x)
        x = self.expand_act(x)

        if self.middle_dw_kernel_size:
            x = self.middle_dw_conv(x)
            x = self.middle_dw_norm(x)
            x = self.middle_dw_act(x)

        x = self.proj_conv(x)
        x = self.proj_norm(x)

        if self.use_layer_scale:
            # gamma is per-channel: align it with dim 1 of (N, C, H, W). A bare
            # `gamma * x` would broadcast against the last (width) axis instead.
            x = self.gamma.view(1, -1, 1, 1) * x

        return x + shortcut if self.identity else x

class MobileNetV4(nn.Module):
    """MobileNetV4 classifier assembled from a list of block specs.

    Each spec is a tuple whose first item selects the block type:
      * ``('conv_bn', kernel_size, stride, out_channels)``
      * ``('uib', start_dw_kernel_size, middle_dw_kernel_size, stride,
        out_channels, expand_ratio)``

    The network expects 3-channel input. ``forward`` runs the feature blocks,
    global average pooling, a 1x1 ConvBN to 1280 channels, and a linear
    classifier with ``num_classes`` outputs.

    Raises:
        NotImplementedError: for an unknown block type in ``block_specs``.
    """

    def __init__(self, block_specs, num_classes=1000):
        super().__init__()

        in_ch = 3
        stages = []
        for kind, *cfg in block_specs:
            if kind == 'conv_bn':
                kernel, stride, out_ch = cfg
                stage = ConvBN(in_ch, out_ch, kernel, stride)
            elif kind == 'uib':
                start_k, middle_k, stride, out_ch, expand = cfg
                stage = UniversalInvertedBottleneck(in_ch, out_ch, expand, start_k, middle_k, stride)
            else:
                raise NotImplementedError
            stages.append(stage)
            in_ch = out_ch  # the next block consumes this block's output width
        self.features = nn.Sequential(*stages)

        # Classification head: pool -> 1x1 conv to a wide hidden layer -> linear.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        hidden_channels = 1280
        self.conv = ConvBN(in_ch, hidden_channels, 1)
        self.classifier = nn.Linear(hidden_channels, num_classes)

        self._initialize_weights()

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        feats = self.features(x)
        pooled = self.conv(self.avgpool(feats))
        flat = pooled.view(pooled.size(0), -1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Initialise conv, batch-norm and linear layers in place."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                # Normal init with std sqrt(2 / (kernel_h * kernel_w * out_channels)).
                kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
                fan = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan))
                if module.bias is not None:
                    module.bias.data.zero_()
                continue
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
                continue
            if isinstance(module, nn.Linear):
                module.weight.data.normal_(0, 0.01)
                module.bias.data.zero_()

def mobilenetv4_conv_small(num_classes):
    """
    Constructs a MobileNetV4-Conv-Small model with ``num_classes`` outputs.

    Spec tuple layouts:
        ('conv_bn', kernel_size, stride, out_channels)
        ('uib', start_dw_kernel_size, middle_dw_kernel_size, stride, out_channels, expand_ratio)
    """
    ib_96 = ('uib', 0, 3, 1, 96, 2.0)
    ib_128 = ('uib', 0, 3, 1, 128, 4.0)

    conv_stem = [
        ('conv_bn', 3, 2, 32),
        ('conv_bn', 3, 2, 32),
        ('conv_bn', 1, 1, 32),
        ('conv_bn', 3, 2, 96),
        ('conv_bn', 1, 1, 64),
    ]
    stage_96 = [('uib', 5, 5, 2, 96, 3.0)] + [ib_96] * 4 + [('uib', 3, 0, 1, 96, 4.0)]
    stage_128 = [
        ('uib', 3, 3, 2, 128, 6.0),
        ('uib', 5, 5, 1, 128, 4.0),
        ('uib', 0, 5, 1, 128, 4.0),
        ('uib', 0, 5, 1, 128, 3.0),
    ] + [ib_128] * 2
    head = [('conv_bn', 1, 1, 960)]

    block_specs = conv_stem + stage_96 + stage_128 + head
    return MobileNetV4(block_specs, num_classes)

# Initialize the model. The class count comes from the label array because
# `train_dataset` was rebound earlier in this cell to an object that has no
# `.classes` attribute.
model = mobilenetv4_conv_small(num_classes=len(np.unique(labels_resampled)))

# Print model summary
print(model)


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import numpy as np
from imblearn.over_sampling import SMOTE
from collections import Counter
from PIL import Image
import os

# Define data transformations
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
}

# Load the dataset as untransformed PIL images; per-split transforms are applied below.
image_folder = datasets.ImageFolder(root='/content/drive/MyDrive/dataset', transform=None)

# SMOTE works on fixed-length vectors, so every image is first brought to one
# common RGB size. Without this, stacking fails for mixed sizes and the reshape
# back to 224x224x3 is invalid for images of any other size.
SMOTE_IMAGE_SIZE = (224, 224)  # (width, height) as expected by PIL's resize

# Extract features and labels for SMOTE
features, labels = [], []
for img, label in image_folder:
    features.append(np.asarray(img.convert('RGB').resize(SMOTE_IMAGE_SIZE), dtype=np.uint8))
    labels.append(label)
features = np.stack(features)      # (num_images, height, width, 3)
labels = np.asarray(labels)
image_shape = features.shape[1:]   # used to undo the flattening after SMOTE

# Hold out the validation images BEFORE oversampling: synthetic samples are
# interpolated from real ones, so resampling first would leak validation
# information into training and put synthetic images into the validation set.
split_rng = np.random.default_rng(42)
shuffled_indices = split_rng.permutation(len(labels))
train_size = int(0.8 * len(labels))
val_size = len(labels) - train_size
train_idx, val_idx = shuffled_indices[:train_size], shuffled_indices[train_size:]

# Apply SMOTE on the flattened training features only. Floats are used so the
# interpolation between neighbours is not carried out in uint8 arithmetic.
smote = SMOTE(random_state=42)
features_resampled, labels_resampled = smote.fit_resample(
    features[train_idx].reshape(train_size, -1).astype(np.float32),
    labels[train_idx],
)

# Check the number of features after resampling
print(f"Resampled features shape: {features_resampled.shape}")

# Round back to valid pixel values and restore the image layout
features_resampled = (
    np.clip(np.rint(features_resampled), 0, 255).astype(np.uint8).reshape(-1, *image_shape)
)

# Class balance of the training split before and after oversampling
print("Before SMOTE:", Counter(labels[train_idx].tolist()))
print("After SMOTE:", Counter(labels_resampled.tolist()))

# Recreate a PyTorch Dataset
class ResampledDataset(torch.utils.data.Dataset):
    """In-memory image dataset backed by a uint8 array of shape (N, H, W, 3)."""

    def __init__(self, features, labels, transform=None):
        self.features = features
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.features[idx].astype('uint8')  # Ensure the image is in uint8 format for PIL
        image = Image.fromarray(image)  # Convert to PIL.Image
        label = int(self.labels[idx])
        if self.transform:
            image = self.transform(image)  # Apply transformations
        return image, label

# Oversampled training data uses the training transform; the untouched
# hold-out images use the validation transform.
train_resampled_dataset = ResampledDataset(features_resampled, labels_resampled, transform=data_transforms['train'])
train_dataset = train_resampled_dataset
val_dataset = ResampledDataset(features[val_idx], labels[val_idx], transform=data_transforms['val'])

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Define training function
def _run_epoch(model, loader, criterion, run_device, optimizer=None):
    """Make one pass over ``loader``; update weights only when ``optimizer`` is given.

    Returns:
        ``(mean_loss, accuracy)`` averaged over the samples actually seen, or
        ``(nan, nan)`` when the loader yields no batches.
    """
    training = optimizer is not None
    total_loss, total_correct, total_seen = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(run_device), labels.to(run_device)
            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_size = images.size(0)
            # The criterion returns a batch mean; re-weight it by the batch size.
            total_loss += loss.item() * batch_size
            total_correct += (outputs.argmax(dim=1) == labels).sum().item()
            total_seen += batch_size
    if total_seen == 0:
        return float("nan"), float("nan")
    return total_loss / total_seen, total_correct / total_seen


def train_model(model, train_loader, val_loader, criterion, optimizer, epochs, save_path):
    """Train ``model``, validate after every epoch, and checkpoint every 5 epochs.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on a global ``device`` variable.

    Args:
        model: network to train; it is updated in place.
        train_loader: iterable of ``(images, labels)`` training batches.
        val_loader: iterable of ``(images, labels)`` validation batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimiser bound to ``model``'s parameters.
        epochs: number of passes over ``train_loader``.
        save_path: directory (created if missing) that receives
            ``model_epoch_<n>.pt`` state-dict files.

    Returns:
        A dict with the keys ``train_loss``, ``train_acc``, ``val_loss`` and
        ``val_acc``; each maps to a list with one float per epoch.
    """
    os.makedirs(save_path, exist_ok=True)  # Ensure the save path exists

    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")
    history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}

    for epoch in range(epochs):
        model.train()
        epoch_loss, epoch_acc = _run_epoch(model, train_loader, criterion, run_device, optimizer)
        print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {epoch_loss:.4f}, Training Accuracy: {epoch_acc:.4f}")

        # Validation phase
        model.eval()
        val_epoch_loss, val_epoch_acc = _run_epoch(model, val_loader, criterion, run_device)
        print(f"Validation Loss: {val_epoch_loss:.4f}, Validation Accuracy: {val_epoch_acc:.4f}")

        history["train_loss"].append(epoch_loss)
        history["train_acc"].append(epoch_acc)
        history["val_loss"].append(val_epoch_loss)
        history["val_acc"].append(val_epoch_acc)

        # Save the model every 5 epochs
        if (epoch + 1) % 5 == 0:
            model_save_path = f"{save_path}/model_epoch_{epoch + 1}.pt"
            torch.save(model.state_dict(), model_save_path)
            print(f"Model saved at: {model_save_path}")

    return history

# Define model, criterion, optimizer, and device
# `device` is a module-level name that train_model reads when it moves batches.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = mobilenetv4_conv_small(num_classes=2).to(device)  # Initialize the MobileNetV4 model
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model for 30 epochs; a state-dict checkpoint is written to
# `save_path` on Google Drive every 5 epochs.
save_path = "/content/drive/MyDrive/saved_models"
train_model(model, train_loader, val_loader, criterion, optimizer, epochs=30, save_path=save_path)
