```text
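Normal Block: plain 3x3 convolutions (Conv -> BatchNorm -> ReLU), each with a 1x1-convolution shortcut added to its output.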
```

In [None]:
# Import libraries
import torchvision, torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import time

# Define the per-sample preprocessing: convert each image to a tensor, then
# normalize it with mean 0.5 and std 0.5 (i.e. (x - 0.5) / 0.5).
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    # Runs as part of the dataset's transform, per sample, not on the GPU.
    # The single-element mean/std tuples are shared by every channel.
    torchvision.transforms.Normalize((0.5,), (0.5,))
])

# Load the CIFAR-10 train and test splits from data/ (download=True asks
# torchvision to fetch the files when needed); both use the same transform.
train_dataset = torchvision.datasets.CIFAR10(root='data/', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.CIFAR10(root='data/', train=False, transform=transform, download=True)

# Function to create DataLoader
def get_data_loader(dataset, batch_size, shuffle=True, num_workers=4, pin_memory=True):
    """Wrap ``dataset`` in a ``torch.utils.data.DataLoader``.

    Args:
        dataset: Dataset to draw samples from.
        batch_size: Number of samples per batch.
        shuffle: Reshuffle the data every epoch. Defaults to ``True`` (the
            previous hard-coded behavior); pass ``False`` for evaluation
            loaders that should iterate in a fixed order.
        num_workers: Number of worker processes used for loading.
        pin_memory: Ask the loader to return batches in pinned host memory.

    Returns:
        The configured ``DataLoader``.
    """
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

# Display batch format
def get_batch_format(data_loader):
    """Print the tensor shapes of the first batch produced by ``data_loader``.

    Args:
        data_loader: Iterable whose items unpack into ``(images, labels)``.
    """
    first_batch = next(iter(data_loader))
    images_t, labels_t = first_batch
    for caption, tensor in (('images.shape:', images_t), ('labels.shape:', labels_t)):
        print(caption, tensor.shape)

# Accuracy function
def accuracy(predictions, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        predictions: Score tensor; the predicted class is the index of the
            maximum along dimension 1.
        labels: Tensor of target class indices, one per row.

    Returns:
        Number of matching rows divided by ``len(labels)``, as a float.
    """
    predicted_labels = predictions.argmax(dim=1)
    hits = predicted_labels.eq(labels).sum().item()
    return hits / len(labels)

# Build the loaders: batches of 128 for training, batches of 10000 for testing,
# then print the shape of one training batch as a sanity check
train_loader = get_data_loader(dataset=train_dataset, batch_size=128)
test_loader = get_data_loader(dataset=test_dataset, batch_size=10000)
get_batch_format(data_loader=train_loader)

# Use CUDA when it is available, otherwise fall back to the CPU
deviceGPU = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Training hyper-parameters: loss function and learning rate
lossFn = F.cross_entropy
learningRate = 1e-4

# Define the custom MyConv class
class MyConv(nn.Module):
    """Convolution followed by batch normalization and an activation.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding.
        activation: Module applied after batch normalization. When ``None``
            (the default) a fresh ``nn.ReLU(inplace=False)`` is created for
            this instance, instead of one default object shared by every
            ``MyConv``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, activation=None):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        self.bn = nn.BatchNorm2d(out_channels)
        # A default argument is evaluated once at definition time, so a module
        # used as a default would be the same object in every instance.
        self.act = nn.ReLU(inplace=False) if activation is None else activation

    def forward(self, x):
        """Apply conv -> batch norm -> activation to ``x`` and return the result."""
        x = self.conv(x)
        x = self.bn(x)
        x = self.act(x)
        return x

# Define the ConvNetResidual model using MyConv
class ConvNetResidual(nn.Module):
    """Three-stage convolutional classifier with 1x1-convolution shortcuts.

    Each stage computes ``MyConv(x) + res_conv(x)`` and then halves the
    spatial size with 2x2 max pooling. The flattened features feed two linear
    layers that produce 10 outputs. ``fc1`` expects ``128 * 4 * 4`` features,
    which is what three 2x poolings leave for a 32x32 input.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = MyConv(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = MyConv(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = MyConv(64, 128, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(128 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 10)

        # Residual connections: 1x1 convolutions that match the channel count
        # of the main branch so the two can be added.
        self.res_conv1 = nn.Conv2d(3, 32, kernel_size=1, stride=1, padding=0)
        self.res_conv2 = nn.Conv2d(32, 64, kernel_size=1, stride=1, padding=0)
        self.res_conv3 = nn.Conv2d(64, 128, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        stages = (
            (self.conv1, self.res_conv1),
            (self.conv2, self.res_conv2),
            (self.conv3, self.res_conv3),
        )
        for conv, shortcut in stages:
            residual = shortcut(x)
            x = conv(x) + residual
            x = self.pool(x)

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 128 * 4 * 4), this never folds extra spatial positions into
        # the batch dimension: an unexpected input size fails in fc1 with a
        # shape error instead of silently changing the batch size.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize the model and move it to the selected device
modelNet = ConvNetResidual().to(deviceGPU)

# Optimizer: Adam over all model parameters, using the learning rate defined
# above and a weight decay of 1e-4
opt = torch.optim.Adam(modelNet.parameters(), lr=learningRate, weight_decay=0.0001)

# Offload data to GPU
def off_load_on_gpu(train_loader, test_loader, device):
    """Iterate both loaders once and keep every batch on ``device``.

    Args:
        train_loader: Iterable of ``(x, y)`` training batches.
        test_loader: Iterable of ``(x, y)`` test batches.
        device: Target passed to ``Tensor.to`` for every ``x`` and ``y``.

    Returns:
        Two lists ``(train_batches, test_batches)`` of ``(x, y)`` tuples, in
        the order the loaders yielded them.
    """
    def _materialize(loader):
        # Walk the loader a single time, moving each batch as it arrives.
        moved = []
        for inputs, targets in loader:
            moved.append((inputs.to(device), targets.to(device)))
        return moved

    train_loader_gpu = _materialize(train_loader)
    test_loader_gpu = _materialize(test_loader)
    return train_loader_gpu, test_loader_gpu

# Training cycle
def training_cycle(model, train_loader, device, optimizer=None, loss_fn=None):
    """Run one optimization pass over ``train_loader``.

    Args:
        model: Module to train; switched to training mode.
        train_loader: Iterable of ``(x, y)`` batches.
        device: Device each batch is moved to before the forward pass. This is
            a no-op for batches already stored there.
        optimizer: Optimizer to step. Defaults to the module-level ``opt``.
        loss_fn: Callable ``loss_fn(preds, y)`` returning a scalar loss.
            Defaults to the module-level ``lossFn``.
    """
    # Fall back to the notebook globals so existing three-argument calls keep
    # behaving exactly as before.
    optimizer = opt if optimizer is None else optimizer
    loss_fn = lossFn if loss_fn is None else loss_fn

    model.train()
    for x, y in train_loader:
        # Make sure the batch lives on the same device as the model.
        x, y = x.to(device), y.to(device)
        preds = model(x)
        loss = loss_fn(preds, y)
        loss.backward()
        optimizer.step()
        # Clear the gradients so the next batch starts from zero.
        optimizer.zero_grad()

# Validation cycle
def validation_cycle(model, test_loader, epoch, num_epochs, device, loss_fn=None):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.

    Accuracy is computed over all samples seen (correct / total), so batches
    of different sizes are weighted correctly. The loss is the mean of the
    per-batch losses.

    Args:
        model: Module to evaluate; switched to eval mode.
        test_loader: Iterable of ``(x, y)`` batches.
        epoch: Zero-based epoch index, printed as ``epoch + 1``.
        num_epochs: Total number of epochs, used only in the printed message.
        device: Device each batch is moved to before the forward pass.
        loss_fn: Callable ``loss_fn(preds, y)`` returning a scalar loss.
            Defaults to the module-level ``lossFn``.

    Returns:
        Tuple ``(avg_loss, avg_acc)``; both are NaN when the loader yields no
        batches.
    """
    loss_fn = lossFn if loss_fn is None else loss_fn

    model.eval()
    total_loss = 0.0
    num_batches = 0
    num_correct = 0
    num_samples = 0
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)
            preds = model(x)
            total_loss += loss_fn(preds, y).item()
            num_batches += 1
            # Count hits and samples separately; averaging per-batch accuracies
            # would over-weight a smaller final batch.
            num_correct += (preds.argmax(dim=1) == y).sum().item()
            num_samples += y.size(0)

    avg_loss = total_loss / num_batches if num_batches else float("nan")
    avg_acc = num_correct / num_samples if num_samples else float("nan")
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_acc * 100:.2f}%")
    return avg_loss, avg_acc

# Training and validation for one epoch
def fit_one_cycle(model, train_loader, test_loader, epoch, num_epochs, device): 
    """Run one training pass followed by one validation pass.

    Args:
        model: Module handed to both ``training_cycle`` and ``validation_cycle``.
        train_loader: Batches forwarded to ``training_cycle``.
        test_loader: Batches forwarded to ``validation_cycle``.
        epoch: Current epoch index, forwarded to ``validation_cycle``.
        num_epochs: Total epoch count, forwarded to ``validation_cycle``.
        device: Device argument forwarded to both cycles.
    """
    training_cycle(model, train_loader, device)
    validation_cycle(model, test_loader, epoch, num_epochs, device)

# Load data onto GPU
# NOTE: this walks each DataLoader once and keeps the resulting batches in
# lists, so every epoch below reuses the same batches in the same order.
train_loader_gpu, test_loader_gpu = off_load_on_gpu(train_loader, test_loader, deviceGPU)

# Train the model
num_epochs = 50
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
train_start = time.perf_counter()

for epoch in range(num_epochs):
    fit_one_cycle(modelNet, train_loader_gpu, test_loader_gpu, epoch, num_epochs, deviceGPU)

time_to_train = time.perf_counter() - train_start
print(f"Time to train: {time_to_train:.2f} seconds")


```text
Bottleneck Block: reduces the channel count (1x1 conv), processes at the reduced width (3x3 conv), and restores the channel count (1x1 conv).
```

In [3]:
import torchvision, torch
import torch.nn as nn
import torch.nn.functional as F
import time

# CIFAR-10 preprocessing: tensor conversion followed by normalization
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# Dataset loading (train and test splits share the same transform)
train_dataset = torchvision.datasets.CIFAR10(
    root='data/', train=True, download=True, transform=transform,
)
test_dataset = torchvision.datasets.CIFAR10(
    root='data/', train=False, download=True, transform=transform,
)

# DataLoaders: shuffled batches of 128 for training, ordered batches of 1000 for testing
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=1000,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

# Device selection: CUDA when available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Bottleneck class
class Bottleneck(nn.Module):
    """Bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand, each with batch norm.

    The input is added back to the output only when the shapes match, i.e.
    when ``in_channels == out_channels`` and ``stride == 1``. Otherwise the
    block has no shortcut.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        stride: Stride of the first (1x1) convolution.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # A quarter of the output width, but never zero: out_channels < 4
        # would otherwise create convolutions with no channels at all.
        mid_channels = max(1, out_channels // 4)

        self.reduce = nn.Conv2d(in_channels, mid_channels, kernel_size=1, stride=stride, bias=False)
        self.reduce_bn = nn.BatchNorm2d(mid_channels)

        self.maintain = nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.maintain_bn = nn.BatchNorm2d(mid_channels)

        self.expand = nn.Conv2d(mid_channels, out_channels, kernel_size=1, stride=1, bias=False)
        self.expand_bn = nn.BatchNorm2d(out_channels)

        self.activation = nn.ReLU(inplace=True)
        # The identity shortcut is only valid when input and output shapes agree.
        self.residual_connection = (in_channels == out_channels and stride == 1)

    def forward(self, x):
        """Apply the block to ``x`` and return the activated result."""
        identity = x

        x = self.activation(self.reduce_bn(self.reduce(x)))
        x = self.activation(self.maintain_bn(self.maintain(x)))
        x = self.expand_bn(self.expand(x))

        if self.residual_connection:
            x += identity
        return self.activation(x)

# Stack of Bottleneck blocks
class BottleneckStack(nn.Module):
    """Sequence of ``Bottleneck`` blocks.

    The first block maps ``in_channels`` to ``out_channels`` with the given
    ``stride``; the remaining ``num_blocks - 1`` blocks keep ``out_channels``
    and use stride 1.
    """

    def __init__(self, in_channels, out_channels, num_blocks, stride=1):
        super().__init__()
        blocks = [Bottleneck(in_channels, out_channels, stride)]
        for _ in range(num_blocks - 1):
            blocks.append(Bottleneck(out_channels, out_channels, stride=1))
        self.stack = nn.Sequential(*blocks)

    def forward(self, x):
        """Pass ``x`` through every block in order."""
        out = self.stack(x)
        return out

# Main model
class CIFAR10BottleneckNet(nn.Module):
    """Classifier built from a 3x3 stem, three bottleneck stacks and a linear head.

    The stem maps 3 input channels to 64. The stacks widen to 128, 256 and 512
    channels, each with stride 2. Global average pooling and a 512 -> 10
    linear layer produce the outputs.
    """

    def __init__(self):
        super().__init__()
        # Stem
        self.init_conv = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.init_bn = nn.BatchNorm2d(64)
        self.init_relu = nn.ReLU(inplace=True)

        # Bottleneck stages
        self.block1 = BottleneckStack(64, 128, num_blocks=3, stride=2)
        self.block2 = BottleneckStack(128, 256, num_blocks=4, stride=2)
        self.block3 = BottleneckStack(256, 512, num_blocks=6, stride=2)

        # Head
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        stem = self.init_conv(x)
        stem = self.init_bn(stem)
        features = self.init_relu(stem)

        for stage in (self.block1, self.block2, self.block3):
            features = stage(features)

        pooled = self.pool(features)
        flat = pooled.flatten(1)
        return self.fc(flat)

# Model instantiation (moved to the selected device)
model = CIFAR10BottleneckNet().to(device)

# Optimizer and loss function: Adam with lr 1e-3 and weight decay 1e-4,
# trained against cross-entropy
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
criterion = nn.CrossEntropyLoss()

# Training loop
def train_one_epoch(model, train_loader, optimizer, criterion, device):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Module to train; switched to training mode.
        train_loader: Iterable of ``(images, labels)`` batches. It does not
            need to support ``len()``.
        optimizer: Optimizer stepped once per batch.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Mean of the per-batch losses, or NaN when the loader yields no batches.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0
    for images, labels in train_loader:
        # non_blocking lets copies from pinned host memory overlap with compute;
        # it has no effect when the tensors are already on ``device``.
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Count batches while iterating so generators work and an empty loader
    # does not raise ZeroDivisionError.
    return running_loss / num_batches if num_batches else float("nan")

# Validation loop
def validate(model, test_loader, criterion, device):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Args:
        model: Module to evaluate; switched to eval mode.
        test_loader: Iterable of ``(images, labels)`` batches. It needs
            neither ``len()`` nor a ``.dataset`` attribute.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Tuple ``(mean_batch_loss, accuracy)``. Accuracy is correct predictions
        divided by the number of samples actually seen. Both values are NaN
        when the loader yields no batches.
    """
    model.eval()
    running_loss = 0.0
    num_batches = 0
    correct = 0
    seen = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            outputs = model(images)
            running_loss += criterion(outputs, labels).item()
            num_batches += 1

            # Predicted class = index of the largest score in each row.
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += labels.size(0)

    mean_loss = running_loss / num_batches if num_batches else float("nan")
    accuracy = correct / seen if seen else float("nan")
    return mean_loss, accuracy

# Train for several epochs, evaluating on the test loader after each one
num_epochs = 50
for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_accuracy = validate(model, test_loader, criterion, device)

    # epoch is zero-based, so report epoch + 1; accuracy is printed as a percentage
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy * 100:.2f}%")


Files already downloaded and verified
Files already downloaded and verified
Epoch 1/50, Train Loss: 1.6378, Val Loss: 1.4811, Val Accuracy: 47.55%
Epoch 2/50, Train Loss: 1.2066, Val Loss: 1.2412, Val Accuracy: 56.93%
Epoch 3/50, Train Loss: 0.9693, Val Loss: 1.0040, Val Accuracy: 65.24%
Epoch 4/50, Train Loss: 0.8200, Val Loss: 0.8809, Val Accuracy: 69.58%
Epoch 5/50, Train Loss: 0.7159, Val Loss: 0.8805, Val Accuracy: 70.30%
Epoch 6/50, Train Loss: 0.6284, Val Loss: 0.8017, Val Accuracy: 72.64%
Epoch 7/50, Train Loss: 0.5611, Val Loss: 0.7893, Val Accuracy: 74.37%
Epoch 8/50, Train Loss: 0.5005, Val Loss: 0.7202, Val Accuracy: 75.80%
Epoch 9/50, Train Loss: 0.4475, Val Loss: 0.7595, Val Accuracy: 74.35%
Epoch 10/50, Train Loss: 0.4006, Val Loss: 0.7611, Val Accuracy: 75.84%
Epoch 11/50, Train Loss: 0.3606, Val Loss: 1.0420, Val Accuracy: 68.01%
Epoch 12/50, Train Loss: 0.3221, Val Loss: 0.8220, Val Accuracy: 74.86%
Epoch 13/50, Train Loss: 0.2896, Val Loss: 0.7572, Val Accuracy: 76.6

```text
Inverted Bottleneck Block: expands the channel count (1x1 conv), filters at the expanded width (3x3 depthwise conv), and projects back down (1x1 conv).
```

In [4]:
import torchvision, torch
import torch.nn as nn
import torch.nn.functional as F
import time

# CIFAR-10 preprocessing: tensor conversion followed by normalization
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# Dataset loading (train and test splits share the same transform)
train_dataset = torchvision.datasets.CIFAR10(
    root='data/', train=True, download=True, transform=transform,
)
test_dataset = torchvision.datasets.CIFAR10(
    root='data/', train=False, download=True, transform=transform,
)

# DataLoaders: shuffled batches of 128 for training, ordered batches of 1000 for testing
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=1000,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

# Device selection: CUDA when available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Inverted Bottleneck class
class InvertedBottleneck(nn.Module):
    """Inverted bottleneck: 1x1 expand -> 3x3 depthwise -> 1x1 project.

    Each convolution is followed by batch normalization; ReLU follows the
    first two but not the projection. The input is added to the output only
    when ``in_channels == out_channels`` and ``stride == 1``.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        expansion_factor: Multiplier giving the expanded width
            (``in_channels * expansion_factor``).
        stride: Stride of the depthwise convolution.
    """

    def __init__(self, in_channels, out_channels, expansion_factor, stride):
        super().__init__()
        hidden = in_channels * expansion_factor

        # The identity shortcut needs matching input and output shapes.
        self.use_residual = (stride == 1) and (in_channels == out_channels)

        # 1x1 expansion
        self.expand = nn.Conv2d(
            in_channels, hidden, kernel_size=1, stride=1, padding=0, bias=False,
        )
        self.expand_bn = nn.BatchNorm2d(hidden)

        # 3x3 depthwise convolution: groups == channels, one filter per channel
        self.depthwise = nn.Conv2d(
            hidden, hidden, kernel_size=3, stride=stride, padding=1,
            groups=hidden, bias=False,
        )
        self.depthwise_bn = nn.BatchNorm2d(hidden)

        # 1x1 projection back to the output width
        self.project = nn.Conv2d(
            hidden, out_channels, kernel_size=1, stride=1, padding=0, bias=False,
        )
        self.project_bn = nn.BatchNorm2d(out_channels)

        self.activation = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the block to ``x``; no activation follows the projection."""
        shortcut = x

        expanded = self.activation(self.expand_bn(self.expand(x)))
        filtered = self.activation(self.depthwise_bn(self.depthwise(expanded)))
        out = self.project_bn(self.project(filtered))

        if not self.use_residual:
            return out
        out += shortcut
        return out

# Stack of Inverted Bottleneck blocks
class InvertedBottleneckStack(nn.Module):
    """Sequence of ``InvertedBottleneck`` blocks.

    The first block maps ``in_channels`` to ``out_channels`` with the given
    ``stride``; the remaining ``num_blocks - 1`` blocks keep ``out_channels``
    and use stride 1. All blocks share ``expansion_factor``.
    """

    def __init__(self, in_channels, out_channels, num_blocks, expansion_factor, stride):
        super().__init__()
        blocks = [InvertedBottleneck(in_channels, out_channels, expansion_factor, stride)]
        for _ in range(num_blocks - 1):
            blocks.append(
                InvertedBottleneck(out_channels, out_channels, expansion_factor, stride=1)
            )
        self.stack = nn.Sequential(*blocks)

    def forward(self, x):
        """Pass ``x`` through every block in order."""
        out = self.stack(x)
        return out

# Main model built from Inverted Bottlenecks
class CIFAR10InvertedBottleneckNet(nn.Module):
    """Classifier built from a 3x3 stem, three inverted-bottleneck stacks and a linear head.

    The stem maps 3 input channels to 32. The stacks widen to 64, 128 and 256
    channels, each with stride 2 and expansion factor 6. Global average
    pooling and a 256 -> 10 linear layer produce the outputs.
    """

    def __init__(self):
        super().__init__()
        # Stem
        self.init_conv = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.init_bn = nn.BatchNorm2d(32)
        self.init_relu = nn.ReLU(inplace=True)

        # Inverted-bottleneck stages
        self.block1 = InvertedBottleneckStack(32, 64, num_blocks=2, expansion_factor=6, stride=2)
        self.block2 = InvertedBottleneckStack(64, 128, num_blocks=3, expansion_factor=6, stride=2)
        self.block3 = InvertedBottleneckStack(128, 256, num_blocks=4, expansion_factor=6, stride=2)

        # Head
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, 10)

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        stem = self.init_conv(x)
        stem = self.init_bn(stem)
        features = self.init_relu(stem)

        for stage in (self.block1, self.block2, self.block3):
            features = stage(features)

        pooled = self.pool(features)
        flat = pooled.flatten(1)
        return self.fc(flat)

# Model instantiation (moved to the selected device)
model = CIFAR10InvertedBottleneckNet().to(device)

# Optimizer and loss function: Adam with lr 1e-3 and weight decay 1e-4,
# trained against cross-entropy
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
criterion = nn.CrossEntropyLoss()

# Training loop
def train_one_epoch(model, train_loader, optimizer, criterion, device):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Module to train; switched to training mode.
        train_loader: Iterable of ``(images, labels)`` batches. It does not
            need to support ``len()``.
        optimizer: Optimizer stepped once per batch.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Mean of the per-batch losses, or NaN when the loader yields no batches.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0
    for images, labels in train_loader:
        # non_blocking lets copies from pinned host memory overlap with compute;
        # it has no effect when the tensors are already on ``device``.
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Count batches while iterating so generators work and an empty loader
    # does not raise ZeroDivisionError.
    return running_loss / num_batches if num_batches else float("nan")

# Validation loop
def validate(model, test_loader, criterion, device):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Args:
        model: Module to evaluate; switched to eval mode.
        test_loader: Iterable of ``(images, labels)`` batches. It needs
            neither ``len()`` nor a ``.dataset`` attribute.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Tuple ``(mean_batch_loss, accuracy)``. Accuracy is correct predictions
        divided by the number of samples actually seen. Both values are NaN
        when the loader yields no batches.
    """
    model.eval()
    running_loss = 0.0
    num_batches = 0
    correct = 0
    seen = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            outputs = model(images)
            running_loss += criterion(outputs, labels).item()
            num_batches += 1

            # Predicted class = index of the largest score in each row.
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += labels.size(0)

    mean_loss = running_loss / num_batches if num_batches else float("nan")
    accuracy = correct / seen if seen else float("nan")
    return mean_loss, accuracy

# Train for several epochs, evaluating on the test loader after each one
num_epochs = 50
for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_accuracy = validate(model, test_loader, criterion, device)

    # epoch is zero-based, so report epoch + 1; accuracy is printed as a percentage
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy * 100:.2f}%")


Files already downloaded and verified
Files already downloaded and verified
Epoch 1/50, Train Loss: 1.1711, Val Loss: 0.9166, Val Accuracy: 68.02%
Epoch 2/50, Train Loss: 0.7127, Val Loss: 0.8171, Val Accuracy: 71.59%
Epoch 3/50, Train Loss: 0.5478, Val Loss: 0.6463, Val Accuracy: 77.90%
Epoch 4/50, Train Loss: 0.4394, Val Loss: 0.6813, Val Accuracy: 77.19%
Epoch 5/50, Train Loss: 0.3701, Val Loss: 0.6597, Val Accuracy: 78.34%
Epoch 6/50, Train Loss: 0.3142, Val Loss: 0.7045, Val Accuracy: 77.22%
Epoch 7/50, Train Loss: 0.2647, Val Loss: 0.6637, Val Accuracy: 79.32%
Epoch 8/50, Train Loss: 0.2328, Val Loss: 0.7332, Val Accuracy: 78.61%
Epoch 9/50, Train Loss: 0.2096, Val Loss: 0.6884, Val Accuracy: 78.89%
Epoch 10/50, Train Loss: 0.1826, Val Loss: 0.7953, Val Accuracy: 78.11%
Epoch 11/50, Train Loss: 0.1622, Val Loss: 0.6809, Val Accuracy: 80.62%
Epoch 12/50, Train Loss: 0.1626, Val Loss: 0.6646, Val Accuracy: 81.01%
Epoch 13/50, Train Loss: 0.1401, Val Loss: 0.7223, Val Accuracy: 79.9