In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from accelerate import Accelerator
from torchvision.models import resnet50
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import Compose, ToTensor, Normalize, RandomHorizontalFlip, v2, Resize
from tqdm import tqdm
from accelerate import notebook_launcher
from sklearn.model_selection import train_test_split
import os
import timm
# Restrict this process to the GPU with index 0.
# NOTE(review): this is set after `import torch`; presumably that is fine as
# long as CUDA has not been initialised yet — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

class lcnet_ft(nn.Module):
    """Pretrained timm ``lcnet_075.ra2_in1k`` prepared for head-only fine-tuning.

    Every encoder parameter is frozen except those belonging to
    ``encoder.classifier``.

    Args:
        num_classes: Forwarded to ``timm.create_model`` as ``num_classes``.
        embed_dim: Stored as ``self.embed_dim``; this class does not use it
            anywhere else.
    """

    def __init__(self, num_classes=10, embed_dim=1280):
        super().__init__()
        self.embed_dim = embed_dim

        backbone = timm.create_model(
            'timm/lcnet_075.ra2_in1k', pretrained=True, num_classes=num_classes
        )
        # Turn gradients off everywhere, then switch them back on for the head.
        backbone.requires_grad_(False)
        backbone.classifier.requires_grad_(True)
        self.encoder = backbone

    def forward(self, x):
        """Return the encoder's output for ``x`` unchanged.

        With the classification head in place this is presumably a tensor of
        class logits (one row per sample) — confirm against timm.
        """
        return self.encoder(x)


class mnasnet_ft(nn.Module):
    """Pretrained timm ``mnasnet_small.lamb_in1k`` prepared for head-only fine-tuning.

    Every encoder parameter is frozen except those belonging to
    ``encoder.classifier``.

    Args:
        num_classes: Forwarded to ``timm.create_model`` as ``num_classes``.
        embed_dim: Stored as ``self.embed_dim``; this class does not use it
            anywhere else.
    """

    def __init__(self, num_classes=10, embed_dim=1280):
        super().__init__()
        self.embed_dim = embed_dim

        backbone = timm.create_model(
            'timm/mnasnet_small.lamb_in1k', pretrained=True, num_classes=num_classes
        )
        # Turn gradients off everywhere, then switch them back on for the head.
        backbone.requires_grad_(False)
        backbone.classifier.requires_grad_(True)
        self.encoder = backbone

    def forward(self, x):
        """Return the encoder's output for ``x`` unchanged.

        With the classification head in place this is presumably a tensor of
        class logits (one row per sample) — confirm against timm.
        """
        return self.encoder(x)


class repghostnet_ft(nn.Module):
    """Pretrained timm ``repghostnet_050.in1k`` prepared for head-only fine-tuning.

    Every encoder parameter is frozen except those belonging to
    ``encoder.classifier``.

    Args:
        num_classes: Forwarded to ``timm.create_model`` as ``num_classes``.
        embed_dim: Stored as ``self.embed_dim``; this class does not use it
            anywhere else.
    """

    def __init__(self, num_classes=10, embed_dim=1280):
        super().__init__()
        self.embed_dim = embed_dim

        backbone = timm.create_model(
            'timm/repghostnet_050.in1k', pretrained=True, num_classes=num_classes
        )
        # Turn gradients off everywhere, then switch them back on for the head.
        backbone.requires_grad_(False)
        backbone.classifier.requires_grad_(True)
        self.encoder = backbone

    def forward(self, x):
        """Return the encoder's output for ``x`` unchanged.

        With the classification head in place this is presumably a tensor of
        class logits (one row per sample) — confirm against timm.
        """
        return self.encoder(x)

class ResNetViTEnsembleWithTransformer(nn.Module):
    """Three-backbone ensemble whose features are fused by a 1-D convolution.

    Despite its name, the class combines the ``lcnet_ft``, ``mnasnet_ft`` and
    ``repghostnet_ft`` wrappers. Each wrapper's ``encoder.classifier`` is
    replaced by ``nn.Identity``. Because the wrappers freeze everything except
    that classifier, the only trainable parameters left are this module's own
    ``conv`` and ``classifier`` layers.

    Args:
        num_classes: Output width of the final linear layer (also forwarded to
            the backbone wrappers).
        embed_dim: Input width of the final linear layer. Assumes every
            backbone emits ``[batch, embed_dim]`` features once its classifier
            is an identity — TODO confirm for the chosen timm models.

    NOTE(review): freezing through ``requires_grad`` does not put the backbones
    in eval mode. If they contain BatchNorm/Dropout layers, those still follow
    ``model.train()`` — confirm this is intended.
    """

    def __init__(self, num_classes=10, embed_dim=1280):
        super().__init__()
        self.embed_dim = embed_dim

        # Frozen feature extractors.
        self.lcnet = lcnet_ft(num_classes=num_classes, embed_dim=embed_dim)
        self.mnasnet = mnasnet_ft(num_classes=num_classes, embed_dim=embed_dim)
        self.repghostnet = repghostnet_ft(num_classes=num_classes, embed_dim=embed_dim)

        # Strip the classification heads so each encoder returns its features.
        for branch in (self.lcnet, self.mnasnet, self.repghostnet):
            branch.encoder.classifier = nn.Identity()

        # Three input channels (one per backbone) are mixed into one channel;
        # kernel 3 with padding 1 keeps the feature length unchanged.
        self.conv = nn.Conv1d(3, 1, kernel_size=3, stride=1, padding=1)

        self.classifier = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        """Fuse the three backbones' features for ``x`` and return class logits."""
        branches = (self.lcnet, self.mnasnet, self.repghostnet)
        # Stack per-backbone features along a new channel axis (dim=1).
        stacked = torch.stack([branch.encoder(x) for branch in branches], dim=1)
        # Collapse the three channels to one, then drop that channel axis.
        fused = self.conv(stacked).squeeze(1)
        return self.classifier(fused)


# Module-level preprocessing pipeline: resize to 224x224, random horizontal
# flip, tensor conversion, then per-channel normalisation. The statistics
# assume CIFAR10 — adapt them for a different dataset.
transform = Compose(
    [
        Resize((224, 224)),
        RandomHorizontalFlip(),
        ToTensor(),
        Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261]),
    ]
)

# Dataset and DataLoader setup
class FTDataset(Dataset):
    """Map-style dataset over parallel ``images`` and ``labels`` sequences.

    Args:
        images: Indexable collection of samples; its length defines the
            dataset length.
        labels: Indexable collection of targets, indexed in step with
            ``images``.
        transform: Optional callable applied to each sample on access.
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels, self.transform = images, labels, transform

    def __len__(self):
        """Number of samples, taken from ``images``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(sample, label)`` at ``idx``, transforming the sample if configured."""
        sample, target = self.images[idx], self.labels[idx]
        # Truthiness check: any falsy ``transform`` leaves the sample as-is.
        return (self.transform(sample) if self.transform else sample), target

def train_model(
    train_loader,
    test_loader,
    num_epochs=30,
    lr=0.0001,
    model_path="/home/chen/EECE570/new_models/ensemble_model/model_final_1dcnn.pt",
):
    """Train the ensemble, report the held-out loss, and save its weights.

    Args:
        train_loader: Iterable yielding ``(inputs, labels)`` training batches.
        test_loader: Iterable yielding ``(inputs, labels)`` batches used for
            the single evaluation pass after training.
        num_epochs: Number of passes over ``train_loader``.
        lr: Learning rate for the Adam optimizer.
        model_path: Destination file for the trained ``state_dict``.

    Returns:
        None. Progress and losses are printed; the weights are written to
        ``model_path``.
    """
    # Create the output directory up front so a bad path fails before
    # training rather than after the last epoch.
    save_dir = os.path.dirname(model_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    accelerator = Accelerator(mixed_precision="fp16")
    model = ResNetViTEnsembleWithTransformer()
    # Only hand parameters that can actually receive gradients to the optimizer.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(trainable_params, lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    model, optimizer, train_loader, test_loader = accelerator.prepare(model, optimizer, train_loader, test_loader)

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for x, labels in tqdm(train_loader, desc=f"Epoch {epoch + 1}"):
            optimizer.zero_grad()
            outputs = model(x)
            loss = loss_fn(outputs, labels)
            # Backward goes through the accelerator so it can apply fp16 loss scaling.
            accelerator.backward(loss)
            optimizer.step()
            total_loss += loss.item()
        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        avg_loss = total_loss / max(len(train_loader), 1)
        print(f"Epoch {epoch + 1}, Loss: {avg_loss:.4f}")

    # Single evaluation pass on the held-out loader after training.
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for x, labels in test_loader:
            outputs = model(x)
            loss = loss_fn(outputs, labels)
            test_loss += loss.item()
    avg_test_loss = test_loss / max(len(test_loader), 1)
    print(f"Test Loss: {avg_test_loss:.4f}")

    # Save the unwrapped model's weights so they load into a plain instance.
    unencapsulated_model = accelerator.unwrap_model(model)
    accelerator.save(unencapsulated_model.state_dict(), model_path)


def main():
    """Build stratified CIFAR10 train/held-out loaders and run ``train_model``.

    The CIFAR10 training set is split 80/20 (stratified by label, fixed seed).
    The training split gets random horizontal flips. The held-out split uses
    the same resize and normalisation but no random augmentation, so its loss
    is deterministic.
    """
    root_dir = './data'
    batch_size = 128

    normalize = Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261])
    train_transform = Compose([
        Resize((224,224)),
        RandomHorizontalFlip(),
        ToTensor(),
        normalize,
    ])
    # Evaluation must not include random augmentation.
    eval_transform = Compose([
        Resize((224,224)),
        ToTensor(),
        normalize,
    ])

    # Load the full dataset in a single pass; transforms are applied later
    # by FTDataset.
    full_dataset = CIFAR10(root=root_dir, train=True, download=True, transform=None)
    images, labels = [], []
    for image, label in full_dataset:
        images.append(image)
        labels.append(label)

    # Stratified train-test split
    train_images, test_images, train_labels, test_labels = train_test_split(
        images, labels, test_size=0.2, stratify=labels, random_state=42
    )

    # Creating datasets
    train_dataset = FTDataset(train_images, train_labels, transform=train_transform)
    test_dataset = FTDataset(test_images, test_labels, transform=eval_transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True)

    train_model(train_loader, test_loader)

# Run main() through Accelerate's notebook launcher with a single process.
notebook_launcher(main, num_processes=1)

Launching training on one GPU.
Files already downloaded and verified


Epoch 1: 100%|██████████| 313/313 [00:32<00:00,  9.67it/s]


Epoch 1, Loss: 1.7567


Epoch 2: 100%|██████████| 313/313 [00:38<00:00,  8.07it/s]


Epoch 2, Loss: 1.0906


Epoch 3: 100%|██████████| 313/313 [00:38<00:00,  8.05it/s]


Epoch 3, Loss: 0.8295


Epoch 4: 100%|██████████| 313/313 [00:39<00:00,  7.99it/s]


Epoch 4, Loss: 0.7082


Epoch 5: 100%|██████████| 313/313 [00:39<00:00,  8.00it/s]


Epoch 5, Loss: 0.6358


Epoch 6: 100%|██████████| 313/313 [00:38<00:00,  8.04it/s]


Epoch 6, Loss: 0.5900


Epoch 7: 100%|██████████| 313/313 [00:39<00:00,  7.92it/s]


Epoch 7, Loss: 0.5563


Epoch 8: 100%|██████████| 313/313 [00:39<00:00,  8.02it/s]


Epoch 8, Loss: 0.5291


Epoch 9: 100%|██████████| 313/313 [00:39<00:00,  7.98it/s]


Epoch 9, Loss: 0.5094


Epoch 10: 100%|██████████| 313/313 [00:39<00:00,  7.89it/s]


Epoch 10, Loss: 0.4923


Epoch 11: 100%|██████████| 313/313 [00:37<00:00,  8.43it/s]


Epoch 11, Loss: 0.4761


Epoch 12: 100%|██████████| 313/313 [00:40<00:00,  7.65it/s]


Epoch 12, Loss: 0.4668


Epoch 13: 100%|██████████| 313/313 [00:40<00:00,  7.72it/s]


Epoch 13, Loss: 0.4544


Epoch 14: 100%|██████████| 313/313 [00:40<00:00,  7.64it/s]


Epoch 14, Loss: 0.4430


Epoch 15: 100%|██████████| 313/313 [00:40<00:00,  7.73it/s]


Epoch 15, Loss: 0.4342


Epoch 16: 100%|██████████| 313/313 [00:40<00:00,  7.68it/s]


Epoch 16, Loss: 0.4267


Epoch 17: 100%|██████████| 313/313 [00:40<00:00,  7.70it/s]


Epoch 17, Loss: 0.4190


Epoch 18: 100%|██████████| 313/313 [00:40<00:00,  7.71it/s]


Epoch 18, Loss: 0.4140


Epoch 19: 100%|██████████| 313/313 [00:40<00:00,  7.77it/s]


Epoch 19, Loss: 0.4087


Epoch 20: 100%|██████████| 313/313 [00:40<00:00,  7.75it/s]


Epoch 20, Loss: 0.4012


Epoch 21: 100%|██████████| 313/313 [00:40<00:00,  7.70it/s]


Epoch 21, Loss: 0.3939


Epoch 22: 100%|██████████| 313/313 [00:40<00:00,  7.69it/s]


Epoch 22, Loss: 0.3904


Epoch 23: 100%|██████████| 313/313 [00:40<00:00,  7.66it/s]


Epoch 23, Loss: 0.3858


Epoch 24: 100%|██████████| 313/313 [00:38<00:00,  8.21it/s]


Epoch 24, Loss: 0.3826


Epoch 25: 100%|██████████| 313/313 [00:43<00:00,  7.17it/s]


Epoch 25, Loss: 0.3785


Epoch 26: 100%|██████████| 313/313 [00:43<00:00,  7.19it/s]


Epoch 26, Loss: 0.3743


Epoch 27: 100%|██████████| 313/313 [00:43<00:00,  7.26it/s]


Epoch 27, Loss: 0.3759


Epoch 28: 100%|██████████| 313/313 [00:43<00:00,  7.12it/s]


Epoch 28, Loss: 0.3691


Epoch 29: 100%|██████████| 313/313 [00:43<00:00,  7.19it/s]


Epoch 29, Loss: 0.3645


Epoch 30: 100%|██████████| 313/313 [00:43<00:00,  7.19it/s]

Epoch 30, Loss: 0.3631





Test Loss: 0.4086


In [5]:
# Evaluate the saved ensemble on the official CIFAR10 test split.
model = ResNetViTEnsembleWithTransformer()
# Load onto the CPU first; the model is moved to the GPU right after.
model.load_state_dict(
    torch.load(
        "/home/chen/EECE570/new_models/ensemble_model/model_final_1dcnn.pt",
        map_location="cpu",
    )
)
model.cuda()

# Deterministic preprocessing for evaluation: same resize and normalisation
# as training, but no random flip, so the accuracy is reproducible.
eval_transform = Compose([
    Resize((224, 224)),
    ToTensor(),
    Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261]),
])

# Read the dataset once instead of iterating it separately for images and labels.
full_dataset = CIFAR10(root='./data', train=False, download=True, transform=None)
images, labels = [], []
for image, label in full_dataset:
    images.append(image)
    labels.append(label)

testset = FTDataset(images, labels, transform=eval_transform)
# Shuffling has no effect on accuracy, so keep the order fixed.
testloader = DataLoader(testset, batch_size=64, shuffle=False, num_workers=8, pin_memory=True)

model.eval()
correct = 0
total = 0
# No gradients are needed for inference.
with torch.no_grad():
    # `y` (not `labels`) so the label list above is not overwritten.
    for x, y in tqdm(testloader):
        x, y = x.cuda(), y.cuda()
        outputs = model(x)
        predicted = torch.argmax(outputs, 1)
        total += y.size(0)
        correct += (predicted == y).sum().item()
print(f"Accuracy: {correct / total:.4f}")


Files already downloaded and verified


100%|██████████| 157/157 [00:10<00:00, 15.35it/s]

Accuracy: 0.8595



