※ Global variables

In [83]:
import torch

# Number of (dataset, model) pairs; drives the data-path lists and the per-model loops.
gnDataset = 3
# Directory used for the dataset files, the saved class mappings and the saved models.
gBase_path = 'C:/Users/AdamBao/PycharmProjects/CAS-bigdata-main/Task2/Task2_data'
# Passed as num_classes to every model constructor in _get_model().
gnClasses = 5
# Forwarded to save_model(): True pickles the whole model, False saves only the state_dict.
gSaveModelAll = True

# Per-model hyperparameters; entry m belongs to model number m (0-based).
gDropout_rate = [0.5, 0.5, 0.5]
# Forwarded to _get_model(), which currently does not use it.
gBatch_momentum = [0.1, 0.1, 0.1]
gLearning_rate = [0.00001, 0.00001, 0.00001]
gWeight_decay = [1e-3, 1e-3, 1e-3]
# Only consumed when the optimizer name is "SGD".
gOpt_Momentum = [0.1, 0.1, 0.1]

gData_batch_size = 32
# Number of consecutive epochs without a new best validation loss before training stops.
gEarly_stop_patience = 150
# Upper bound on training epochs per model.
gnEpochs = 300
# for taskB augmentation is not allowed
gbData_augmentation = False

gModel_names = ["ResNet10", "ResNet10", "ResNet10"] # accepted by _get_model: "cnn", "cnn2", "ResNet10", "ResNet50"
gOptimizer_names = ["Adam", "Adam", "Adam"] # "Adam", "SGD"
gCriterion_names = ["CrossEntropyLoss", "CrossEntropyLoss", "CrossEntropyLoss"]


import os
# Expose only GPU 0 to this process.
# NOTE(review): this is set after `import torch`; presumably CUDA has not been
# initialised yet at this point, so it still takes effect — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Use the first visible GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Report the runtime environment so the captured output documents how the run was produced.
print(device)
print(torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("PyTorch CUDA version: ", torch.version.cuda)

cuda:0
2.6.0+cu118
CUDA available: True
PyTorch CUDA version:  11.8


※ Widget functions

In [84]:
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor
import torchvision.models as models
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pickle

class CAS771Dataset(Dataset):
    """Map-style dataset that pairs each sample with its label.

    Args:
        data: Indexable container of samples.
        labels: Indexable container of labels, aligned with ``data``.
        transform: Optional callable applied to a sample on access. It is
            skipped for samples that are already ``torch.Tensor`` objects.
    """

    def __init__(self, data, labels, transform=None):
        self.data, self.labels, self.transform = data, labels, transform

    def __len__(self):
        # The sample container defines the dataset size.
        return len(self.data)

    def __getitem__(self, idx):
        sample, target = self.data[idx], self.labels[idx]
        # Tensors are passed through untouched; only non-tensor samples are transformed.
        needs_transform = bool(self.transform) and not isinstance(sample, torch.Tensor)
        return (self.transform(sample) if needs_transform else sample), target

def _load_data(train_data_path):
    raw_data = torch.load(train_data_path)
    data = raw_data['data']
    data = data.permute(0, 3, 1, 2)
    torch_labels = raw_data['labels']
    labels = []
    if isinstance(torch_labels[0], torch.Tensor):
      labels = [label.item() for label in torch_labels]
    return data, labels

def remap_labels(labels, class_mapping):
    """Translate every label through ``class_mapping``.

    Args:
        labels: Iterable of original labels.
        class_mapping: Mapping from original label to new label.

    Returns:
        A new list with the mapped labels, in input order.

    Raises:
        KeyError: If a label is missing from ``class_mapping``.
    """
    remapped = []
    for original in labels:
        remapped.append(class_mapping[original])
    return remapped

def load_class_names(filepath):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        filepath: Path of the text file, one entry per line.

    Returns:
        List of stripped lines, in file order.
    """
    with open(filepath, 'r') as handle:
        return list(map(str.strip, handle))

def count_parameters(model):
    """Return the total number of elements across the model's trainable parameters."""
    total = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad=False) are excluded from the count.
        if param.requires_grad:
            total += param.numel()
    return total

def count_layers(model):
    """Count the model's direct child modules, leaving out ``nn.Dropout`` children.

    Only immediate children are inspected; nested modules are not descended into.
    """
    return sum(1 for child in model.children() if not isinstance(child, nn.Dropout))


class CAS771Plot():
    """Collects per-epoch training/validation metrics for one model and plots them.

    Args:
        model: Network evaluated during validation.
        criterion: Loss callable used for the validation loss.
        device: Device that validation batches are moved to.
        train_dataloader: Training loader; only its length is used here, to
            average the accumulated training loss.
        test_dataloader: Loader iterated during validation.
        model_num: Zero-based model index; stored one-based for plot titles.
    """

    def __init__(self, model, criterion, device, train_dataloader, test_dataloader, model_num):
        self.model, self.criterion, self.device = model, criterion, device
        self.train_dataloader = train_dataloader
        self.test_dataloader = test_dataloader
        # One history list per curve; each gains one entry per append() call.
        self.train_losses, self.validation_losses, self.validation_accuracies = [], [], []
        self.running_loss = 0.0
        self.model_num = model_num + 1

    def init_running_loss(self):
        """Reset the training-loss accumulator (call at the start of an epoch)."""
        self.running_loss = 0.0

    def add_loss(self, loss):
        """Add one batch loss to the accumulator."""
        self.running_loss = self.running_loss + loss

    def append(self, epoch):
        """Close an epoch: average the training loss, validate, record and print.

        Returns:
            The validation loss of this epoch.
        """
        mean_train_loss = self.running_loss / len(self.train_dataloader)
        val_loss, val_acc = self._validate()
        recorded = (
            (self.train_losses, mean_train_loss),
            (self.validation_losses, val_loss),
            (self.validation_accuracies, val_acc),
        )
        for history, value in recorded:
            history.append(value)
        self._print(epoch, mean_train_loss, val_loss, val_acc)
        return val_loss

    def plot(self):
        """Draw the recorded loss and accuracy curves."""
        self._plot_metrics(self.train_losses, self.validation_losses, self.validation_accuracies, self.model_num)

    def _print(self, epoch, train_loss, validation_loss, validation_accuracy):
        # `epoch` is zero-based; the printed number is one-based.
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Validation Loss: {validation_loss:.4f}, Validation Accuracy: {validation_accuracy:.4f}")

    def _validate(self):
        """Run the model over the test loader and return (mean batch loss, accuracy)."""
        # Switches the model to eval mode; it is not switched back here.
        self.model.eval()
        loss_sum, n_correct, n_seen = 0.0, 0, 0

        with torch.no_grad():
            for batch_inputs, batch_labels in self.test_dataloader:
                batch_inputs = batch_inputs.to(self.device)
                batch_labels = batch_labels.to(self.device)

                logits = self.model(batch_inputs)
                loss_sum += self.criterion(logits, batch_labels).item()

                # Index of the largest logit along dim 1 is the predicted class.
                predicted = torch.max(logits, 1)[1]
                n_seen += batch_labels.size(0)
                n_correct += (predicted == batch_labels).sum().item()

        # Loss is averaged per batch, accuracy per sample.
        return loss_sum / len(self.test_dataloader), n_correct / n_seen

    def _plot_metrics(self, train_losses, validation_losses, validation_accuracies, model_num):
        """Render a two-panel figure: losses on the left, accuracy on the right."""
        x_axis = range(1, len(train_losses) + 1)

        def _decorate(title, y_label):
            # Shared labelling for both panels.
            plt.title(title)
            plt.xlabel('Epoch')
            plt.ylabel(y_label)
            plt.legend()
            plt.grid(True)

        plt.figure(figsize=(12, 6))

        plt.subplot(1, 2, 1)
        plt.plot(x_axis, train_losses, label='Train Loss', marker='o')
        plt.plot(x_axis, validation_losses, label='Validation Loss', marker='o')
        _decorate(f'Model {model_num} Learning per Epoch', 'Loss')

        plt.subplot(1, 2, 2)
        plt.plot(x_axis, validation_accuracies, label='Validation Accuracy', marker='o', color='green')
        _decorate(f'Model {model_num} Accuracy per Epoch', 'Accuracy')

        plt.tight_layout()
        plt.show()


class CAS771EarlyStopping():
    """Signals when the validation loss has stopped improving.

    Args:
        patience: Number of consecutive non-improving epochs tolerated before
            stopping. When ``None`` (the default) the module-level
            ``gEarly_stop_patience`` is read at check time, as before.
    """

    def __init__(self, patience=None):
        self.best_val_loss = float('inf')
        self.patience_counter = 0
        self.patience = patience

    def isStop(self, validation_loss):
        """Record one epoch's validation loss and report whether to stop.

        Args:
            validation_loss: Validation loss of the epoch that just finished.

        Returns:
            True once the loss has failed to improve for ``patience``
            consecutive calls, otherwise False.
        """
        if validation_loss < self.best_val_loss:
            # Strict improvement: remember the new best and restart the count.
            self.best_val_loss = validation_loss
            self.patience_counter = 0
        else:
            self.patience_counter += 1

        # Resolve the global lazily so later changes to it are still honoured.
        limit = gEarly_stop_patience if self.patience is None else self.patience
        if self.patience_counter >= limit:
            # No improvement for `limit` epochs in a row: end training.
            print("Early stopping")
            return True
        return False

# configurable input_size
def get_data_augmentation(mode, input_size=64):
    """Build the transform pipeline for ``mode``, or None when augmentation is off.

    Args:
        mode: ``"train"`` selects the augmenting pipeline; any other value
            selects the plain tensor-conversion + normalization pipeline.
        input_size: Output size passed to the random resized crop.

    Returns:
        A ``transforms.Compose`` instance, or None when the module-level
        ``gbData_augmentation`` flag equals False.
    """
    if gbData_augmentation == False:
        return None

    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

    if mode != "train":
        return transforms.Compose([to_tensor, normalize])

    # NOTE(review): CAS771Dataset skips the transform for samples that are already
    # tensors, so this pipeline presumably only runs for non-tensor data — confirm.
    steps = [
        to_tensor,
        transforms.RandomHorizontalFlip(),  # random left-right flip
        transforms.RandomRotation(degrees=15),  # random rotation within +/-15 degrees
        transforms.RandomResizedCrop(input_size, scale=(0.8, 1.0)),  # random crop, resized
        normalize,
    ]
    return transforms.Compose(steps)

def load_data(train_data_path, test_data_path, m=0, save_class_mapping=True):
    """Build the train and test DataLoaders for model number ``m``.

    Original labels are remapped to consecutive indices ``0..K-1`` in sorted
    order of the training labels.

    Args:
        train_data_path: Path of the training file, or None to skip training data.
        test_data_path: Path of the test/validation file, or None to skip it.
        m: Zero-based model index; selects the class-mapping file name.
        save_class_mapping: When True and training data is loaded, pickle the
            class mapping to ``class_mapping_model_{m+1}.pkl`` under ``gBase_path``.

    Returns:
        ``(train_dataloader, test_dataloader)``; either entry is None when the
        corresponding path is None.

    Raises:
        KeyError: If a test label is absent from the class mapping.
    """
    class_mapping_path = f'{gBase_path}/class_mapping_model_{m+1}.pkl'
    class_mapping = None
    train_dataloader = None
    test_dataloader = None

    # Check for None BEFORE touching the file: the previous version loaded the
    # training file first, so a None path crashed before the check was reached.
    if train_data_path is not None:
        train_data, train_labels = _load_data(train_data_path)
        print("Train data shape:", train_data.shape)
        class_mapping = {label: i for i, label in enumerate(sorted(set(train_labels)))}
        print(f"Class mapping: {class_mapping}")

        train_remapped_labels = remap_labels(train_labels, class_mapping)
        transform = get_data_augmentation("train")
        train_dataset = CAS771Dataset(train_data, train_remapped_labels, transform=transform)
        train_dataloader = DataLoader(train_dataset, batch_size=gData_batch_size, shuffle=True, drop_last=False)

        if save_class_mapping:
            # Save the mapping to a file
            with open(class_mapping_path, "wb") as f:
                pickle.dump(class_mapping, f)

    if test_data_path is not None:
        test_data, test_labels = _load_data(test_data_path)

        if class_mapping is None:
            # Test-only mode: reuse the mapping saved by an earlier training run so
            # indices stay consistent; otherwise derive it from the test labels.
            if os.path.isfile(class_mapping_path):
                # SECURITY: pickle.load executes arbitrary code; only read mapping
                # files written by this notebook.
                with open(class_mapping_path, "rb") as f:
                    class_mapping = pickle.load(f)
            else:
                class_mapping = {label: i for i, label in enumerate(sorted(set(test_labels)))}
            print(f"Class mapping: {class_mapping}")

        remapped_test_labels = remap_labels(test_labels, class_mapping)
        transform = get_data_augmentation("test")
        test_dataset = CAS771Dataset(test_data, remapped_test_labels, transform=transform)
        test_dataloader = DataLoader(test_dataset, batch_size=gData_batch_size, shuffle=False, drop_last=False)

    return train_dataloader, test_dataloader

※ Datasets

In [85]:
# One training file and one validation file per model; file names are numbered from 1.
train_data_paths = [f'{gBase_path}/train_dataB_model_{i}.pth' for i in range(1, gnDataset+1)]
test_data_paths = [f'{gBase_path}/val_dataB_model_{i}.pth' for i in range(1, gnDataset+1)]
print(train_data_paths)
print(test_data_paths)
# Load the first dataset pair. With the default save_class_mapping=True this call also
# writes class_mapping_model_1.pkl under gBase_path.
m = 0
train_dataloader, test_dataloader = load_data(train_data_paths[m], test_data_paths[m], m)


['C:/Users/AdamBao/PycharmProjects/CAS-bigdata-main/Task2/Task2_data/train_dataB_model_1.pth', 'C:/Users/AdamBao/PycharmProjects/CAS-bigdata-main/Task2/Task2_data/train_dataB_model_2.pth', 'C:/Users/AdamBao/PycharmProjects/CAS-bigdata-main/Task2/Task2_data/train_dataB_model_3.pth']
['C:/Users/AdamBao/PycharmProjects/CAS-bigdata-main/Task2/Task2_data/val_dataB_model_1.pth', 'C:/Users/AdamBao/PycharmProjects/CAS-bigdata-main/Task2/Task2_data/val_dataB_model_2.pth', 'C:/Users/AdamBao/PycharmProjects/CAS-bigdata-main/Task2/Task2_data/val_dataB_model_3.pth']
Train data shape: torch.Size([3197, 3, 64, 64])
Class mapping: {34: 0, 137: 1, 159: 2, 173: 3, 201: 4}


※ Models

MobileNetV2

In [86]:
class MobileNetV2(nn.Module):
    """torchvision MobileNetV2 with its classifier head resized to ``num_classes``.

    Args:
        num_classes: Number of output classes of the replaced final linear layer.
        pretrained: Load the ``IMAGENET1K_V1`` weights when True, otherwise
            start from random initialization.
        use_small_input: When True, set the first convolution's stride to
            (1, 1) instead of the original (2, 2).
        dropout_rate: Stored on the instance only; it is not applied to any
            layer of the wrapped model.
    """

    def __init__(self,num_classes,pretrained=False,use_small_input=False,dropout_rate=False):
        # nn.Module must be initialised before any attribute is assigned on self.
        super(MobileNetV2, self).__init__()
        self.dropout_rate = dropout_rate
        # 1) Build the backbone
        if pretrained:
            self.model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)
        else:
            self.model = models.mobilenet_v2(weights=None)
        # 2) Modify for small input if requested (e.g., 64x64)
        if use_small_input:
            self.model.features[0][0].stride = (1, 1)  # originally (2, 2)
        # 3) Modify the classifier
        in_features = self.model.last_channel  # typically 1280 for MobileNetV2
        self.model.classifier[1] = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the wrapped model's output for ``x``."""
        return self.model(x)

ResNet50

In [87]:
class ResNet50(nn.Module):
    """torchvision ResNet-50 with its final fully connected layer resized.

    Args:
        num_classes: Number of output classes of the replaced ``fc`` layer.
        pretrained: Load the ``IMAGENET1K_V1`` weights when True, otherwise
            start from random initialization.
        use_small_input: When True, replace the stem convolution with a 3x3,
            stride-1 convolution (and a fresh BatchNorm) and remove the
            stride-2 downsampling in the first block of ``layer2``.
        dropout_rate: Stored on the instance only; it is not applied to any
            layer of the wrapped model.
    """

    def __init__(self, num_classes, pretrained=False, use_small_input=False, dropout_rate=False):
        super(ResNet50, self).__init__()
        self.dropout_rate = dropout_rate
        # 1) Build the backbone
        if pretrained:
            self.model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        else:
            self.model = models.resnet50(weights=None)
        # 2) Modify the first layer for small inputs if desired
        if use_small_input:
            # Example: kernel_size=3, stride=1 for 64x64
            self.model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
            self.model.bn1 = nn.BatchNorm2d(64)
            # `stride` is a tuple attribute: assign to it. The previous code called it
            # like a function, which raised TypeError whenever this branch ran.
            self.model.layer2[0].conv2.stride = (1, 1)
            if self.model.layer2[0].downsample is not None:
                # Keep the shortcut's spatial size in step with the main path.
                self.model.layer2[0].downsample[0].stride = (1, 1)
        # 3) Modify the final fully connected layer
        in_features = self.model.fc.in_features
        self.model.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the wrapped model's output for ``x``."""
        return self.model(x)

1. ResNet1

In [88]:
# later add this back 
        #self.block3 = BasicBlock(128, 256, stride=2)  # (Layers #6,7)
        #self.block4 = BasicBlock(256, 256, stride=2)  # (Layers #8,9)
        #self.final_conv = nn.Conv2d(256, 384, kernel_size=1, stride=1, bias=False)
        #self.final_bn = nn.BatchNorm2d(384)
class ResNet10Wide(nn.Module):
    """Shallow residual network: 5x5 stem, two BasicBlocks, 1x1 conv, FC head.

    Args:
        num_classes: Number of output classes.
        dropout_rate: Dropout probability of the head. The default is the
            module-level ``gDropout_rate`` list; when a list/tuple is given its
            first entry is used, because ``nn.Dropout`` requires a scalar.
    """

    def __init__(self, num_classes, dropout_rate=gDropout_rate):
        super(ResNet10Wide, self).__init__()
        if isinstance(dropout_rate, (list, tuple)):
            # Passing the per-model list straight to nn.Dropout raises; take a scalar.
            dropout_rate = dropout_rate[0]
        # it used to be kernel = 3, stride = 1, padding = 1
        self.conv1 = nn.Conv2d(3, 64, kernel_size=5,stride=1, padding=2, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.block1 = BasicBlock(64, 64, stride=1)    # (Layers #2,3)
        self.block2 = BasicBlock(64, 128, stride=2)   # (Layers #4,5)
        self.final_conv = nn.Conv2d(128, 256, kernel_size=1, stride=1, bias=False)
        self.final_bn = nn.BatchNorm2d(256)
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        # Probe the flattened feature size with a dummy 64x64 input. Run it in eval
        # mode without gradients so the BatchNorm running statistics are not updated
        # by a throw-away input.
        self.eval()
        with torch.no_grad():
            dummy_output = self._forward_features(torch.zeros(1, 3, 64, 64))
        self.train()
        flattened_size = dummy_output.view(-1).size(0)
        print(f"[ResNet10Wide] Flattened size: {flattened_size}")
        self.fc1 = nn.Linear(flattened_size, 512)   # Extra FC
        #self.fc2 = nn.Linear(512, 512)             # Another dense layer
        self.classifier = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(dropout_rate)

    def _forward_features(self, x):
        """Convolutional trunk; returns the globally pooled feature map."""
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.block1(x)
        x = self.block2(x)
        x = F.relu(self.final_bn(self.final_conv(x)))
        x = self.global_pool(x)
        return x

    def forward(self, x, return_embedding=False):
        """Return class logits, or the 512-d head activation when requested.

        Args:
            x: Input batch.
            return_embedding: When True, return the activation after
                FC1 + ReLU + dropout instead of the logits.
        """
        x = self._forward_features(x)
        x = x.view(x.size(0), -1)       # Flatten
        x = F.relu(self.fc1(x))         # FC1
        # Dropout is applied exactly once. With fc2 disabled, the former second
        # dropout call ran back-to-back with this one and dropped activations twice.
        x = self.dropout(x)
        if return_embedding:
            return x                    # For feature extraction
        x = self.classifier(x)          # Final classifier
        return x

class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a skip connection.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        stride: Stride of the first convolution (and of the projection shortcut).
    """
    expansion = 1

    def __init__(self, in_ch, out_ch, stride=1):
        super().__init__()
        # Main path: conv -> BN -> ReLU -> conv -> BN
        self.conv1 = nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_ch)
        self.conv2 = nn.Conv2d(out_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_ch)
        # Skip path: identity (empty Sequential) unless the output shape changes,
        # in which case a 1x1 convolution + BN projects the input to match.
        needs_projection = (stride != 1) or (in_ch != out_ch)
        projection = (
            [nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=stride, bias=False),
             nn.BatchNorm2d(out_ch)]
            if needs_projection else []
        )
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Apply the block to ``x`` and return the activated sum of both paths."""
        branch = self.conv1(x)
        branch = F.relu(self.bn1(branch))
        branch = self.bn2(self.conv2(branch))
        branch += self.shortcut(x)  # add the skip connection
        return F.relu(branch)
    
class ResNet10(nn.Module):
    """Residual network: strided 3x3 stem, four BasicBlocks, 1x1 conv, two-layer FC head.

    Args:
        num_classes: Number of output classes.
        dropout_rate: Dropout probability of the head. The default is the
            module-level ``gDropout_rate`` list; when a list/tuple is given its
            first entry is used, because ``nn.Dropout`` requires a scalar.
    """

    def __init__(self, num_classes, dropout_rate=gDropout_rate):
        super(ResNet10, self).__init__()
        if isinstance(dropout_rate, (list, tuple)):
            # Passing the per-model list straight to nn.Dropout raises; take a scalar.
            dropout_rate = dropout_rate[0]
        # 1) Initial Convolution (Conv Layer #1)
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2,padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        # 2) Four Residual Blocks
        self.block1 = BasicBlock(64, 64, stride=1)   # (Conv Layers #2,3)
        self.block2 = BasicBlock(64, 128, stride=2)   # (Conv Layers #4,5)
        self.block3 = BasicBlock(128, 256, stride=2)  # (Conv Layers #6,7)
        self.block4 = BasicBlock(256, 256, stride=2) # (Conv Layers #8,9)
        self.final_conv = nn.Conv2d(256, 384, kernel_size=1, stride=1, bias=False)
        self.final_bn = nn.BatchNorm2d(384)
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        # Probe the flattened feature size with a dummy 64x64 input. Run it in eval
        # mode without gradients so the BatchNorm running statistics are not updated
        # by a throw-away input.
        self.eval()
        with torch.no_grad():
            dummy_output = self._forward_features(torch.zeros(1, 3, 64, 64))
        self.train()
        flattened_size = dummy_output.view(-1).size(0)
        self.fc1 = nn.Linear(flattened_size, 512)
        self.fc2 = nn.Linear(512, 512)
        self.cls = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(dropout_rate)

    def _forward_features(self, x):
        """Convolutional trunk; returns the globally pooled feature map."""
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = F.relu(self.final_bn(self.final_conv(x)))
        x = self.global_pool(x)
        return x

    def forward(self, x, return_embedding=False):
        """Return class logits, or the 512-d head activation when requested.

        Args:
            x: Input batch.
            return_embedding: When True, return the activation after FC2 + ReLU
                (before the final dropout) instead of the logits.
        """
        x = self._forward_features(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        if return_embedding:
            return x  # Return feature vector if requested
        x = self.dropout(x)
        x = self.cls(x)
        return x

2. CNN

In [89]:
'''
gnGPU = "0"
gnDataset = 3
gBase_path = 'CAS771/Task1_data'
gnClasses = 5
gDropout_rate = 0.9
gBatch_momentum = 0.1
gLearning_rate = 0.00001
gWeight_decay = 1e-3
gOpt_Momentum = 0.9
gData_batch_size = 32
gEarly_stop_patience = 15
gnEpochs = 200
gbData_augmentation = False

'''
class ImprovedCNN(nn.Module):
    """Plain CNN: three conv stages, global average pooling and a small FC head.

    Args:
        num_classes: Number of output classes.
        dropout_rate: Dropout probability used in the FC head. It was previously
            accepted but ignored in favour of a hard-coded 0.7.
    """

    def __init__(self, num_classes,dropout_rate=0.5):
        super(ImprovedCNN, self).__init__()
        self.conv_layers = nn.Sequential(
            # Conv Block 1 (single conv, no pooling)
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            # Conv Block 2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),  # Extra Conv Layer
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            # Conv Block 3
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),  # Extra Conv Layer
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # Global Average Pooling
        self.gap = nn.AdaptiveAvgPool2d(1)  # Output: (batch_size, 256, 1, 1)
        # Fully Connected Layers
        self.fc_layers = nn.Sequential(
            nn.Linear(256, 128),  # Reduced size
            nn.ReLU(),
            nn.Dropout(dropout_rate),  # honour the caller-supplied rate
            nn.Linear(128, num_classes)  # Output layer
        )

    def forward(self, x):
        """Return class logits for the input batch ``x``."""
        x = self.conv_layers(x)
        x = self.gap(x)  # Apply Global Average Pooling
        x = x.view(x.size(0), -1)  # Flatten before FC layers
        return self.fc_layers(x)

3. CNN2

In [90]:
'''
gnGPU = "0"
gnDataset = 3
gBase_path = 'CAS771/Task1_data'
gnClasses = 5
gDropout_rate = 0.5
gBatch_momentum = 0.1
gLearning_rate = 0.00001
gWeight_decay = 1e-3
gOpt_Momentum = 0.9
gData_batch_size = 64
gEarly_stop_patience = 15
gnEpochs = 300
gbData_augmentation = False

'''
class CNN2(nn.Module):
    """Small CNN: three conv layers (3x3, 5x5, 7x7 kernels) and a two-layer FC head.

    Args:
        num_classes: Number of output classes.
        dropout_rate: Dropout probability applied after the first FC layer.
    """

    def __init__(self, num_classes, dropout_rate):
        super(CNN2, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32,32, kernel_size=5, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=7, padding=1)
        self.bn3 = nn.BatchNorm2d(32)

        # Pooling layers (pool1 is kept as an attribute but is not used in forward)
        self.pool1 = nn.MaxPool2d(2,2)
        self.pool2 = nn.MaxPool2d(2,2)
        self.pool3 = nn.MaxPool2d(2,2)

        # Calculate the flattened size by passing a dummy 64x64 input through the
        # same feature path that forward() uses. Run it in eval mode without
        # gradients so the BatchNorm running statistics are not updated by it.
        self.eval()
        with torch.no_grad():
            dummy_output = self._forward_features(torch.zeros(1, 3, 64, 64))
        self.train()
        flattened_size = dummy_output.view(1, -1).size(1)
        print(f"falttened size is {flattened_size}")

        # Fully connected layers
        self.fc1 = nn.Linear(flattened_size, 32)
        self.fc2 = nn.Linear(32, num_classes)
        # Kept as an attribute; not used in forward.
        self.gap = nn.AdaptiveAvgPool2d(1)
        # Dropout
        self.dropout = nn.Dropout(dropout_rate)

    def _forward_features(self, x):
        """Convolutional trunk shared by the shape probe and forward()."""
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool2(x)  # first pooling
        x = F.relu(self.bn3(self.conv3(x)))
        x = self.pool3(x)  # second pooling
        return x

    def forward(self, x, return_embedding=False):
        """Return class logits, or the 32-d head activation when requested.

        Args:
            x: Input batch.
            return_embedding: When True, return the activation after
                FC1 + ReLU + dropout instead of the logits.
        """
        x = self._forward_features(x)
        # Flatten
        x = x.contiguous().view(x.size(0), -1)

        # Fully connected layers + Dropout
        x = self.dropout(F.relu(self.fc1(x)))

        if return_embedding:
            return x  # return the feature vector

        x = self.fc2(x)
        return x

※ Model Functions

In [91]:
def _get_model(model_name, dropout_rate, batch_momentum, m = 0):
    """Instantiate the model registered under ``model_name`` on the global device.

    Args:
        model_name: One of "cnn", "cnn2", "ResNet10", "ResNet50".
        dropout_rate: Forwarded to the model constructor.
        batch_momentum: Accepted for interface compatibility; currently unused.
        m: Zero-based model index, used only in the printed summary.

    Returns:
        The constructed model, moved to ``device``.

    Raises:
        ValueError: If ``model_name`` is not a known name. Previously this
            surfaced as an UnboundLocalError at the first print.
    """
    # NOTE(review): "ResNet10" builds ResNet10Wide, not the ResNet10 class — confirm
    # this mapping is intentional.
    registry = {
        "cnn": ImprovedCNN,
        "cnn2": CNN2,
        "ResNet10": ResNet10Wide,
        "ResNet50": ResNet50,
    }
    if model_name not in registry:
        raise ValueError(
            f"Unknown model name {model_name!r}; expected one of {sorted(registry)}"
        )

    model = registry[model_name](num_classes=gnClasses,
                                 dropout_rate=dropout_rate).to(device)

    print(f"Model{m+1} parameters: {count_parameters(model)}")
    print(f"Model{m+1} Total layers: {count_layers(model)}")
    return model

def _get_optimizer(opname, model, learning_rate, weight_decay, opt_momentum):
    if opname == "Adam":
        return optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif opname == "SGD":
        return optim.SGD(model.parameters(), lr=learning_rate, momentum = opt_momentum, weight_decay=weight_decay)

def _get_criterion(criterion_name):
    if criterion_name == "CrossEntropyLoss":
        return nn.CrossEntropyLoss()
    return nn.CrossEntropyLoss()

def get_models(model_names, optimizer_names, criterion_names):
    """Build one (model, optimizer, criterion) triple per dataset.

    Entry ``m`` of each name list, and of each module-level hyperparameter
    list, configures model number ``m``; ``gnDataset`` triples are built.

    Returns:
        Three parallel lists: models, optimizers, criterions.
    """
    built_models, built_optimizers, built_criterions = [], [], []
    for idx in range(gnDataset):
        net = _get_model(model_names[idx], gDropout_rate[idx], gBatch_momentum[idx], idx)
        built_models.append(net)
        built_optimizers.append(
            _get_optimizer(optimizer_names[idx], net, gLearning_rate[idx], gWeight_decay[idx], gOpt_Momentum[idx])
        )
        built_criterions.append(_get_criterion(criterion_names[idx]))

    return built_models, built_optimizers, built_criterions


def save_model(model, m=0, all=False):
    """Write model number ``m`` to disk under ``gBase_path``.

    Args:
        model: Module to save.
        m: Zero-based model index; file names use ``m + 1``.
        all: When equal to True, serialize the whole module object to
            ``model{m+1}_all.pth``; otherwise save only its ``state_dict`` to
            ``model{m+1}_weights.pth``.
    """
    if all == True:
        payload, model_saved_path = model, f"{gBase_path}/model{m+1}_all.pth"
    else:
        payload, model_saved_path = model.state_dict(), f"{gBase_path}/model{m+1}_weights.pth"
    torch.save(payload, model_saved_path)

    print(f"Model{m+1} is saved to: {model_saved_path}")

def load_model(model, m=0, all=False):
    """Restore model number ``m`` from disk into ``model`` (in place).

    Args:
        model: Module whose parameters are overwritten with the saved ones.
        m: Zero-based model index; file names use ``m + 1``.
        all: When true, read the whole-module file ``model{m+1}_all.pth``;
            otherwise read the state_dict file ``model{m+1}_weights.pth``.

    Returns:
        ``model``, for convenience (the previous version returned None).
    """
    if all:
        model_saved_path = f"{gBase_path}/model{m+1}_all.pth"
        # nn.Module has no `load` method, so the old `model.load(...)` always raised.
        # SECURITY: weights_only=False unpickles arbitrary objects and can execute
        # code; only load files produced by this notebook's save_model().
        restored = torch.load(model_saved_path, map_location="cpu", weights_only=False)
        # Copy the restored parameters into the caller's instance so the call keeps
        # its in-place contract.
        model.load_state_dict(restored.state_dict())
    else:
        model_saved_path = f"{gBase_path}/model{m+1}_weights.pth"
        # Load to CPU first; load_state_dict copies into the model's own device.
        model.load_state_dict(torch.load(model_saved_path, map_location="cpu"))

    print(f"Model{m+1} is loaded from: {model_saved_path}")
    return model


※ Training / Validating for each model

In [None]:
# Bind the model list to `model_list`, not `models`: rebinding the global `models`
# would shadow the `torchvision.models` import that MobileNetV2 / ResNet50 rely on.
model_list, optimizers, criterions = get_models(gModel_names, gOptimizer_names, gCriterion_names)

for m in range(gnDataset):
    model = model_list[m]
    optimizer = optimizers[m]
    criterion = criterions[m]
    train_dataloader, test_dataloader = load_data(train_data_paths[m], test_data_paths[m], m)

    progress = CAS771Plot(model, criterion, device, train_dataloader, test_dataloader, m)
    es = CAS771EarlyStopping()

    for epoch in range(gnEpochs):
        # progress.append() leaves the model in eval mode, so re-enable training mode.
        model.train()
        progress.init_running_loss()

        for inputs, labels in train_dataloader:
            inputs, labels = inputs.to(device), labels.to(device) # move data to GPU/CPU
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            progress.add_loss(loss.item())

        # append() validates, records and prints the epoch; its validation loss
        # feeds the early-stopping check.
        if es.isStop(progress.append(epoch)):
            break
    progress.plot()
    save_model(model, m, gSaveModelAll)

    # Drop every reference to the finished model (lists, optimizer, tracker) so its
    # memory can actually be released; `del model` alone left them all alive.
    model_list[m] = None
    optimizers[m] = None
    del model, optimizer, progress
    torch.cuda.empty_cache()



[ResNet10Wide] Flattened size: 256
Model1 parameters: 476485
Model1 Total layers: 9
[ResNet10Wide] Flattened size: 256
Model2 parameters: 476485
Model2 Total layers: 9
[ResNet10Wide] Flattened size: 256
Model3 parameters: 476485
Model3 Total layers: 9
Train data shape: torch.Size([3197, 3, 64, 64])
Class mapping: {34: 0, 137: 1, 159: 2, 173: 3, 201: 4}
Epoch 1, Train Loss: 1.6002, Validation Loss: 1.5593, Validation Accuracy: 0.4240
Epoch 2, Train Loss: 1.5555, Validation Loss: 1.5191, Validation Accuracy: 0.4120
Epoch 3, Train Loss: 1.5200, Validation Loss: 1.4859, Validation Accuracy: 0.4120
Epoch 4, Train Loss: 1.4949, Validation Loss: 1.4584, Validation Accuracy: 0.4000
Epoch 5, Train Loss: 1.4721, Validation Loss: 1.4387, Validation Accuracy: 0.4320
Epoch 6, Train Loss: 1.4545, Validation Loss: 1.4235, Validation Accuracy: 0.4520
Epoch 7, Train Loss: 1.4366, Validation Loss: 1.4083, Validation Accuracy: 0.4360
Epoch 8, Train Loss: 1.4200, Validation Loss: 1.3930, Validation Accura

In [None]:


5
