In [2]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import torch
import torchvision
import torch.nn.functional as F
import torch.optim as optim
import umap
from torchvision import transforms
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
from sklearn.linear_model import SGDClassifier
from sklearn.manifold import TSNE
from torch import nn
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler, TensorDataset
import torch.utils.data as Data
from visualization import draw_scores
from preprocessing import load_data, transfer_unit_float, normalize_channel, get_training_validation_samplers, get_priors, resize, get_transitions, serialize_object, read_pickled

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('The training will run on __ {} __'.format(device))

The training will run on __ cpu __


In [4]:
########## load data ##########
mnist05_X_train, mnist05_Y_train, mnist05_X_test, mnist05_Y_test = load_data('FashionMNIST0.5.npz')
mnist06_X_train, mnist06_Y_train, mnist06_X_test, mnist06_Y_test = load_data('FashionMNIST0.6.npz')
cifar_X_train, cifar_Y_train, cifar_X_test, cifar_Y_test = load_data('CIFAR.npz')


########## Transfer data type and resize ##########
mnist05_X_train = resize(transfer_unit_float(mnist05_X_train))
mnist05_X_test = resize(transfer_unit_float(mnist05_X_test))
mnist06_X_train = resize(transfer_unit_float(mnist06_X_train))
mnist06_X_test = resize(transfer_unit_float(mnist06_X_test))
cifar_X_train = resize(transfer_unit_float(cifar_X_train), data_name="cifar")
cifar_X_test = resize(transfer_unit_float(cifar_X_test), data_name="cifar")


########## Preperation for train ##########
mnist05_train_set = TensorDataset(torch.from_numpy(mnist05_X_train), torch.from_numpy(mnist05_Y_train))
mnist05_test_set = TensorDataset(torch.from_numpy(mnist05_X_test), torch.from_numpy(mnist05_Y_test))

mnist06_train_set = TensorDataset(torch.from_numpy(mnist06_X_train), torch.from_numpy(mnist06_Y_train))
mnist06_test_set = TensorDataset(torch.from_numpy(mnist06_X_test), torch.from_numpy(mnist06_Y_test))

cifar_train_set = TensorDataset(torch.from_numpy(cifar_X_train), torch.from_numpy(cifar_Y_train))
cifar_test_set = TensorDataset(torch.from_numpy(cifar_X_test), torch.from_numpy(cifar_Y_test))



########## Split into trainning and validation ##########

mnist05_train_sampler, mnist05_validation_sampler = get_training_validation_samplers(mnist05_train_set)
mnist06_train_sampler, mnist06_validation_sampler = get_training_validation_samplers(mnist06_train_set)
cifar_train_sampler, cifar_validation_sampler = get_training_validation_samplers(cifar_train_set)


########## Get transitions and priors ##########
mnist05_transitions = get_transitions(data_name="mnist05")
mnist06_transitions = get_transitions(data_name="mnist06")

mnist05_priors = get_priors(mnist05_Y_test)
mnist05_noisy_priors = get_priors(mnist05_Y_train)

mnist06_priors = get_priors(mnist06_Y_test)
mnist06_noisy_priors = get_priors(mnist06_Y_train)

cifar_priors = get_priors(cifar_Y_test)
cifar_noisy_priors = get_priors(cifar_Y_train)

In [5]:
########## Different classifers: CNN, LSTM##########
class CNN(nn.Module):
    
    def __init__(self, number_classes, channel=1):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(channel, 16, kernel_size=5, stride=1, padding=2),#1.in_channel=input height, 2.out_channel=n_filters=n_feature_map（扫描框个数）, 3.kernal_size(扫描框大小), 4.stride=1（扫描框步长）, 5.padding（图片周围填充几轮）;  28=(32-5+1)
            nn.ReLU(),
            nn.MaxPool2d(2),          #框边长为2，每2个结果输出1个，28/2 = 14，压缩后image为14*14，高度变成了16
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),          #框边长为2，每2个结果输出1个，14/2 = 7，压缩后image为7*7，高度变成了32  
        )
        self.out = nn.Linear(32 * 7 * 7, number_classes)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        x = self.out(x)
        return x



class CNN_BN(nn.Module):
    
    def __init__(self, number_classes, channel=1):
        super(CNN_BN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(channel, 16, kernel_size=5, stride=1, padding=2),#1.in_channel=input height, 2.out_channel=n_filters=n_feature_map（扫描框个数）, 3.kernal_size(扫描框大小), 4.stride=1（扫描框步长）, 5.padding（图片周围填充几轮）;  28=(32-5+1)
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2),          #框边长为2，每2个结果输出1个，28/2 = 14，压缩后image为14*14，高度变成了16
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),          #框边长为2，每2个结果输出1个，14/2 = 7，压缩后image为7*7，高度变成了32  
        )
        self.out = nn.Linear(32 * 7 * 7, number_classes)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        x = self.out(x)
        return x

class CNN2(nn.Module): ## with Normalization on Batch

    def __init__(self, number_classes=10, channel=3):
        super(CNN2, self).__init__()
        
        self.features = nn.Sequential(
            nn.Conv2d(channel, 32, kernel_size=5, stride=1, padding=0), # 28
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2), # 14
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), # 14
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2), # 7
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0), # 5
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True)
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(5*5*128, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(1024, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(1024, number_classes),
        ) # No softmax because CrossEntropy loss does it.

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 128 * 5 * 5)
        x = self.classifier(x)
        return x

class CNN2_BN(nn.Module): ## with no Normalization on Batch

    def __init__(self, number_classes=10):
        super(CNN2_BN, self).__init__()
        
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=5, stride=1, padding=0), # 28
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2), # 14
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), # 14
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2), # 7
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0), # 5
            nn.ReLU(inplace=True)
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(5*5*128, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(1024, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(1024, number_classes),
        ) # No softmax because CrossEntropy loss does it.

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 128 * 5 * 5)
        x = self.classifier(x)
        return x
# batch_size*num_features*height*width


class LSTM(nn.Module):
    def __init__(self, number_classes=3):
        super(LSTM, self).__init__()
        self.f1 = nn.LSTM(input_size=28, hidden_size=64, num_layers=1, batch_first=False)
        self.f2 = nn.Linear(64, number_classes)
    def forward(self, x):
        r_out, (h_n, h_c) = self.f1(x.reshape(-1, 28, 28), None)
        out = self.f2(r_out[:, -1, :])#忘加f2
        return out
    
class LSTM_BN(nn.Module):
    def __init__(self, number_classes=3):
        super(LSTM_BN, self).__init__()
        self.f1 = nn.LSTM(input_size=28, hidden_size=64, num_layers=1, batch_first=False)
        self.f2 = nn.BatchNorm1d(28)
        self.f3 = nn.Linear(64, number_classes)
    def forward(self, x):
        r_out, (h_n, h_c) = self.f1(x.reshape(-1, 28, 28), None)
        r_out = self.f2(r_out)
        out = self.f3(r_out[:, -1, :])#忘加f2
        return out
    
class LSTM2(nn.Module):
    def __init__(self, number_classes=3):
        super(LSTM2, self).__init__()
        self.f1 = nn.LSTM(input_size=32 * 3, hidden_size=64, num_layers=3, batch_first=False)
        self.f2 = nn.Linear(64, number_classes)
    def forward(self, x):
        r_out, (h_n, h_c) = self.f1(x.reshape(-1, 32, 32*3), None)
        out = self.f2(r_out[:, -1, :])#忘加f2
        return out
    
class LSTM2_BN(nn.Module):
    def __init__(self, number_classes=3):
        super(LSTM2_BN, self).__init__()
        self.f1 = nn.LSTM(input_size=32 * 3, hidden_size=64, num_layers=3, batch_first=False)
        self.f2 = nn.BatchNorm1d(32)
        self.f3 = nn.Linear(64, number_classes)
    def forward(self, x):
        r_out, (h_n, h_c) = self.f1(x.reshape(-1, 32, 32*3), None)
        r_out = self.f2(r_out)
        out = self.f3(r_out[:, -1, :])#忘加f2
        return out

In [6]:
# Training and Evaluation
class Evaluation(object):
    
    def __init__(self):
        self.loss = []
        self.accuracy = []

    
class Recordings():
    
    def __init__(self, progress_recording_frequency, evaluation_recording_frequency):
        self.progress = []
        self.evaluation = Evaluation()
        self.progress_recording_frequency = progress_recording_frequency
        self.evaluation_recording_frequency = evaluation_recording_frequency
    
    def log_progress(self, epoch, epochs, iteration, iterations):
        print(f'\n---- Epoch {epoch + 1}/{epochs}, Iteration {iteration + 1}/{iterations}')
        print('Loss : {:.4f}'.format(self.progress[-1]))

    def log_evaluation(self, epoch, epochs):
        print(f'\n----- Recording of Evaluation for epoch {epoch + 1}/{epochs}')
        print('Loss: {:.4f}, Accuracy: {:.4f}'.format(self.evaluation.loss[-1], self.evaluation.accuracy[-1]))
        
    def display(self):
        iterations_axes = [i * self.progress_recording_frequency for i in range(len(self.progress))]

        # Progress Loss
        fig, (ax1) = plt.subplots(1, 1, figsize=(9, 2))
        ax1.plot(iterations_axes, self.progress, color='darkslategray')
        fig.suptitle('Progress Loss')
        ax1.set(xlabel='Iterations', ylabel='Loss')
        plt.show()

        iterations_axes = [i * self.evaluation_recording_frequency for i in range(len(self.evaluation.loss))]

        # Evaluation Metrics
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 2))
        ax1.plot(iterations_axes, self.evaluation.loss, color='darkslategray')
        ax1.set(xlabel='Epochs', ylabel='Loss')
        ax2.plot(iterations_axes, self.evaluation.accuracy, color='darkslategray')
        ax2.set(xlabel='Epochs', ylabel='Accuracy')
        fig.suptitle('Evaluation Metrics Recording')
        plt.show()
        
    def show_loss_rmse(self):
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 2))
        loss_his = self.evaluation.loss
        rmse_his = self.evaluation.accuracy
        x_loss = range(len(loss_his))
        ax1.plot(x_loss, loss_his, label='loss', color = 'b', linewidth=3)
        ax1.set_xlabel('epoches')
        ax1.set_ylabel('loss')
        ax1.legend()

        x_rmse = range(len(rmse_his))
        ax2.plot(x_rmse, rmse_his, label='rmse', color = 'r', linewidth=3)
        ax2.set_xlabel('epoches')
        ax2.set_ylabel('rmse')
        ax2.legend()
        
        plt.show()
        
def evaluate(model, loader, device, validation=False):
    # Eval flag
    model.eval()

    # Initialize metrics
    loss = 0.
    accuracy = 0.
    
    # Criterion
    criterion = nn.CrossEntropyLoss()
    
    
    with torch.no_grad():
        for (inputs, labels) in loader:
            # To device
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            # Prediction
            label_outputs = model(inputs.float())

            # Update loss
            loss += criterion(label_outputs, labels).data.item()

            # Update Accuracies
            predicted_labels = label_outputs.data.max(1)[1]
            accuracy += predicted_labels.eq(labels.data).sum().item()

    # Average metrics
    loss /= len(loader)
    if validation:        
        accuracy /= len(loader.sampler.indices)
    else:
        accuracy /= len(loader.dataset)

    return loss, accuracy

In [7]:
def train(model, optimizer, criterion, dataset, device, params, test_loader, transition_matrix=None, verbose=False):
    print("Training by 10-fold and repeat with 10 epoches.....")
    if verbose:
        print('==== Start Training ====')
    # Train Flag
    model.train()
    
    # Recordings
    recordings = Recordings(params.log_progress_every, params.evaluate_model_every)

    # Number of classes
    nbr_classes = len(set(dataset.tensors[1].numpy()))

    # Type casting for inverse transition matrix
    if transition_matrix is not None:
        transition_matrix = transition_matrix.float()
    
    for epoch in range(params.epochs):

        # For shuffling, regenerate the dataloaders
        t_sampler, v_sampler = get_training_validation_samplers(dataset)
        t_loader = DataLoader(dataset, batch_size=params.batch_size, sampler=t_sampler)
        v_loader = DataLoader(dataset, batch_size=params.batch_size, sampler=v_sampler)
        
        for iteration, (inputs, labels) in enumerate(t_loader):
            # Prepare to device
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            # Reset grad
            optimizer.zero_grad()

            # Outputs
            output_labels = model(inputs.float())

            # Loss
            if transition_matrix is not None:
                unique_labels = [c * torch.ones(labels.size()[0]).long().to(device) for c in range(nbr_classes)]
                losses = [criterion(output_labels, l) for l in unique_labels]
                losses = torch.stack(losses)
                corrected_losses = losses.transpose(0,1)@transition_matrix
                loss = corrected_losses.gather(1, labels.view(-1,1)).mean()
            else:
                loss = criterion(output_labels, labels).mean()

            # Optimize
            loss.backward()
            optimizer.step()
            
            # =======
            # Log Progress
            if ((iteration + 1) % params.log_progress_every == 0):
                recordings.progress.append(loss.data.item())
                if verbose:
                    recordings.log_progress(epoch, params.epochs, iteration, len(t_loader))
        
        # Evaluate
        if ((epoch + 1) % params.evaluate_model_every == 0):
            evaluation_loss, evaluation_accuracy = evaluate(model, test_loader, device, validation=False)
            recordings.evaluation.loss.append(evaluation_loss); recordings.evaluation.accuracy.append(evaluation_accuracy)

            if verbose:
                recordings.log_evaluation(epoch, params.epochs)

        # Save temporary model
        if ((epoch + 1) % params.save_model_every == 0):
            torch.save(model.state_dict(), params.model_filename)
    
    # Save final model
    torch.save(model.state_dict(), params.model_filename)

    # Save recordings
    serialize_object(recordings, params.recordings_filename)

    if verbose:
        print('==== End Training ====')
    
    return model, recordings
        

In [8]:
class Params(object):
    batch_size = 252
    epochs = 10
    learning_rate = 0.01
    momentum = 0.9
    weight_decay = 0
    
    log_progress_every = 50 # in iterations
    evaluate_model_every = 1 # in epochs

    save_model_every = 25 # in epochs
    model_filename = '{}/saves/model.pth'.format('.')
    recordings_filename = '{}/saves/recordings.pickle'.format('.')

def select_train(data_name='mnist05', classifer="cnn", optimizer_name='sgd', loss='crossentropy', estimation=True):
    params = Params()
    if classifer == 'cnn':
        model = CNN(number_classes=3)
    elif classifer == 'cnn-bn':
        model = CNN_BN(number_classes=3)
    elif classifer == 'cnn2':
        model = CNN2(number_classes=3)
    elif classifer == 'cnn2-bn':
        model = CNN2_BN(number_classes=3)
    elif classifer == 'lstm':
        model = LSTM(number_classes=3)
    elif classifer == 'lstm-bn':
        model = LSTM_BN(number_classes=3)
    elif classifer == 'lstm2':
        model = LSTM2(number_classes=3) 
    elif classifer == 'lstm2-bn':
        model = LSTM2_BN(number_classes=3) 
    else:
        print('Wrong name of classifer.')
        return
    
    if optimizer_name == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=params.learning_rate, momentum=params.momentum, weight_decay=params.weight_decay)
    elif optimizer_name == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=params.learning_rate, momentum=params.momentum, weight_decay=params.weight_decay)
    elif optimizer_name == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=params.learning_rate, momentum=params.momentum, weight_decay=params.weight_decay)
    else:
        print("Wrong name of optimizer.")
        return
    criterion = nn.CrossEntropyLoss(reduction='none')   
    
    ########## Wrap data with loader ##########
    mnist05_train_loader = DataLoader(mnist05_train_set, batch_size=params.batch_size, sampler=mnist05_train_sampler)
    mnist05_validation_loader = DataLoader(mnist05_train_set, batch_size=params.batch_size, sampler=mnist05_validation_sampler)
    mnist05_test_loader = DataLoader(mnist05_test_set, batch_size=params.batch_size, shuffle=False)

    mnist06_train_loader = DataLoader(mnist06_train_set, batch_size=params.batch_size, sampler=mnist06_train_sampler)
    mnist06_validation_loader = DataLoader(mnist06_train_set, batch_size=params.batch_size, sampler=mnist06_validation_sampler)
    mnist06_test_loader = DataLoader(mnist06_test_set, batch_size=params.batch_size, shuffle=False)

    cifar_train_loader = DataLoader(cifar_train_set, batch_size=params.batch_size, sampler=cifar_train_sampler)
    cifar_validation_loader = DataLoader(cifar_train_set, batch_size=params.batch_size, sampler=cifar_validation_sampler)
    cifar_test_loader = DataLoader(cifar_test_set, batch_size=params.batch_size, shuffle=False)


    # ====================================
    # Choose dataset and Transition Matrix
    datasets_dict = {
        "mnist05": {
            "loaders": (mnist05_train_loader, mnist05_validation_loader, mnist05_test_loader),
            "dataset": mnist05_train_set,
            "testset": mnist05_test_set,
            "transition_matrix": torch.from_numpy(mnist05_transitions.T).to(device) # Transpose because defined differently than in lectures
            },
        "mnist06": {
            "loaders": (mnist06_train_loader, mnist06_validation_loader, mnist06_test_loader),
            "dataset": mnist06_train_set,
            "testset": mnist06_test_set,
            "transition_matrix": torch.from_numpy(mnist06_transitions.T).to(device) # Transpose because defined differently than in lectures
            },
        "cifar": {
            "loaders": (cifar_train_loader, cifar_validation_loader, cifar_test_loader),
            "dataset": cifar_train_set,
            "testset": cifar_test_set,
            "transition_matrix": torch.from_numpy(np.array(
                [[0.2796548,  0.37073188, 0.33353805],
                [0.37033914, 0.35247205, 0.28679701],
                [0.35000606, 0.27679607, 0.37966494]]
                ).T).to(device)
            },
    }


    train_loader, validation_loader, test_loader = datasets_dict[data_name]['loaders']
    dataset = datasets_dict[data_name]['dataset']
    testset = datasets_dict[data_name]['testset']

    if not estimation:
        transition_matrix = datasets_dict[data_name]['transition_matrix']
    else:
        transition_matrix = None
    
    model, recordings = train(model, optimizer, criterion, dataset, device, params, test_loader, transition_matrix=transition_matrix)
    #loss, accuracy = evaluate(model, test_loader, device) #
    return recordings

In [31]:
 def draw_scores(mean1, mean2, mean3, std1, std2, std3, title="", file=""):
    
    labels = ['CNN', 'CNN-BN', 'LSTM', 'LSTM-BN']

    x = np.arange(len(labels))  # the label locations
    y = np.arange(0, 1, 0.1)
    width = 0.25  # the width of the bars

    fig, ax = plt.subplots(figsize=(12, 8))#可调整柱子粗细高低
    rects1 = ax.bar(x - width, mean1, width, alpha=0.7, color='r' , yerr=std1, label='mnist05')
    rects2 = ax.bar(x, mean2, width, alpha=0.7, color='b', yerr=std2, label='mnist06')
    rects3 = ax.bar(x + width, mean3, width, alpha=0.7, color='g', yerr=std3, label='cifar')
    
    
    
    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax.set_ylabel('Scores')
    ax.set_title(title)
    ax.set_xticks(x)
    ax.set_yticks(y)
    ax.set_xticklabels(labels)
    ax.legend(loc=(0.65, 0.9))
    
    for rect in rects1:
        height = rect.get_height()
        ax.annotate('{:.3f}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')
        
    for rect in rects2:
        height = rect.get_height()
        ax.annotate('{:.3f}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')
        
    for rect in rects3:
        height = rect.get_height()
        ax.annotate('{:.3f}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')
    
    
    
    fig.tight_layout()
    plt.savefig(file + ".jpg")

# 1. Impact of classifers with Transitions

## 1.1 Different classifers(cnn, cnn-bn, lstm, lstm-bn) by sgd

In [32]:
mean1, mean2, mean3 = [], [], []
std1, std2, std3 = [], [], []

### 1.1.1 with T

In [33]:
classifer_dict = ['cnn', 'cnn-bn', 'lstm', 'lstm-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn

In [34]:
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='mnist05', classifer=classifer, optimizer_name='sgd', loss='crossentropy', estimation=False)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))

Training by 10-fold and repeat with 10 epoches.....
cnn: | Mean of Accuracy: 0.502133 | Std of Accuracy: 0.038739 | Max of Accuracy: 0.535333 | Min of Accuracy: 0.394667
Training by 10-fold and repeat with 10 epoches.....
cnn-bn: | Mean of Accuracy: 0.820500 | Std of Accuracy: 0.082232 | Max of Accuracy: 0.892667 | Min of Accuracy: 0.617333
Training by 10-fold and repeat with 10 epoches.....
lstm: | Mean of Accuracy: 0.724033 | Std of Accuracy: 0.035166 | Max of Accuracy: 0.755667 | Min of Accuracy: 0.633333
Training by 10-fold and repeat with 10 epoches.....
lstm-bn: | Mean of Accuracy: 0.714367 | Std of Accuracy: 0.026912 | Max of Accuracy: 0.742333 | Min of Accuracy: 0.660667


In [35]:
mean1 = [np.mean(accs) for accs in acc_list]
std1 = [np.std(stds) for stds in acc_list]

In [36]:
classifer_dict = ['cnn', 'cnn-bn', 'lstm', 'lstm-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='mnist06', classifer=classifer, optimizer_name='sgd', loss='crossentropy', estimation=False)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean2 = [np.mean(accs) for accs in acc_list]
std2 = [np.std(stds) for stds in acc_list]

Training by 10-fold and repeat with 10 epoches.....
cnn: | Mean of Accuracy: 0.333333 | Std of Accuracy: 0.000000 | Max of Accuracy: 0.333333 | Min of Accuracy: 0.333333
Training by 10-fold and repeat with 10 epoches.....
cnn-bn: | Mean of Accuracy: 0.531900 | Std of Accuracy: 0.108761 | Max of Accuracy: 0.679000 | Min of Accuracy: 0.333667
Training by 10-fold and repeat with 10 epoches.....
lstm: | Mean of Accuracy: 0.439833 | Std of Accuracy: 0.032891 | Max of Accuracy: 0.480667 | Min of Accuracy: 0.377000
Training by 10-fold and repeat with 10 epoches.....
lstm-bn: | Mean of Accuracy: 0.482400 | Std of Accuracy: 0.057481 | Max of Accuracy: 0.547667 | Min of Accuracy: 0.357000


In [37]:
classifer_dict = ['cnn2', 'cnn2-bn', 'lstm2', 'lstm2-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='cifar', classifer=classifer, optimizer_name='sgd', loss='crossentropy', estimation=False)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean3 = [np.mean(accs) for accs in acc_list]
std3 = [np.std(stds) for stds in acc_list]

Training by 10-fold and repeat with 10 epoches.....
cnn2: | Mean of Accuracy: 0.319900 | Std of Accuracy: 0.012831 | Max of Accuracy: 0.351667 | Min of Accuracy: 0.302333
Training by 10-fold and repeat with 10 epoches.....
cnn2-bn: | Mean of Accuracy: 0.332900 | Std of Accuracy: 0.021520 | Max of Accuracy: 0.393333 | Min of Accuracy: 0.305667
Training by 10-fold and repeat with 10 epoches.....
lstm2: | Mean of Accuracy: 0.326667 | Std of Accuracy: 0.003169 | Max of Accuracy: 0.330667 | Min of Accuracy: 0.320667
Training by 10-fold and repeat with 10 epoches.....
lstm2-bn: | Mean of Accuracy: 0.334467 | Std of Accuracy: 0.007242 | Max of Accuracy: 0.344333 | Min of Accuracy: 0.321667


In [None]:
draw_scores(mean1, mean2, mean3, std1, std2, std3, title="classifers with transition matrix by SGD", file="classifers_with_transition_sgd")

### 1.1.2 without T

In [None]:
classifer_dict = ['cnn', 'cnn-bn', 'lstm', 'lstm-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='mnist05', classifer=classifer, optimizer_name='sgd', loss='crossentropy', estimation=True)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean1 = [np.mean(accs) for accs in acc_list]
std1 = [np.std(stds) for stds in acc_list]




classifer_dict = ['cnn', 'cnn-bn', 'lstm', 'lstm-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='mnist06', classifer=classifer, optimizer_name='sgd', loss='crossentropy', estimation=True)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean2 = [np.mean(accs) for accs in acc_list]
std2 = [np.std(stds) for stds in acc_list]


classifer_dict = ['cnn2', 'cnn2-bn', 'lstm2', 'lstm2-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='cifar', classifer=classifer, optimizer_name='sgd', loss='crossentropy', estimation=True)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean3 = [np.mean(accs) for accs in acc_list]
std3 = [np.std(stds) for stds in acc_list]

draw_scores(mean1, mean2, mean3, std1, std2, std3, title="classifers with no transition matrix by SGD", file="classifers_with_no_transition_sgd")


Training by 10-fold and repeat with 10 epoches.....
cnn: | Mean of Accuracy: 0.666667 | Std of Accuracy: 0.056024 | Max of Accuracy: 0.720667 | Min of Accuracy: 0.542333
Training by 10-fold and repeat with 10 epoches.....
cnn-bn: | Mean of Accuracy: 0.883533 | Std of Accuracy: 0.095701 | Max of Accuracy: 0.930333 | Min of Accuracy: 0.598667
Training by 10-fold and repeat with 10 epoches.....
lstm: | Mean of Accuracy: 0.743500 | Std of Accuracy: 0.017149 | Max of Accuracy: 0.771333 | Min of Accuracy: 0.713333
Training by 10-fold and repeat with 10 epoches.....
lstm-bn: | Mean of Accuracy: 0.742467 | Std of Accuracy: 0.008265 | Max of Accuracy: 0.754667 | Min of Accuracy: 0.725333
Training by 10-fold and repeat with 10 epoches.....
cnn: | Mean of Accuracy: 0.412167 | Std of Accuracy: 0.017428 | Max of Accuracy: 0.419667 | Min of Accuracy: 0.360000
Training by 10-fold and repeat with 10 epoches.....


## 1.2 Different classifers(cnn, cnn-bn, lstm, lstm-bn) by RMSprop

In [None]:
mean1, mean2, mean3 = [], [], []
std1, std2, std3 = [], [], []

### 1.2.1 with T

In [None]:
classifer_dict = ['cnn', 'cnn-bn', 'lstm', 'lstm-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='mnist05', classifer=classifer, optimizer_name='rmsprop', loss='crossentropy', estimation=False)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean1 = [np.mean(accs) for accs in acc_list]
std1 = [np.std(stds) for stds in acc_list]




classifer_dict = ['cnn', 'cnn-bn', 'lstm', 'lstm-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='mnist06', classifer=classifer, optimizer_name='rmsprop', loss='crossentropy', estimation=False)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean2 = [np.mean(accs) for accs in acc_list]
std2 = [np.std(stds) for stds in acc_list]


classifer_dict = ['cnn2', 'cnn2-bn', 'lstm2', 'lstm2-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='cifar', classifer=classifer, optimizer_name='rmsprop', loss='crossentropy', estimation=False)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean3 = [np.mean(accs) for accs in acc_list]
std3 = [np.std(stds) for stds in acc_list]

draw_scores(mean1, mean2, mean3, std1, std2, std3, title="classifers with transition matrix RMSprop", file="classifers_with_transition_RMS")


### 1.2.2 without T

In [None]:
classifer_dict = ['cnn', 'cnn-bn', 'lstm', 'lstm-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='mnist05', classifer=classifer, optimizer_name='rmsprop', loss='crossentropy', estimation=True)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean1 = [np.mean(accs) for accs in acc_list]
std1 = [np.std(stds) for stds in acc_list]




classifer_dict = ['cnn', 'cnn-bn', 'lstm', 'lstm-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='mnist06', classifer=classifer, optimizer_name='rmsprop', loss='crossentropy', estimation=True)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean2 = [np.mean(accs) for accs in acc_list]
std2 = [np.std(stds) for stds in acc_list]


classifer_dict = ['cnn2', 'cnn2-bn', 'lstm2', 'lstm2-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='cifar', classifer=classifer, optimizer_name='rmsprop', loss='crossentropy', estimation=True)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean3 = [np.mean(accs) for accs in acc_list]
std3 = [np.std(stds) for stds in acc_list]

draw_scores(mean1, mean2, mean3, std1, std2, std3, title="classifers without transition matrix RMSprop", file="classifers_no_transition_RMS")

## 1.3 Different classifers(cnn, cnn-bn, lstm, lstm-bn) by Adam

In [None]:
mean1, mean2, mean3 = [], [], []
std1, std2, std3 = [], [], []

### 1.3.1 with T

In [None]:
classifer_dict = ['cnn', 'cnn-bn', 'lstm', 'lstm-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='mnist05', classifer=classifer, optimizer_name='adam', loss='crossentropy', estimation=False)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean1 = [np.mean(accs) for accs in acc_list]
std1 = [np.std(stds) for stds in acc_list]




classifer_dict = ['cnn', 'cnn-bn', 'lstm', 'lstm-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='mnist06', classifer=classifer, optimizer_name='adam', loss='crossentropy', estimation=False)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean2 = [np.mean(accs) for accs in acc_list]
std2 = [np.std(stds) for stds in acc_list]


classifer_dict = ['cnn2', 'cnn2-bn', 'lstm2', 'lstm2-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='cifar', classifer=classifer, optimizer_name='adam', loss='crossentropy', estimation=False)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean3 = [np.mean(accs) for accs in acc_list]
std3 = [np.std(stds) for stds in acc_list]

draw_scores(mean1, mean2, mean3, std1, std2, std3, title="classifers with transition matrix Adam", file="classifers_with_transition_adam")


### 1.3.2 without T

In [None]:
classifer_dict = ['cnn', 'cnn-bn', 'lstm', 'lstm-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='mnist05', classifer=classifer, optimizer_name='adam', loss='crossentropy', estimation=True)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean1 = [np.mean(accs) for accs in acc_list]
std1 = [np.std(stds) for stds in acc_list]




classifer_dict = ['cnn', 'cnn-bn', 'lstm', 'lstm-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='mnist06', classifer=classifer, optimizer_name='adam', loss='crossentropy', estimation=True)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean2 = [np.mean(accs) for accs in acc_list]
std2 = [np.std(stds) for stds in acc_list]


classifer_dict = ['cnn2', 'cnn2-bn', 'lstm2', 'lstm2-bn']
acc_list = [] # [0]: cnn, [1]: cnn-bn, [2]: lstm, [4]: lstm-bn
for i, classifer in enumerate(classifer_dict):
    recordings = select_train(data_name='cifar', classifer=classifer, optimizer_name='adam', loss='crossentropy', estimation=True)
    acc_list.append(recordings.evaluation.accuracy)
    print(classifer + ': | Mean of Accuracy: {:4.6f} | Std of Accuracy: {:4.6f} | Max of Accuracy: {:4.6f} | Min of Accuracy: {:4.6f}'\
      .format(np.mean(acc_list[i]), np.std(acc_list[i]), \
              np.max(acc_list[i]), np.min(acc_list[i])))
mean3 = [np.mean(accs) for accs in acc_list]
std3 = [np.std(stds) for stds in acc_list]

draw_scores(mean1, mean2, mean3, std1, std2, std3, title="classifers without transition matrix Adam", file="classifers_no_transition_adam")


For MNIST 0.5 

Benchmark accuracy on test dataset (crossvalidated) (10 epochs):  0.892667 (std 0.082232)

Accuracy Estimations:



| optimizer | No T | Given T | 
| ----------| ---- | ------- | 
|   sgd     |   0.8627 (0.0440)   |   0.7263 (0.1993)      | 
|   RMSprop    |   0.9313 (0.0049)   |   0.9140 (0.0123)      |             |
|   Adam   |   0.6211 (0.0315)   |   0.9029 (0.0146)      |             |


In [16]:
params = Params()
########## Wrap data with loader ##########
mnist05_train_loader = DataLoader(mnist05_train_set, batch_size=params.batch_size, sampler=mnist05_train_sampler)
mnist05_validation_loader = DataLoader(mnist05_train_set, batch_size=params.batch_size, sampler=mnist05_validation_sampler)
mnist05_test_loader = DataLoader(mnist05_test_set, batch_size=params.batch_size, shuffle=False)

mnist06_train_loader = DataLoader(mnist06_train_set, batch_size=params.batch_size, sampler=mnist06_train_sampler)
mnist06_validation_loader = DataLoader(mnist06_train_set, batch_size=params.batch_size, sampler=mnist06_validation_sampler)
mnist06_test_loader = DataLoader(mnist06_test_set, batch_size=params.batch_size, shuffle=False)

cifar_train_loader = DataLoader(cifar_train_set, batch_size=params.batch_size, sampler=cifar_train_sampler)
cifar_validation_loader = DataLoader(cifar_train_set, batch_size=params.batch_size, sampler=cifar_validation_sampler)
cifar_test_loader = DataLoader(cifar_test_set, batch_size=params.batch_size, shuffle=False)


# ====================================
# Choose dataset and Transition Matrix
datasets_dict = {
    "mnist05": {
        "loaders": (mnist05_train_loader, mnist05_validation_loader, mnist05_test_loader),
        "dataset": mnist05_train_set,
        "testset": mnist05_test_set,
        "transition_matrix": torch.from_numpy(mnist05_transitions.T).to(device) # Transpose because defined differently than in lectures
        },
    "mnist06": {
        "loaders": (mnist06_train_loader, mnist06_validation_loader, mnist06_test_loader),
        "dataset": mnist06_train_set,
        "testset": mnist06_test_set,
        "transition_matrix": torch.from_numpy(mnist06_transitions.T).to(device) # Transpose because defined differently than in lectures
        },
    "cifar": {
        "loaders": (cifar_train_loader, cifar_validation_loader, cifar_test_loader),
        "dataset": cifar_train_set,
        "testset": cifar_test_set,
        "transition_matrix": torch.from_numpy(np.array(
            [[0.2796548,  0.37073188, 0.33353805],
            [0.37033914, 0.35247205, 0.28679701],
            [0.35000606, 0.27679607, 0.37966494]]
            ).T).to(device)
        },
}
# ==============
# Change dataset here
data_name = 'cifar'
# Change whether to train the network for transition matrix estimation
ESTIMATION = True
# ==============





if not ESTIMATION:
    transition_matrix = datasets_dict[DATASET]['transition_matrix']
else:
    transition_matrix = None

train_loader, validation_loader, test_loader = datasets_dict[data_name]['loaders']
dataset = datasets_dict[data_name]['dataset']
testset = datasets_dict[data_name]['testset']

# Note that the transition matrix estimated is actually the transpose of the transition matrix defined in report.
def estimate_transition_matrix(trained_model, train_set, test_set, device):
    
    # First, find the anchor point xi
    nbr_classes = len(set(train_set.tensors[1].numpy()))
    nbr_train_samples = len((train_set.tensors[1].numpy()))

    x_i = []
    softmax = nn.Softmax(dim=1) # Softmax needed as it is not included in the architecture
    with torch.no_grad():
        trained_model.eval()
        all_outputs_train = trained_model(train_set.tensors[0].float().to(device)).cpu().numpy()
        all_outputs_test = trained_model(test_set.tensors[0].float().to(device)).cpu().numpy()

    # Merge train set and test set to get a bigger set to draw our samples from
    all_outputs = np.concatenate((all_outputs_train, all_outputs_test))
    x_i_indices = np.argmax(all_outputs, axis=0)

    estimated_transition_matrix = np.zeros((nbr_classes,nbr_classes))
    with torch.no_grad():
        trained_model.eval()
        for i, index in enumerate(x_i_indices):
            if index < nbr_train_samples:
                p_i = softmax(trained_model(train_set.tensors[0][index].float().to(device).view(1, 3, 32, 32)))
            else:
                test_index = index - nbr_train_samples
                p_i = softmax(trained_model(train_set.tensors[0][test_index].float().to(device).view(1, 3, 32, 32)))
            estimated_transition_matrix[i,:] = p_i.cpu().numpy()[0]

    return estimated_transition_matrix

# Generalization for estimation of the transtion matrix

N = 5
ESTIMATION = True
params.epochs = 7

nbr_classes = len(set(dataset.tensors[1].numpy()))
estimates = np.zeros((N, nbr_classes, nbr_classes))
for n in range(N):
    model = CNN2(number_classes=nbr_classes).to(device).float()
    criterion = nn.CrossEntropyLoss(reduction='none')
    optimizer = optim.SGD(model.parameters(), lr=params.learning_rate, momentum=params.momentum, weight_decay=params.weight_decay)
    trained_model, _ = train(model, optimizer, criterion, dataset, device, params, test_loader, transition_matrix=transition_matrix, verbose=False)
    estimates[n,:,:] = estimate_transition_matrix(trained_model, dataset, testset, device)

print('========\nEstimated Transition Matrix:\n{}\n--------\nStandard Deviation:\n{}\n========'.format(
    np.mean(estimates, axis=0), np.std(estimates, axis=0)
    )
)

Training by 10-fold and repeat with 10 epoches.....
Training by 10-fold and repeat with 10 epoches.....
Training by 10-fold and repeat with 10 epoches.....
Training by 10-fold and repeat with 10 epoches.....
Training by 10-fold and repeat with 10 epoches.....
Estimated Transition Matrix:
[[0.58491454 0.16692027 0.2481652 ]
 [0.20071659 0.55103179 0.24825158]
 [0.22351401 0.18679138 0.5896946 ]]
--------
Standard Deviation:
[[0.04851773 0.03867952 0.05788719]
 [0.07455217 0.17647435 0.12320584]
 [0.06653687 0.05194869 0.09505719]]


In [17]:
32 * 7 * 7

1568