In [1]:
import torch
import torch.nn as nn
import numpy as np
import math


def kronecker(matrix1, matrix2):
    return torch.ger(matrix1.view(-1), matrix2.view(-1)).reshape(*(matrix1.size() + matrix2.size())).permute([0, 2, 1, 3]).reshape(matrix1.size(0) * matrix2.size(0), matrix1.size(1) * matrix2.size(1))



class simple_fcnn(nn.Module):
    '''
    2 layer feed forward neural network.
    Will use leaky ReLU activation functions.
    Activation = {'relu', 'linear'}
    '''

    def __init__(self, Input_size=3072, Hidden_size=3072, Output_size=1, Activation="relu"):
        super(simple_fcnn, self).__init__()
        '''
        Inputs: Input_size, Hidden_size, Output_size, Activation
        '''
        # Initialize architecture parameters
        self.Input_size = Input_size
        self.Hidden_size = Hidden_size
        self.Output_size = Output_size
        self.Activation = Activation


        # Initialize weights through He initialization (by default in nn.Linear)

        self.i2h = nn.Linear(Input_size, Hidden_size, bias=True)
        self.i2h.bias = torch.nn.Parameter(torch.zeros_like(self.i2h.bias))
#         self.i2h.weight = torch.nn.init.normal_(self.i2h.weight, mean=0.0, std=math.sqrt(2/(Input_size)))
        self.i2h.weight = torch.nn.init.kaiming_normal_(self.i2h.weight, a=0.01)


        # Initialize densly connected output layer
        self.h2o = nn.Linear(Hidden_size, Output_size)
        self.h2o.bias = torch.nn.Parameter(torch.zeros_like(self.h2o.bias))
        self.h2o.weight = torch.nn.init.kaiming_normal_(self.h2o.weight, a=0.01)

        # Initialize nonlinearities
        self.relu = nn.LeakyReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        '''
        Forward step for network. Establishes Architecture.
        Inputs: Input
        Outputs: Output
        '''
        # Prepare input for appropriate architecture


        # Set Activation function to calculate hidden layer

        if self.Activation == 'relu':
            Hidden = self.relu(self.i2h(x))
        else:
            Hidden = self.i2h(x)

        # Calculate Output layer
        Output = self.sigmoid(self.h2o(Hidden))
        return(Output)

class ktree_gen(nn.Module):
    '''
    k-Tree neural network
    '''

    def __init__(self, ds='mnist', Activation="relu", Sparse=True,
                 Input_order=None, Repeats=1, Padded=False):
        super(ktree_gen, self).__init__()
        '''
        Inputs: ds (dataset), activation, sparse, input_order, repeats, padded
        '''
        # Initialize architecture parameters
        self.ds = ds
        self.Activation = Activation
        self.Sparse = Sparse
        self.Input_order = Input_order
        self.Repeats = Repeats

        # Initialize weights
        # Set biases to 0
        # Set kaiming initialize weights with gain to correct for sparsity
        # Set freeze masks

        #Specify tree dimensions
        # If using 28x28 datasets...
        if (ds == 'mnist') or (ds == 'fmnist') or (ds == 'kmnist') or (ds == 'emnist'):
            # If padded, use 1024 sized tree, completely binary tree
            if Padded:
                self.k = [1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1]
            # If not padded, use 784 sized tree,
            # 7:1 between layers 1 and 2, and layers 2 and 3
            else:
                self.k = [784, 112, 16, 8, 4, 2, 1]
        # If using 3x32x32 datasets...
        elif (ds == 'svhn') or (ds == 'cifar10'):
            # Use 3072 sized tree
            # 3:1 between layers 1 and 2, otherwise binary
            self.k = [3072, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1]
        # If using 16x16 datasets...
        elif ds == 'usps':
            # Use 256 sized tree
            self.k = [256, 128, 64, 32, 16, 8, 4, 2, 1]
        else:
            print('Select a dataset')
            return(None)

        # Make layers of tree architecture

        # Name each layer in each subtree for reference later
        self.names = np.empty((self.Repeats, len(self.k)-1),dtype=object)
        # Initialize freeze mask for use in training loop
        self.freeze_mask_set = []
        # For each repeat or subtree, make a sparse layer that is initialized correctly
        for j in range(self.Repeats):
            # For each layer within each subtree
            for i in range(len(self.k)-1):
                # Assign name of the layer, indexed by layer (i) and subtree (j)
                name = ''.join(['w',str(j),'_',str(i)])
                # Initialize the layer with the appropriate name
                self.add_module(name, nn.Linear(self.k[i],self.k[i+1]))
                # Set bias of layer to zeros
                self._modules[name].bias = nn.Parameter(torch.zeros_like(self._modules[name].bias))
                # Use custom method to re-initialize the layer weights and create freeze mask for that layer
                self._modules[name].weight.data, freeze_mask = self.initialize(self._modules[name])
                # Add the layer name to the list of names
                self.names[j,i] = name
                # Set the freeze mask for the first subtree, which should be the same for all subtrees
                if j < 1:
                    self.freeze_mask_set.append(freeze_mask)

        # Initialize root node, aka soma node aka output node
        self.root = nn.Linear(Repeats, 1)

        # Initialize nonlinearities
        self.relu = nn.LeakyReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        '''
        Forward step for network. Establishes Architecture.
        Inputs: Input
        Outputs: Output
        '''

        y_out = []
        # Step through every layer in each subtree of model, applying nonlinearities
        for j in range(self.Repeats):
            y = x
            for i in range(len(self.k)-1):
                if self.Activation == 'relu':
                    y = self.relu(self._modules[self.names[j,i]](y))
                else:
                    y = self._modules[self.names[j,i]](y)
            # keep track of pen-ultimate layer outputs
            y_out.append(y)

        # Calculate final output, joining the outputs of each subtree together
        combined_output = torch.cat(y_out, dim=1)
        self.root.weight.data.clamp_(0)  # Non-negative constraint on root weights
        output = self.sigmoid(self.root(combined_output))

        return(output)

    def initialize(self, layer):
        # Kaiming initialize weights accounting for sparsity

        # Extract weights from layer we are reinitializing
        weights = layer.weight.data
        # If sparse, change the initializations based on density (sparsity)
        if self.Sparse:
            if weights.shape[1] == 3072: # first layer of 3x32x32 image datasets
                inp_block = torch.ones((1,3))
            elif (weights.shape[1] == 784) or (weights.shape[1] == 112): # first or second layer of 28x28 datasets
                inp_block = torch.ones((1,7))
            else:
                inp_block = torch.ones((1,2)) # all other layers (or 32x32)

            # Set up mask for where each node receives a set of inputs of equal size to the input block
            inp_mask = kronecker(torch.eye(weights.shape[0]), inp_block)

            # Calculate density
            density = len(np.where(inp_mask)[0])/len(inp_mask.reshape(-1))

            # Generate Kaiming initialization with gain = 1/density
            weights = torch.nn.init.normal_(weights, mean=0.0, std=math.sqrt(2/(weights.shape[1]*density)))
            # Make weights positive
            weights.abs_()

            # Where no inputs will be received, set weights to zero
            weights[inp_mask == 0] = 0
        else: # If not sparse, use typical kaiming normalization
            weights = torch.nn.init.normal_(weights, mean=0.0, std=math.sqrt(2/(weights.shape[1])))
            weights.abs_()

        # Generate freeze mask for use in training to keep weights initialized to zero at zero
        mask_gen = torch.zeros_like(weights)
        # Indicate where weights are equal to zero
        freeze_mask = mask_gen == weights

        return(weights, freeze_mask)

In [2]:
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import numpy as np
import torchvision
from torchvision import transforms

def format_data_weighted(Data_set, Target_class_1, Target_class_2, Data_weighting='default', permute=False, padded=True):
    '''
    Change labels so that target class is of value 1 and all other classes are
    of value 0. Dataset will be a 3 member tuple: data, label_binary, label_default.
    Inputs: Data_set, Target_class_1, Target_class_2, Data_weighting, permute, padded
    Outputs: Data_set_formatted
    '''
    # If Data_weighting == paired, then only the 2 classes, with 1:1 data weighting, are returned labeled 0 and 1
    if Data_weighting == 'paired':
        # Load intire dataset with batch size equal to entire dataset
        Loader = DataLoader(Data_set, batch_size=len(Data_set), shuffle=True)
        for _, (inputs, labels) in enumerate(Loader):
            # data and label_default contain entire dataset
            data = inputs
            label_default = labels

        # Filter out all classes except Target classes
        # Get indices for filter
        selector_1 = np.where(label_default.numpy() == Target_class_1)
        selector_2 = np.where(label_default.numpy() == Target_class_2)

        # Filter labels and data (images)
        label_1 = label_default[selector_1]
        data_1 = data[selector_1]
        label_2 = label_default[selector_2]
        data_2 = data[selector_2]

        # combine filtered data and label for each class
        label_pair = torch.cat((label_1, label_2), 0)
        data_pair = torch.cat((data_1, data_2), 0)

        # Assign binary labels to each class
        label_binary = np.where(label_pair.numpy() == Target_class_1, 1, 0)
        label_binary = torch.from_numpy(label_binary).long()
        ## permute with get_permutation function
        if permute:
            if data_pair.shape[1:] == torch.Size([1,28,28]):
                # Pad data to make it 32x32
                padding_f = torch.nn.ZeroPad2d(2)
                data_pair = padding_f(data_pair)
                # make 2d image a 1d array
                data_pair = data_pair.view(len(data_pair),-1)
                # get permutation
                perm_idx = get_permutation(5)
                # Permute data
                data_pair = data_pair[:,perm_idx]
            elif data_pair.shape[1:] == torch.Size([3,32,32]): #for CIFAR10 and SVHN datasets
                # make 2d image a 1d array
                data_pair = data_pair.view(len(data_pair),-1)
                # get permutation
                perm_idx = get_permutation(5)
                perm_idx = np.concatenate((perm_idx,perm_idx,perm_idx),0)
                # permute data
                data_pair = data_pair[:,perm_idx]
            else:
                # make 2d image a 1d array
                data_pair = data_pair.view(len(data_pair),-1)
                # get permutation
                perm_idx = get_permutation(4)
                # permute data
                data_pair = data_pair[:,perm_idx]
        else: # Only if padding, then pad 28x28 to 32x32
            if padded and data_pair.shape[1:] == torch.Size([1,28,28]):
                # Pad data
                padding_f = torch.nn.ZeroPad2d(2)
                data_pair = padding_f(data_pair)
            # make 2d image a 1d array
            data_pair = data_pair.view(len(data_pair),-1)
        # Put now formatted data and labels into a dataset
        Data_set_formatted = torch.utils.data.TensorDataset(data_pair, label_binary, label_pair)

    else: # Keep all classes, only label target class with 1 and all others with 0
        # Load intire dataset with batch size equal to entire dataset
        Loader = DataLoader(Data_set, batch_size=len(Data_set), shuffle=True)
        for _, (inputs, labels) in enumerate(Loader):
            # data and label_default contain entire dataset
            data = inputs
            label_default = labels

        # Assign binary labels to each class
        label_binary = np.where(labels.numpy() == Target_class_1, 1, 0)
        label_binary = torch.from_numpy(label_binary).long()
    ## permute with get_permutation function
        if permute:
            if data.shape[1:] == torch.Size([1,28,28]):
                # Pad data
                padding_f = torch.nn.ZeroPad2d(2)
                data = padding_f(data)
                # make 2d image a 1d array
                data = data.view(len(data),-1)
                # get permutation
                perm_idx = get_permutation(5)
                # Permute data
                data = data[:,perm_idx]
            elif data.shape[1:] == torch.Size([3,32,32]):
                # make 2d image a 1d array
                data = data.view(len(data),-1)
                # get permutation
                perm_idx = get_permutation(5)
                perm_idx = np.concatenate((perm_idx,perm_idx,perm_idx),0)
                # Permute data
                data = data[:,perm_idx]
            else:
                # make 2d image a 1d array
                data = data.view(len(data),-1)
                # get permutation
                perm_idx = get_permutation(4)
                # Permute data
                data = data[:,perm_idx]
        else:
            # make 2d image a 1d array
            data = data.view(len(data),-1)
        # Put now formatted data and labels into a dataset
        Data_set_formatted = torch.utils.data.TensorDataset(data, label_binary, labels)

    return Data_set_formatted


def dataset_weighted_split_all(Batch_size=32, Target_class_1=0, Target_class_2=1,
                                 Data_weighting='default', Split=5, ds='mnist', permute=False,
                                 padded=True):
    '''
    Produces dataset that will be fed into a network model.
    Inputs: Batch_size, Target_class_1, Target_class_2, Data_weighting, Split, dataset, permute, padded
    Outputs: set of Train_loaders, set of Valid_loaders, a single Test_loader
    '''
    transform = transforms.ToTensor()

    # Load Datasets
    if ds == 'mnist':
        Train_set = torchvision.datasets.MNIST(root='./data', train=True,
                                               download=True, transform=transform)
        Test_set  = torchvision.datasets.MNIST(root='./data', train=False,
                                               download=True, transform=transform)
    elif ds == 'fmnist':
        Train_set = torchvision.datasets.FashionMNIST(root='./fmdata', train=True,
                                           download=True, transform=transform)
        Test_set  = torchvision.datasets.FashionMNIST(root='./fmdata', train=False,
                                          download=True, transform=transform)
    elif ds == 'cifar10':
        Train_set = torchvision.datasets.CIFAR10(root='./cifardata', train=True,
                                           download=True, transform=transform)
        Test_set  = torchvision.datasets.CIFAR10(root='./cifardata', train=False,
                                          download=True, transform=transform)
    elif ds == 'kmnist':
        Train_set = torchvision.datasets.KMNIST(root='./kmnist', train=True,
                                                transform=transform , download=True)
        Test_set = torchvision.datasets.KMNIST(root='./kmnist', train=False,
                                               transform=transform, download=True)
    elif ds == 'emnist':
        # 0-9: numbers
        # 10-35: uppercase letters
        # 36-61: lowercase letters
        Train_set = torchvision.datasets.EMNIST(root='./data', split='byclass', train=True,
                                                transform=transform, download=True)
        Test_set = torchvision.datasets.EMNIST(root='./data', split='byclass', train=False,
                                               transform=transform, download=True)
    elif ds == 'svhn':
        Train_set = torchvision.datasets.SVHN(root='./data', split='train',
                                              transform=transform, download=True)
        Test_set = torchvision.datasets.SVHN(root='./data', split='test',
                                             transform=transform, download=True)
    elif ds == 'usps':
        Train_set = torchvision.datasets.USPS(root='./data', train=True,
                                              transform=transform, download=True)
        Test_set = torchvision.datasets.USPS(root='./data', train=False,
                                             transform=transform, download=True)
    else:
        print('Error: Specify dataset')
        return None

    # Assign Binary Labels to target classes
    Train_set = format_data_weighted(Train_set, Target_class_1, Target_class_2, Data_weighting=Data_weighting, permute=permute, padded=padded)
    Test_set  = format_data_weighted(Test_set, Target_class_1, Target_class_2, Data_weighting=Data_weighting, permute=permute, padded=padded)

    # Set length for dataset splitting purposes
    train_len = Train_set.tensors[0].size()[0]
    test_len = Test_set.tensors[0].size()[0]

    # Make validset from training data such that it is equal in size to the test set
    Train_set, Valid_set = torch.utils.data.dataset.random_split(Train_set, (train_len-test_len, test_len))

    # Since random_split sends out a subset and the original dataset is normally used from that.
    # remake datasets so that they no longer depend on the original dataset
    Train_set = torch.utils.data.TensorDataset(Train_set[:][0], Train_set[:][1], Train_set[:][2])
    Valid_set = torch.utils.data.TensorDataset(Valid_set[:][0], Valid_set[:][1], Valid_set[:][2])

    if Data_weighting == 'paired': # paired is 1:1 weighting

        # Split Training set and Valid set into multiple dataloaders and return array of dataloaders
        Train_loader_split, Valid_loader_split = [],[]
        for i in range(Split):
            # get size of each split dataset
            spl = int(len(Train_set)/Split)
            # Split dataset
            Train_set_split = torch.utils.data.TensorDataset(Train_set[i*spl:(i+1)*spl][0],
                                                              Train_set[i*spl:(i+1)*spl][1],
                                                              Train_set[i*spl:(i+1)*spl][2])
            # make dataloader from split section of original dataset
            Train_loader = DataLoader(Train_set_split, batch_size=Batch_size, shuffle=True)
            # Add split dataloader to the list of dataloaders
            Train_loader_split.append(Train_loader)

            # get size of each split dataset
            spl = int(len(Valid_set)/Split)
            # Split dataset
            Valid_set_split = torch.utils.data.TensorDataset(Valid_set[i*spl:(i+1)*spl][0],
                                                              Valid_set[i*spl:(i+1)*spl][1],
                                                              Valid_set[i*spl:(i+1)*spl][2])
            # make dataloader from split section of original dataset
            Valid_loader = DataLoader(Valid_set_split, batch_size=Batch_size, shuffle=True)
            # Add split dataloader to the list of dataloaders
            Valid_loader_split.append(Valid_loader)

        # make dataloader from original test set
        Test_loader = DataLoader(Test_set, batch_size=Batch_size, shuffle=False)

    else: #Default is 1:9 oversampled weighting. Oversample target dataset

        # Determine ratios of each class
        trainratio = np.bincount(Train_set.tensors[1])
        validratio = np.bincount(Valid_set.tensors[1])
        testratio = np.bincount(Test_set.tensors[1])

        # Convert ratios to counts of how many samples belong to each class
        train_classcount = trainratio.tolist()
        valid_classcount = validratio.tolist()
        test_classcount = testratio.tolist()

        # Use counts to calculate original dataweighting of each class
        train_weights = 1./torch.tensor(train_classcount, dtype=torch.float)
        valid_weights = 1./torch.tensor(valid_classcount, dtype=torch.float)
        test_weights = 1./torch.tensor(test_classcount, dtype=torch.float)

        # Select target class to get original weights of that class
        train_sampleweights = train_weights[Train_set.tensors[1]]
        valid_sampleweights = train_weights[Valid_set.tensors[1]]
        test_sampleweights = test_weights[Test_set.tensors[1]]

        # Make samplers for data loader in order to oversample target class
        train_sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=train_sampleweights,
                                                                       num_samples=len(train_sampleweights))
        valid_sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=valid_sampleweights,
                                                                       num_samples=len(valid_sampleweights))
        test_sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=test_sampleweights,
                                                                       num_samples=len(test_sampleweights))

        # Split Training set and Valid set into multiple dataloaders and return array of dataloaders
        Train_loader_split, Valid_loader_split = [],[]
        for i in range(Split):
            # get size of each split dataset
            spl = int(len(Train_set)/Split)
            # Split dataset
            Train_set_split = torch.utils.data.TensorDataset(Train_set[i*spl:(i+1)*spl][0],
                                                              Train_set[i*spl:(i+1)*spl][1],
                                                              Train_set[i*spl:(i+1)*spl][2])
            # make dataloader from split section of original dataset
            Train_loader = DataLoader(Train_set_split, batch_size=Batch_size, shuffle=True)
            # Add split dataloader to the list of dataloaders
            Train_loader_split.append(Train_loader)

            # get size of each split dataset
            spl = int(len(Valid_set)/Split)
            # Split dataset
            Valid_set_split = torch.utils.data.TensorDataset(Valid_set[i*spl:(i+1)*spl][0],
                                                              Valid_set[i*spl:(i+1)*spl][1],
                                                              Valid_set[i*spl:(i+1)*spl][2])
            # make dataloader from split section of original dataset
            Valid_loader = DataLoader(Valid_set_split, batch_size=Batch_size, shuffle=True)
            # Add split dataloader to the list of dataloaders
            Valid_loader_split.append(Valid_loader)
        # make dataloader from original test set
        Test_loader = DataLoader(Test_set, batch_size=Batch_size, sampler=test_sampler)

    return Train_loader_split, Valid_loader_split, Test_loader




def get_matrix(n):
    '''
     Assumes that the matrix is of size 2^n x 2^n for some n

     EXAMPLE for n=4

     Old order:

      1  2  3  4
      5  6  7  8
      9 10 11 12
     13 14 15 16

     New order:

      1  2  5  6
      3  4  7  8
      9 10 13 14
     11 12 15 16

     Function returns numbers from old order, read in the order of the new numbers:

     [1, 2, 5, 6, 3, 4, 7, 8, 9, 10, 13, 14, 11, 12, 15, 16]

     So if you previously had a data vector v from a matrix size 32 x 32,
     you can now use v[get_permutation(5)] to reorder the elements.
    '''
    if n == 0:
        return np.array([[1]])
    else:
        smaller = get_matrix(n - 1)
        num_in_smaller = 2 ** (2 * n - 2)
        first_stack = np.hstack((smaller, smaller + num_in_smaller))
        return np.vstack((first_stack, first_stack + 2 * num_in_smaller))

def get_permutation(n):
    return get_matrix(n).ravel() - 1

In [3]:
import numpy as np
import torch

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [4]:
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
from torch.optim.optimizer import required
from torch.utils.data.dataset import random_split
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Optimizer



def train_test_ktree(model, trainloader, validloader, testloader, epochs=10, randorder=False, patience=60):
    '''
    Trains and tests k-tree models
    Inputs: model, trainloader, validloader, testloader, epochs, randorder, patience
    Outputs: train loss_curve, train acc_curve, test ave_loss, test accuracy, trained model
    '''
    # Initialize loss function and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # to track training loss and accuracy as model trains
    loss_curve = []
    acc_curve = []

    # to track the validation loss as the model trains
    valid_losses = []
    # to track the average validation loss per epoch as the model trains
    avg_valid_losses = []

    # if randorder == True, generate the randomizer index array for randomizing the input image pixel order
    if randorder == True:
        ordering = torch.randperm(len(trainloader.dataset.tensors[0][0]))

    # Initialize early stopping object
    early_stopping = EarlyStopping(patience=patience, verbose=False)

    for epoch in range(epochs):  # loop over the dataset multiple times
        ######################
        # train the model    #
        ######################
        running_loss = 0.0
        running_acc = 0.0
        model.train()

        for i, data in enumerate(trainloader):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels, _ = data
            inputs, labels, _ = data
            if randorder:
                ordering = torch.randperm(len(inputs[0])).cuda()
                inputs = inputs[:, ordering].cuda()
            else:
                inputs = inputs.cuda()
            labels = labels.cuda()
            if randorder == True:
                # Randomize pixel order
                inputs = inputs[:,ordering].cuda()
            else:
                inputs = inputs.cuda()

            labels = labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            outputs.cuda()
            loss = criterion(outputs, labels.float().reshape(-1,1))
            loss.backward()

####        # Freeze select weights by zeroing out gradients
            for child in model.children():
                for param in child.parameters():
                    for freeze_mask in model.freeze_mask_set:
                        if param.grad.shape == freeze_mask.shape:
                            param.grad[freeze_mask] = 0

            optimizer.step()

            # print statistics
            running_loss += loss.item()
            running_acc += (torch.round(outputs) == labels.float().reshape(-1,1)).sum().item()/trainloader.batch_size
            # Generate loss and accuracy curves by saving average every 4th minibatch
            if (i % 4) == 3:
                loss_curve.append(running_loss/4)
                acc_curve.append(running_acc/4)
                running_loss = 0.0
                running_acc = 0.0

        ######################
        # validate the model #
        ######################
        model.eval() # prep model for evaluation
        for _, data in enumerate(validloader):
            inputs, labels, _ = data
            if randorder == True:
                # Randomize pixel order
                inputs = inputs[:,ordering].cuda()
            else:
                inputs = inputs.cuda()
            labels = labels.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            model = model.cuda()
            output = model(inputs)
            # calculate the loss
            loss = criterion(output, labels.float().reshape(-1,1))
            # record validation loss
            valid_losses.append(loss.item())

        valid_loss = np.average(valid_losses)


        # early_stopping needs the validation loss to check if it has decreased,
        # and if it has, it will make a checkpoint of the current model
        early_stopping(valid_loss, model)

        if early_stopping.early_stop:
            print("Early stopping")
            break

    # load the last checkpoint with the best model
#    model.load_state_dict(torch.load('checkpoint.pt'))

    print('Finished Training, %d epochs' % (epoch+1))

    ######################
    # test the model     #
    ######################
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels, _ = data
            if randorder == True:
                # Randomize pixel order
                inputs = inputs[:,ordering].cuda()
            else:
                inputs = inputs.cuda()
            images = images.cuda()
            labels = labels.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            model = model.cuda()
            outputs = model(images)
            # calculate the loss
            loss = criterion(outputs, labels.float().reshape(-1,1))
            # Sum up correct labelings
            predicted = torch.round(outputs)
            total += labels.size(0)
            correct += (predicted == labels.float().reshape(-1,1)).sum().item()
    # Calculate test accuracy
    accuracy = correct/total

    print('Accuracy of the network on the test images: %2f %%' % (
        100 * accuracy))

    if randorder == True:
        return(loss_curve, acc_curve, loss, accuracy, model, ordering)
    else:
        return(loss_curve, acc_curve, loss, accuracy, model)

def train_test_fc(model, trainloader, validloader, testloader, epochs=10, patience=60):
    '''
    Trains and tests fcnn models
    Inputs: model, trainloader, validloader, testloader, epochs, patience
    Outputs: train loss_curve, train acc_curve, test ave_loss, test accuracy, trained model
    '''
    # Initialize loss function and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)


    # to track the validation loss as the model trains
    valid_losses = []
    # to track the average validation loss per epoch as the model trains
    avg_valid_losses = []

    # to track training loss and accuracy as model trains
    loss_curve = []
    acc_curve = []

    # Initialize early stopping object
    early_stopping = EarlyStopping(patience=patience, verbose=False)


    for epoch in range(epochs):  # loop over the dataset multiple times
        ######################
        # train the model    #
        ######################
        running_loss = 0.0
        running_acc = 0.0
        model.train()

        for i, data in enumerate(trainloader):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels, _ = data
            inputs = inputs.cuda()
            labels = labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels.float().reshape(-1,1))
            loss.backward()

            optimizer.step()

            # print statistics
            running_loss += loss.item()
            running_acc += (torch.round(outputs) == labels.float().reshape(-1,1)).sum().item()/trainloader.batch_size
            if i % 4 == 3:      # Generate loss and accuracy curves by saving average every 4th minibatch
                loss_curve.append(running_loss/4)
                acc_curve.append(running_acc/4)
                running_loss = 0.0
                running_acc = 0.0

        ######################
        # validate the model #
        ######################
        model.eval() # prep model for evaluation
        for _, data in enumerate(validloader):
            inputs, labels, _ = data
            inputs = inputs.cuda()
            labels = labels.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            model = model.cuda()
            output = model(inputs)
            # calculate the loss
            loss = criterion(output, labels.float().reshape(-1,1))
            # record validation loss
            valid_losses.append(loss.item())

        valid_loss = np.average(valid_losses)


        # early_stopping needs the validation loss to check if it has decresed,
        # and if it has, it will make a checkpoint of the current model
        early_stopping(valid_loss, model)

        if early_stopping.early_stop:
            print("Early stopping")
            break

    # load the last checkpoint with the best model
#    model.load_state_dict(torch.load('checkpoint.pt'))

    print('Finished Training, %d epochs' % (epoch+1))

    correct = 0
    all_loss = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels, _ = data
            images = images.cuda()
            labels = labels.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            model = model.cuda()
            outputs = model(images)
            # calculate the loss
            loss = criterion(outputs, labels.float().reshape(-1,1))
            # Sum up correct labelings
            predicted = torch.round(outputs)
            total += labels.size(0)
            correct += (predicted == labels.float().reshape(-1,1)).sum().item()
            all_loss += loss
    # Calculate test accuracy
    accuracy = correct/total
    # Calculate average loss
    ave_loss = all_loss.item()/total

    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * accuracy))

    return(loss_curve, acc_curve, ave_loss, accuracy, model)

Train fcnn of sizes 1, 2, 4, 8, 16, and 32 on datasets

In [50]:
# Run Order: 3rd, 1 out of 2
### Train and test fcnn model
### Saves test loss and test accuracy
### all classes script, early stopping implemented

import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
import torchvision
from torchvision import transforms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import matplotlib
import matplotlib.pyplot as plt
from torch.optim.optimizer import required
from torch.utils.data.dataset import random_split
import scipy
import os
import glob
import pandas as pd
import pickle


# Test space for networks
# Select Class Set
class_set = 0


# Initialize settings
bs = 256
weighting = 'paired'
trials = 10
epochs = 2000
trees_set = [1,2,4,8,16,32]

# Load class-dataset list
classes = np.load('./results/classes.npy', allow_pickle=True)
classes = np.concatenate((classes[:3], classes[4:7]), axis=0)


# Initialize final test loss and accuracy variables
loss = np.zeros((len(classes), trials, len(trees_set)))
acc = np.zeros((len(classes), trials, len(trees_set)))


# For each dataset enumerated from classes list
for j, (t1, t2, ds) in enumerate(classes):
    print(t1, t2, ds)
    # Load data loaders
    trainloaders, validloaders, testloader = dataset_weighted_split_all(bs, t1, t2, weighting, trials, ds, permute=False)
    # Initialize input size for model initialization purposes
    input_size = trainloaders[0].dataset.tensors[0][0].shape[0]
    # For each trial
    for i in range(trials):
        # For every k-tree defined by trees_set
        for k, trees in enumerate(trees_set):
            print(j, i, k)
            # Initialize the fcnn model, such that hidden layer is twice the number of trees
            model = simple_fcnn(input_size, 2*trees, 1).cuda()
            #Train and test fcnn, assigning loss and acc values
            loss_curve, acc_curve, loss[j,i,k], acc[j,i,k], model_t = train_test_fc(model, trainloaders[i],
                                              validloaders[i], testloader, epochs=epochs)

            # Save accuracy and loss arrays
            np.save('./results/fcnn_acc_'+str(class_set)+'.npy', acc)
            np.save('./results/fcnn_loss_'+str(class_set)+'.npy', loss)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
EarlyStopping counter: 35 out of 60
EarlyStopping counter: 36 out of 60
EarlyStopping counter: 37 out of 60
EarlyStopping counter: 38 out of 60
EarlyStopping counter: 39 out of 60
EarlyStopping counter: 40 out of 60
EarlyStopping counter: 41 out of 60
EarlyStopping counter: 42 out of 60
EarlyStopping counter: 43 out of 60
EarlyStopping counter: 44 out of 60
EarlyStopping counter: 45 out of 60
EarlyStopping counter: 46 out of 60
EarlyStopping counter: 47 out of 60
EarlyStopping counter: 48 out of 60
EarlyStopping counter: 49 out of 60
EarlyStopping counter: 50 out of 60
EarlyStopping counter: 51 out of 60
EarlyStopping counter: 52 out of 60
EarlyStopping counter: 53 out of 60
EarlyStopping counter: 54 out of 60
EarlyStopping counter: 55 out of 60
EarlyStopping counter: 56 out of 60
EarlyStopping counter: 57 out of 60
EarlyStopping counter: 58 out of 60
EarlyStopping counter: 59 out of 60
EarlyStopping counter: 60 out of 60

Train LDA model on datasets

In [37]:
# Run Order: 3rd, 2 out of 2
### Train and test lda model
### Saves test accuracy
### all classes script, early stopping implemented

import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
import torchvision
from torchvision import transforms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import matplotlib
import matplotlib.pyplot as plt
from torch.optim.optimizer import required
from torch.utils.data.dataset import random_split
import scipy
import os
import glob
import pandas as pd
import pickle



# Initialize settings
bs = 256
weighting = 'paired'
trials = 10

# Load class-dataset list
classes = np.load('./results/classes.npy', allow_pickle=True)
classes = np.concatenate((classes[:3], classes[4:7]), axis=0)

# Initialize test accuracy variable
score_test = np.zeros((len(classes), trials))


for j, (t1, t2, ds) in enumerate(classes):
    print(t1, t2, ds)
    print('lda')
    # Get correctly labeled and paired class datasets
    trainloaders, validloaders, testloader = dataset_weighted_split_all(bs, t1, t2, weighting, trials, ds)
    for i in range(trials):
        print(j, i)
        # Reassign datasets
        X_train = trainloaders[i].dataset.tensors[0]
        y_train = trainloaders[i].dataset.tensors[1]
        X_test = testloader.dataset.tensors[0]
        y_test = testloader.dataset.tensors[1]

        # initialize lda
        lda = LinearDiscriminantAnalysis()

        # fit to images, labels
        lda.fit(X_train, y_train)

        # see accuracy for validation set
        score_test[j,i] = lda.score(X_test, y_test)
        print(score_test[j,i])

        # Save accuracy array
        np.save('./results/lda_score_test.npy', score_test)

3 5 mnist
lda
0 0
0.868559411146162
0 1
0.8832807570977917
0 2
0.8769716088328076
0 3
0.8764458464773922
0 4
0.8796004206098843
0 5
0.8748685594111462
0 6
0.8717139852786541
0 7
0.8780231335436383
0 8
0.8564668769716088
0 9
0.8738170347003155
0 6 fmnist
lda
1 0
0.6615
1 1
0.6785
1 2
0.6745
1 3
0.671
1 4
0.67
1 5
0.643
1 6
0.693
1 7
0.642
1 8
0.6715
1 9
0.6845
2 6 kmnist
lda
2 0
0.6875
2 1
0.6435
2 2
0.6895
2 3
0.6855
2 4
0.669
2 5
0.681
2 6
0.69
2 7
0.686
2 8
0.658
2 9
0.684
5 6 svhn
lda
Downloading http://ufldl.stanford.edu/housenumbers/train_32x32.mat to ./data/train_32x32.mat


100%|██████████| 182040794/182040794 [00:47<00:00, 3841842.68it/s]


Downloading http://ufldl.stanford.edu/housenumbers/test_32x32.mat to ./data/test_32x32.mat


100%|██████████| 64275384/64275384 [00:12<00:00, 5050558.42it/s] 


3 0
0.5161660169685852
3 1
0.5090575556065122
3 2
0.5299243292822747
3 3
0.5134143545058473
3 4
0.508598945196056
3 5
0.5251089199724833
3 6
0.5193762898417794
3 7
0.5335932125659253
3 8
0.5113506076587938
3 9
0.5180004586104104
3 5 usps
lda
Downloading https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/usps.bz2 to ./data/usps.bz2


100%|██████████| 6579383/6579383 [00:00<00:00, 17354229.67it/s]


Downloading https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/usps.t.bz2 to ./data/usps.t.bz2


100%|██████████| 1831726/1831726 [00:00<00:00, 5820852.90it/s]


4 0
0.8865030674846626
4 1
0.8159509202453987
4 2
0.8374233128834356
4 3
0.8558282208588958
4 4
0.843558282208589
4 5
0.843558282208589
4 6
0.8742331288343558
4 7
0.8374233128834356
4 8
0.8926380368098159
4 9
0.8619631901840491
3 5 cifar10
lda
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifardata/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:13<00:00, 12849721.20it/s]


Extracting ./cifardata/cifar-10-python.tar.gz to ./cifardata
Files already downloaded and verified
5 0
0.5465
5 1
0.528
5 2
0.5305
5 3
0.531
5 4
0.539
5 5
0.5215
5 6
0.5155
5 7
0.5315
5 8
0.5215
5 9
0.527


Train k-trees of sizes 1, 2, 4, 8, 16, and 32 on datasets

In [52]:
# Run Order: 4th, 1 out of 3
### Train and test k-tree model
### Saves test loss and test accuracy
### Uses original image input order
### all classes script, early stopping implemented

import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
import torchvision
from torchvision import transforms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import matplotlib
import matplotlib.pyplot as plt
from torch.optim.optimizer import required
from torch.utils.data.dataset import random_split
import scipy
import os
import glob
import pandas as pd
import pickle



# Test space for networks
# Select Class Set
class_set = 0

# Initialize settings
bs = 256
weighting = 'paired'
trials = 10
epochs = 2000
trees_set = [1,2,4,8,16,32]

# Load class-dataset list
classes = np.load('./results/classes.npy', allow_pickle=True)
classes = np.concatenate((classes[:3], classes[4:7]), axis=0)

# if class_set == 0:
#     classes = classes[0:2] # mnist fmnist
# elif class_set == 1:
#     classes = classes[2:4] # kmnist emnist
# elif class_set == 2:
#     classes = classes[4:6] # svhn usps
# else:
#     classes = classes[6].reshape(1,-1)

# Initialize final test loss and accuracy variables
loss = np.zeros((len(classes), trials, len(trees_set)))
acc = np.zeros((len(classes), trials, len(trees_set)))

# For each dataset enumerated from classes list
for j, (t1, t2, ds) in enumerate(classes):
    print(t1, t2, ds)
    # Load data loaders
    trainloaders, validloaders, testloader = dataset_weighted_split_all(bs, t1, t2, weighting, trials, ds, permute=False)
    # Initialize input size for model initialization purposes
    input_size = trainloaders[0].dataset.tensors[0][0].shape[0]
    # For each trial
    for i in range(trials):
        # For every k-tree defined by trees_set
        for k, trees in enumerate(trees_set):
            print(j, i, k)
            # Initialize the ktree model
            model = ktree_gen(ds=ds, Repeats=trees, Padded=True).cuda()

            #Train and test ktree, assigning loss and acc values
            loss_curve, acc_curve, loss[j,i,k], acc[j,i,k], model_t = train_test_ktree(model, trainloaders[i],
                                                                                  validloaders[i], testloader, epochs = epochs, randorder=False)
            # Save accuracy and loss arrays
            np.save('./results/ktree_acc_orig_'+str(class_set)+'.npy', acc)
            np.save('./results/ktree_loss_orig_'+str(class_set)+'.npy', loss)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
EarlyStopping counter: 10 out of 60
EarlyStopping counter: 11 out of 60
EarlyStopping counter: 12 out of 60
EarlyStopping counter: 13 out of 60
EarlyStopping counter: 14 out of 60
EarlyStopping counter: 15 out of 60
EarlyStopping counter: 16 out of 60
EarlyStopping counter: 17 out of 60
EarlyStopping counter: 18 out of 60
EarlyStopping counter: 19 out of 60
EarlyStopping counter: 20 out of 60
EarlyStopping counter: 21 out of 60
EarlyStopping counter: 22 out of 60
EarlyStopping counter: 23 out of 60
EarlyStopping counter: 24 out of 60
EarlyStopping counter: 25 out of 60
EarlyStopping counter: 26 out of 60
EarlyStopping counter: 27 out of 60
EarlyStopping counter: 28 out of 60
EarlyStopping counter: 29 out of 60
EarlyStopping counter: 30 out of 60
EarlyStopping counter: 31 out of 60
EarlyStopping counter: 32 out of 60
EarlyStopping counter: 33 out of 60
EarlyStopping counter: 34 out of 60
EarlyStopping counter: 35 out of 60

KeyboardInterrupt: 

Train 1-fcnn on MNIST

In [62]:
# Run Order: 3rd, 1 out of 2
### Train and test fcnn model
### Saves test loss and test accuracy
### all classes script, early stopping implemented

import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
import torchvision
from torchvision import transforms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import matplotlib
import matplotlib.pyplot as plt
from torch.optim.optimizer import required
from torch.utils.data.dataset import random_split
import scipy
import os
import glob
import pandas as pd
import pickle


# Test space for networks
# Select Class Set
class_set = 0


# Initialize settings
bs = 256
weighting = 'paired'
trials = 10
epochs = 2000
trees_set = [1]

# Load class-dataset list
classes = np.load('./results/classes.npy', allow_pickle=True)
classes = classes[0:1]


# Initialize final test loss and accuracy variables
loss = np.zeros((len(classes), trials, len(trees_set)))
acc = np.zeros((len(classes), trials, len(trees_set)))


# For each dataset enumerated from classes list
for j, (t1, t2, ds) in enumerate(classes):
    print(t1, t2, ds)
    # Load data loaders
    trainloaders, validloaders, testloader = dataset_weighted_split_all(bs, t1, t2, weighting, trials, ds, permute=False)
    # Initialize input size for model initialization purposes
    input_size = trainloaders[0].dataset.tensors[0][0].shape[0]
    # For each trial
    for i in range(trials):
        # For every k-tree defined by trees_set
        for k, trees in enumerate(trees_set):
            print(j, i, k)
            # Initialize the fcnn model, such that hidden layer is twice the number of trees
            model = simple_fcnn(input_size, 2*trees, 1).cuda()
            #Train and test fcnn, assigning loss and acc values
            loss_curve, acc_curve, loss[j,i,k], acc[j,i,k], model_t = train_test_fc(model, trainloaders[i],
                                              validloaders[i], testloader, epochs=epochs)

            # Save accuracy and loss arrays
            np.save('./results/fcnn_acc_'+str(class_set)+'.npy', acc)
            np.save('./results/fcnn_loss_'+str(class_set)+'.npy', loss)

3 5 mnist
0 0 0
EarlyStopping counter: 1 out of 60
EarlyStopping counter: 2 out of 60
EarlyStopping counter: 3 out of 60
EarlyStopping counter: 4 out of 60
EarlyStopping counter: 5 out of 60
EarlyStopping counter: 6 out of 60
EarlyStopping counter: 7 out of 60
EarlyStopping counter: 8 out of 60
EarlyStopping counter: 9 out of 60
EarlyStopping counter: 10 out of 60
EarlyStopping counter: 11 out of 60
EarlyStopping counter: 12 out of 60
EarlyStopping counter: 13 out of 60
EarlyStopping counter: 14 out of 60
EarlyStopping counter: 15 out of 60
EarlyStopping counter: 16 out of 60
EarlyStopping counter: 17 out of 60
EarlyStopping counter: 18 out of 60
EarlyStopping counter: 19 out of 60
EarlyStopping counter: 20 out of 60
EarlyStopping counter: 21 out of 60
EarlyStopping counter: 22 out of 60
EarlyStopping counter: 23 out of 60
EarlyStopping counter: 24 out of 60
EarlyStopping counter: 25 out of 60
EarlyStopping counter: 26 out of 60
EarlyStopping counter: 27 out of 60
EarlyStopping counter

Train 1-ktree on MNIST

In [63]:
### Training 1-tree on MNIST

import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
import torchvision
from torchvision import transforms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import matplotlib
import matplotlib.pyplot as plt
from torch.optim.optimizer import required
from torch.utils.data.dataset import random_split
import scipy
import os
import glob
import pandas as pd
import pickle



# Test space for networks
# Select Class Set
class_set = 0

# Initialize settings
bs = 256
weighting = 'paired'
trials = 10
epochs = 2000
trees_set = [1]

# Load class-dataset list
classes = np.load('./results/classes.npy', allow_pickle=True)
classes = classes[0:1]

# if class_set == 0:
#     classes = classes[0:2] # mnist fmnist
# elif class_set == 1:
#     classes = classes[2:4] # kmnist emnist
# elif class_set == 2:
#     classes = classes[4:6] # svhn usps
# else:
#     classes = classes[6].reshape(1,-1)

# Initialize final test loss and accuracy variables
loss = np.zeros((len(classes), trials, len(trees_set)))
acc = np.zeros((len(classes), trials, len(trees_set)))

# For each dataset enumerated from classes list
for j, (t1, t2, ds) in enumerate(classes):
    print(t1, t2, ds)
    # Load data loaders
    trainloaders, validloaders, testloader = dataset_weighted_split_all(bs, t1, t2, weighting, trials, ds, permute=False)
    # Initialize input size for model initialization purposes
    input_size = trainloaders[0].dataset.tensors[0][0].shape[0]
    # For each trial
    for i in range(trials):
        # For every k-tree defined by trees_set
        for k, trees in enumerate(trees_set):
            print(j, i, k)
            # Initialize the ktree model
            model = ktree_gen(ds=ds, Repeats=trees, Padded=True).cuda()

            #Train and test ktree, assigning loss and acc values
            loss_curve, acc_curve, loss[j,i,k], acc[j,i,k], model_t = train_test_ktree(model, trainloaders[i],
                                                                                  validloaders[i], testloader, epochs = epochs, randorder=False)
            # Save accuracy and loss arrays
            np.save('./results/ktree_acc_orig_'+str(class_set)+'.npy', acc)
            np.save('./results/ktree_loss_orig_'+str(class_set)+'.npy', loss)

3 5 mnist
0 0 0
Finished Training, 2000 epochs
Accuracy of the network on the test images: 91.692955 %
0 1 0
EarlyStopping counter: 1 out of 60
EarlyStopping counter: 2 out of 60
EarlyStopping counter: 3 out of 60
EarlyStopping counter: 4 out of 60
EarlyStopping counter: 5 out of 60
EarlyStopping counter: 6 out of 60
EarlyStopping counter: 7 out of 60
EarlyStopping counter: 8 out of 60
EarlyStopping counter: 9 out of 60
EarlyStopping counter: 10 out of 60
EarlyStopping counter: 11 out of 60
EarlyStopping counter: 12 out of 60
EarlyStopping counter: 13 out of 60
EarlyStopping counter: 14 out of 60
EarlyStopping counter: 15 out of 60
EarlyStopping counter: 16 out of 60
EarlyStopping counter: 17 out of 60
EarlyStopping counter: 18 out of 60
EarlyStopping counter: 19 out of 60
EarlyStopping counter: 20 out of 60
EarlyStopping counter: 21 out of 60
EarlyStopping counter: 22 out of 60
EarlyStopping counter: 23 out of 60
EarlyStopping counter: 24 out of 60
EarlyStopping counter: 25 out of 60


Train 32-fcnn on MNIST

In [59]:
# Run Order: 3rd, 1 out of 2
### Train and test fcnn model
### Saves test loss and test accuracy
### all classes script, early stopping implemented

import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
import torchvision
from torchvision import transforms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import matplotlib
import matplotlib.pyplot as plt
from torch.optim.optimizer import required
from torch.utils.data.dataset import random_split
import scipy
import os
import glob
import pandas as pd
import pickle


# Test space for networks
# Select Class Set
class_set = 0


# Initialize settings
bs = 256
weighting = 'paired'
trials = 10
epochs = 2000
trees_set = [32]

# Load class-dataset list
classes = np.load('./results/classes.npy', allow_pickle=True)
classes = classes[0:1]


# Initialize final test loss and accuracy variables
loss = np.zeros((len(classes), trials, len(trees_set)))
acc = np.zeros((len(classes), trials, len(trees_set)))


# For each dataset enumerated from classes list
for j, (t1, t2, ds) in enumerate(classes):
    print(t1, t2, ds)
    # Load data loaders
    trainloaders, validloaders, testloader = dataset_weighted_split_all(bs, t1, t2, weighting, trials, ds, permute=False)
    # Initialize input size for model initialization purposes
    input_size = trainloaders[0].dataset.tensors[0][0].shape[0]
    # For each trial
    for i in range(trials):
        # For every k-tree defined by trees_set
        for k, trees in enumerate(trees_set):
            print(j, i, k)
            # Initialize the fcnn model, such that hidden layer is twice the number of trees
            model = simple_fcnn(input_size, 2*trees, 1).cuda()
            #Train and test fcnn, assigning loss and acc values
            loss_curve, acc_curve, loss[j,i,k], acc[j,i,k], model_t = train_test_fc(model, trainloaders[i],
                                              validloaders[i], testloader, epochs=epochs)

            # Save accuracy and loss arrays
            np.save('./results/fcnn_acc_'+str(class_set)+'.npy', acc)
            np.save('./results/fcnn_loss_'+str(class_set)+'.npy', loss)

3 5 mnist
0 0 0
EarlyStopping counter: 1 out of 60
EarlyStopping counter: 1 out of 60
EarlyStopping counter: 2 out of 60
EarlyStopping counter: 3 out of 60
EarlyStopping counter: 4 out of 60
EarlyStopping counter: 5 out of 60
EarlyStopping counter: 6 out of 60
EarlyStopping counter: 7 out of 60
EarlyStopping counter: 8 out of 60
EarlyStopping counter: 9 out of 60
EarlyStopping counter: 10 out of 60
EarlyStopping counter: 11 out of 60
EarlyStopping counter: 12 out of 60
EarlyStopping counter: 13 out of 60
EarlyStopping counter: 14 out of 60
EarlyStopping counter: 15 out of 60
EarlyStopping counter: 16 out of 60
EarlyStopping counter: 17 out of 60
EarlyStopping counter: 18 out of 60
EarlyStopping counter: 19 out of 60
EarlyStopping counter: 20 out of 60
EarlyStopping counter: 21 out of 60
EarlyStopping counter: 22 out of 60
EarlyStopping counter: 23 out of 60
EarlyStopping counter: 24 out of 60
EarlyStopping counter: 25 out of 60
EarlyStopping counter: 26 out of 60
EarlyStopping counter:

In [60]:
### Training 32-tree on MNIST

import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
import torchvision
from torchvision import transforms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import matplotlib
import matplotlib.pyplot as plt
from torch.optim.optimizer import required
from torch.utils.data.dataset import random_split
import scipy
import os
import glob
import pandas as pd
import pickle



# Test space for networks
# Select Class Set
class_set = 0

# Initialize settings
bs = 256
weighting = 'paired'
trials = 10
epochs = 2000
trees_set = [32]

# Load class-dataset list
classes = np.load('./results/classes.npy', allow_pickle=True)
classes = classes[0:1]

# if class_set == 0:
#     classes = classes[0:2] # mnist fmnist
# elif class_set == 1:
#     classes = classes[2:4] # kmnist emnist
# elif class_set == 2:
#     classes = classes[4:6] # svhn usps
# else:
#     classes = classes[6].reshape(1,-1)

# Initialize final test loss and accuracy variables
loss = np.zeros((len(classes), trials, len(trees_set)))
acc = np.zeros((len(classes), trials, len(trees_set)))

# For each dataset enumerated from classes list
for j, (t1, t2, ds) in enumerate(classes):
    print(t1, t2, ds)
    # Load data loaders
    trainloaders, validloaders, testloader = dataset_weighted_split_all(bs, t1, t2, weighting, trials, ds, permute=False)
    # Initialize input size for model initialization purposes
    input_size = trainloaders[0].dataset.tensors[0][0].shape[0]
    # For each trial
    for i in range(trials):
        # For every k-tree defined by trees_set
        for k, trees in enumerate(trees_set):
            print(j, i, k)
            # Initialize the ktree model
            model = ktree_gen(ds=ds, Repeats=trees, Padded=True).cuda()

            #Train and test ktree, assigning loss and acc values
            loss_curve, acc_curve, loss[j,i,k], acc[j,i,k], model_t = train_test_ktree(model, trainloaders[i],
                                                                                  validloaders[i], testloader, epochs = epochs, randorder=False)
            # Save accuracy and loss arrays
            np.save('./results/ktree_acc_orig_'+str(class_set)+'.npy', acc)
            np.save('./results/ktree_loss_orig_'+str(class_set)+'.npy', loss)

3 5 mnist
0 0 0
EarlyStopping counter: 1 out of 60
EarlyStopping counter: 2 out of 60
EarlyStopping counter: 3 out of 60
EarlyStopping counter: 4 out of 60
EarlyStopping counter: 5 out of 60
EarlyStopping counter: 6 out of 60
EarlyStopping counter: 7 out of 60
EarlyStopping counter: 8 out of 60
EarlyStopping counter: 9 out of 60
EarlyStopping counter: 10 out of 60
EarlyStopping counter: 11 out of 60
EarlyStopping counter: 12 out of 60
EarlyStopping counter: 13 out of 60
EarlyStopping counter: 14 out of 60
EarlyStopping counter: 15 out of 60
EarlyStopping counter: 16 out of 60
EarlyStopping counter: 17 out of 60
EarlyStopping counter: 18 out of 60
EarlyStopping counter: 19 out of 60
EarlyStopping counter: 20 out of 60
EarlyStopping counter: 21 out of 60
EarlyStopping counter: 22 out of 60
EarlyStopping counter: 23 out of 60
EarlyStopping counter: 24 out of 60
EarlyStopping counter: 25 out of 60
EarlyStopping counter: 26 out of 60
EarlyStopping counter: 27 out of 60
EarlyStopping counter

Training tree on MNIST using model with non-negatively constrained weights

In [5]:
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
import torchvision
from torchvision import transforms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import matplotlib
import matplotlib.pyplot as plt
from torch.optim.optimizer import required
from torch.utils.data.dataset import random_split
import scipy
import os
import glob
import pandas as pd
import pickle



# Test space for networks
# Select Class Set
class_set = 0

# Initialize settings
bs = 256
weighting = 'paired'
trials = 10
epochs = 2000
trees_set = [1]

# Load class-dataset list
classes = np.load('./results/classes.npy', allow_pickle=True)
classes = classes[0:1]

# if class_set == 0:
#     classes = classes[0:2] # mnist fmnist
# elif class_set == 1:
#     classes = classes[2:4] # kmnist emnist
# elif class_set == 2:
#     classes = classes[4:6] # svhn usps
# else:
#     classes = classes[6].reshape(1,-1)

# Initialize final test loss and accuracy variables
loss = np.zeros((len(classes), trials, len(trees_set)))
acc = np.zeros((len(classes), trials, len(trees_set)))

# For each dataset enumerated from classes list
for j, (t1, t2, ds) in enumerate(classes):
    print(t1, t2, ds)
    # Load data loaders
    trainloaders, validloaders, testloader = dataset_weighted_split_all(bs, t1, t2, weighting, trials, ds, permute=False)
    # Initialize input size for model initialization purposes
    input_size = trainloaders[0].dataset.tensors[0][0].shape[0]
    # For each trial
    for i in range(trials):
        # For every k-tree defined by trees_set
        for k, trees in enumerate(trees_set):
            print(j, i, k)
            # Initialize the ktree model
            model = ktree_gen(ds=ds, Repeats=trees, Padded=True).cuda()

            #Train and test ktree, assigning loss and acc values
            loss_curve, acc_curve, loss[j,i,k], acc[j,i,k], model_t = train_test_ktree(model, trainloaders[i],
                                                                                  validloaders[i], testloader, epochs = epochs, randorder=False)
            # Save accuracy and loss arrays
            np.save('./results/ktree_acc_orig_'+str(class_set)+'.npy', acc)
            np.save('./results/ktree_loss_orig_'+str(class_set)+'.npy', loss)

3 5 mnist
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:01<00:00, 6211098.01it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 193264.84it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 1961364.09it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 7140378.10it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






0 0 0
Finished Training, 2000 epochs
Accuracy of the network on the test images: 53.101998 %
0 1 0
EarlyStopping counter: 1 out of 60
EarlyStopping counter: 2 out of 60
EarlyStopping counter: 3 out of 60
EarlyStopping counter: 4 out of 60
EarlyStopping counter: 5 out of 60
EarlyStopping counter: 6 out of 60
EarlyStopping counter: 7 out of 60
EarlyStopping counter: 8 out of 60
EarlyStopping counter: 9 out of 60
EarlyStopping counter: 10 out of 60
EarlyStopping counter: 11 out of 60
EarlyStopping counter: 12 out of 60
EarlyStopping counter: 13 out of 60
EarlyStopping counter: 14 out of 60
EarlyStopping counter: 15 out of 60
EarlyStopping counter: 16 out of 60
EarlyStopping counter: 17 out of 60
EarlyStopping counter: 18 out of 60
EarlyStopping counter: 19 out of 60
EarlyStopping counter: 20 out of 60
EarlyStopping counter: 21 out of 60
EarlyStopping counter: 22 out of 60
EarlyStopping counter: 23 out of 60
EarlyStopping counter: 24 out of 60
EarlyStopping counter: 25 out of 60
EarlyStopp

Training tree on FMNIST using model with non-negatively constrained weights

In [6]:
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
import torchvision
from torchvision import transforms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import matplotlib
import matplotlib.pyplot as plt
from torch.optim.optimizer import required
from torch.utils.data.dataset import random_split
import scipy
import os
import glob
import pandas as pd
import pickle



# Test space for networks
# Select Class Set
class_set = 0

# Initialize settings
bs = 256
weighting = 'paired'
trials = 10
epochs = 2000
trees_set = [1]

# Load class-dataset list
classes = np.load('./results/classes.npy', allow_pickle=True)
classes = classes[1:2]

# if class_set == 0:
#     classes = classes[0:2] # mnist fmnist
# elif class_set == 1:
#     classes = classes[2:4] # kmnist emnist
# elif class_set == 2:
#     classes = classes[4:6] # svhn usps
# else:
#     classes = classes[6].reshape(1,-1)

# Initialize final test loss and accuracy variables
loss = np.zeros((len(classes), trials, len(trees_set)))
acc = np.zeros((len(classes), trials, len(trees_set)))

# For each dataset enumerated from classes list
for j, (t1, t2, ds) in enumerate(classes):
    print(t1, t2, ds)
    # Load data loaders
    trainloaders, validloaders, testloader = dataset_weighted_split_all(bs, t1, t2, weighting, trials, ds, permute=False)
    # Initialize input size for model initialization purposes
    input_size = trainloaders[0].dataset.tensors[0][0].shape[0]
    # For each trial
    for i in range(trials):
        # For every k-tree defined by trees_set
        for k, trees in enumerate(trees_set):
            print(j, i, k)
            # Initialize the ktree model
            model = ktree_gen(ds=ds, Repeats=trees, Padded=True).cuda()

            #Train and test ktree, assigning loss and acc values
            loss_curve, acc_curve, loss[j,i,k], acc[j,i,k], model_t = train_test_ktree(model, trainloaders[i],
                                                                                  validloaders[i], testloader, epochs = epochs, randorder=False)
            # Save accuracy and loss arrays
            np.save('./results/ktree_acc_orig_'+str(class_set)+'.npy', acc)
            np.save('./results/ktree_loss_orig_'+str(class_set)+'.npy', loss)

0 6 fmnist
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./fmdata/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:02<00:00, 11899373.18it/s]


Extracting ./fmdata/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./fmdata/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./fmdata/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 201767.22it/s]


Extracting ./fmdata/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./fmdata/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./fmdata/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:01<00:00, 3698391.43it/s]


Extracting ./fmdata/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./fmdata/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./fmdata/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 19864100.27it/s]


Extracting ./fmdata/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./fmdata/FashionMNIST/raw

0 0 0
EarlyStopping counter: 1 out of 60
EarlyStopping counter: 2 out of 60
EarlyStopping counter: 1 out of 60
EarlyStopping counter: 2 out of 60
EarlyStopping counter: 3 out of 60
EarlyStopping counter: 1 out of 60
EarlyStopping counter: 2 out of 60
EarlyStopping counter: 3 out of 60
EarlyStopping counter: 4 out of 60
EarlyStopping counter: 5 out of 60
EarlyStopping counter: 6 out of 60
EarlyStopping counter: 7 out of 60
EarlyStopping counter: 8 out of 60
EarlyStopping counter: 9 out of 60
EarlyStopping counter: 10 out of 60
EarlyStopping counter: 11 out of 60
EarlyStopping counter: 12 out of 60
EarlyStopping counter: 13 out of 60
EarlyStopping counter: 14 out of 60
EarlyStopping counter: 15 out of 60
EarlyStopping counter: 16 out of 60
EarlyStopping counter: 17 out of 60
EarlyStopping counter: 18 out of 60
EarlyStopping counter: 19 out of 60
EarlyStopping counter: 20 out of 60
EarlyStopping c

Training on KMNIST using model with non-negatively constrained weights

In [7]:
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
import torchvision
from torchvision import transforms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import matplotlib
import matplotlib.pyplot as plt
from torch.optim.optimizer import required
from torch.utils.data.dataset import random_split
import scipy
import os
import glob
import pandas as pd
import pickle



# Test space for networks
# Select Class Set
class_set = 0

# Initialize settings
bs = 256
weighting = 'paired'
trials = 10
epochs = 2000
trees_set = [1]

# Load class-dataset list
classes = np.load('./results/classes.npy', allow_pickle=True)
classes = classes[2:3]

# if class_set == 0:
#     classes = classes[0:2] # mnist fmnist
# elif class_set == 1:
#     classes = classes[2:4] # kmnist emnist
# elif class_set == 2:
#     classes = classes[4:6] # svhn usps
# else:
#     classes = classes[6].reshape(1,-1)

# Initialize final test loss and accuracy variables
loss = np.zeros((len(classes), trials, len(trees_set)))
acc = np.zeros((len(classes), trials, len(trees_set)))

# For each dataset enumerated from classes list
for j, (t1, t2, ds) in enumerate(classes):
    print(t1, t2, ds)
    # Load data loaders
    trainloaders, validloaders, testloader = dataset_weighted_split_all(bs, t1, t2, weighting, trials, ds, permute=False)
    # Initialize input size for model initialization purposes
    input_size = trainloaders[0].dataset.tensors[0][0].shape[0]
    # For each trial
    for i in range(trials):
        # For every k-tree defined by trees_set
        for k, trees in enumerate(trees_set):
            print(j, i, k)
            # Initialize the ktree model
            model = ktree_gen(ds=ds, Repeats=trees, Padded=True).cuda()

            #Train and test ktree, assigning loss and acc values
            loss_curve, acc_curve, loss[j,i,k], acc[j,i,k], model_t = train_test_ktree(model, trainloaders[i],
                                                                                  validloaders[i], testloader, epochs = epochs, randorder=False)
            # Save accuracy and loss arrays
            np.save('./results/ktree_acc_orig_'+str(class_set)+'.npy', acc)
            np.save('./results/ktree_loss_orig_'+str(class_set)+'.npy', loss)

2 6 kmnist
Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-images-idx3-ubyte.gz
Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-images-idx3-ubyte.gz to ./kmnist/KMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 18165135/18165135 [00:12<00:00, 1464353.89it/s]


Extracting ./kmnist/KMNIST/raw/train-images-idx3-ubyte.gz to ./kmnist/KMNIST/raw

Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz
Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz to ./kmnist/KMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29497/29497 [00:00<00:00, 271574.70it/s]


Extracting ./kmnist/KMNIST/raw/train-labels-idx1-ubyte.gz to ./kmnist/KMNIST/raw

Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-images-idx3-ubyte.gz
Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-images-idx3-ubyte.gz to ./kmnist/KMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 3041136/3041136 [00:02<00:00, 1133327.48it/s]


Extracting ./kmnist/KMNIST/raw/t10k-images-idx3-ubyte.gz to ./kmnist/KMNIST/raw

Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-labels-idx1-ubyte.gz
Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-labels-idx1-ubyte.gz to ./kmnist/KMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5120/5120 [00:00<00:00, 5460166.92it/s]


Extracting ./kmnist/KMNIST/raw/t10k-labels-idx1-ubyte.gz to ./kmnist/KMNIST/raw

0 0 0
EarlyStopping counter: 1 out of 60
EarlyStopping counter: 2 out of 60
EarlyStopping counter: 3 out of 60
EarlyStopping counter: 4 out of 60
EarlyStopping counter: 5 out of 60
EarlyStopping counter: 6 out of 60
EarlyStopping counter: 7 out of 60
EarlyStopping counter: 8 out of 60
EarlyStopping counter: 9 out of 60
EarlyStopping counter: 10 out of 60
EarlyStopping counter: 11 out of 60
EarlyStopping counter: 12 out of 60
EarlyStopping counter: 13 out of 60
EarlyStopping counter: 14 out of 60
EarlyStopping counter: 15 out of 60
EarlyStopping counter: 16 out of 60
EarlyStopping counter: 17 out of 60
EarlyStopping counter: 18 out of 60
EarlyStopping counter: 19 out of 60
EarlyStopping counter: 20 out of 60
EarlyStopping counter: 21 out of 60
EarlyStopping counter: 22 out of 60
EarlyStopping counter: 23 out of 60
EarlyStopping counter: 24 out of 60
EarlyStopping counter: 25 out of 60
EarlyStopping counter: