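This notebook trains the PCANet network: it extracts PCANet features from the training images and fits an SVM classifier on them.
The trained network is then tested in the evaluation section further down; that part could later be moved into a separate file.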

# Import

In [7]:
import os
import sys
import time
import glob
import torch
import utils
import logging
import argparse
from pcanet import PCANet
from dataset_mnist import load_train_mnist
from sklearn.svm import LinearSVC, SVC
from tensorboardX import SummaryWriter
from torchvision.utils import make_grid
from utils import MyLogger

# Make only GPU 0 visible to CUDA for this process.
# NOTE(review): this is hard-coded and does not follow --gpu below — confirm that is intended.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

parser = argparse.ArgumentParser("PCANet")
# Jupyter starts the kernel with "-f <connection file>"; declaring the option keeps
# parse_args() from exiting with SystemExit(2) inside IPython.
# https://stackoverflow.com/questions/42249982/systemexit-2-error-when-calling-parse-args-within-ipython
parser.add_argument('-f')
parser.add_argument('--gpu', type=int, default=0, help='gpu device id')
parser.add_argument('--dataset_name', type=str, default='mnist', help='mnist or cifar10')
parser.add_argument('--dataset_path', type=str, default='/dataset/', help='location of the data corpus')
parser.add_argument('--batch_size', type=int, default=128, help='batch size')
# Fraction of the training data to use. Kept very small here so the notebook runs
# quickly; use 1.0 to train on the full training set.
parser.add_argument('--train_portion', type=float, default=0.001, help='portion of training data')
parser.add_argument('--stages', type=int, default=2, help='the number of stages')
# type=list would split a command-line value into single characters
# ("77" -> ['7', '7']); parse whitespace-separated integers instead,
# e.g. "--filter_shape 7 7". The defaults are unchanged.
parser.add_argument('--filter_shape', type=int, nargs=2, default=[7, 7], help='patch size')
parser.add_argument('--stages_channels', type=int, nargs='+', default=[8, 8], help='channels in different stages')
parser.add_argument('--block_size', type=int, default=7, help='the size of blocks')
parser.add_argument('--block_overlap', type=float, default=0.5, help='the rate of overlap between blocks')
parser.add_argument('--save', type=str, default='EXP', help='experiment name')
parser.add_argument('--log_freq', type=int, default=40, help='record log frequency')
args = parser.parse_args()

# Give every run its own timestamped experiment directory name.
args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
# Presumably creates the experiment directory and stores a copy of the *.py scripts
# next to the results so a run can be traced back to its code — confirm in utils.
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

logger = MyLogger("my_log.log")

# Number of target classes (MNIST digits 0-9).
CLASSES = 10

Experiment dir : search-EXP-20230815-112531


# Training section


In [7]:

# Training cell: learn the PCANet filters stage by stage, turn every training batch
# into histogram features, then fit and save a linear SVM on those features.
if not torch.cuda.is_available():
    logger.log('no gpu device available')
    sys.exit(1)

logger.log('gpu device = %d' % args.gpu)
# Format the message before logging. The previous `"args = %s", args` built a
# (str, Namespace) tuple, so the raw tuple ended up in the log.
logger.log("args = %s" % (args,))

pcanet = PCANet(args.stages, args.filter_shape, args.stages_channels, args.block_size, args.block_overlap)
train_queue, valid_queue = load_train_mnist(args)        # load dataset
logger.log("load training dataset completely")
total_train_labels = torch.tensor([]).long()

writer = SummaryWriter(args.save)       # tensorboardX

# Index of the sample whose feature maps are written to TensorBoard.
PREVIEW_SAMPLE_INDEX = 5

# extract feature from images
with torch.no_grad():
    # First pass over the raw images: cache every batch on disk and feed it to
    # stage 0 (presumably accumulating the statistics for the stage-0 filters —
    # confirm in pcanet.unrolled_stage).
    stage_save_path = args.save
    save_filename = utils.create_pickle_file_name(stage_save_path, 0)
    # Count batches explicitly instead of relying on the leaked loop variable, which
    # is undefined for an empty loader (or stale from an earlier run of this cell).
    num_batches = 0
    for global_step, (train_images, train_labels) in enumerate(train_queue):
        num_batches = global_step + 1
        train_images = train_images.cuda()
        total_train_labels = torch.cat((total_train_labels, train_labels))
        utils.save_feature([train_images, train_labels], save_filename)         # cache the batch for the stage loop
        pcanet.unrolled_stage(train_images, 0)

        if global_step % args.log_freq== 0:
            logger.log("init training global_step: %d" % global_step)
            # convert a batch of tensor into CHW format
            grid_images = make_grid(train_images, nrow=16, padding=5, pad_value=125)
            writer.add_image("raw_images_in_step_%d" % global_step, grid_images)

    if num_batches == 0:
        raise RuntimeError("training queue is empty; check --train_portion and --dataset_path")

    total_features = torch.tensor([])       # empty tensor
    for stage in range(args.stages):
        logger.log('PCANet stage: %d' % stage)

        # transform eigenvector to convolution kernel
        kernel = pcanet.eigenvector_to_kernel(stage)

        # Read this stage's cached inputs; intermediate stages also write their
        # outputs to the next stage's cache file.
        load_filename = utils.create_pickle_file_name(stage_save_path, stage)
        if stage + 1 < args.stages:
            save_filename = utils.create_pickle_file_name(stage_save_path, stage + 1)

        load_filename_pointer = 0         # restart reading from the beginning of the cache file
        for step in range(num_batches):
            train_images, train_labels, load_filename_pointer = utils.load_feature(load_filename, load_filename_pointer)
            batch_features = pcanet.pca_conv(train_images, kernel)
            if step % args.log_freq == 0:
                # View one sample's feature maps; clamp the index so a batch with
                # fewer than PREVIEW_SAMPLE_INDEX + 1 samples does not raise IndexError.
                preview_index = min(PREVIEW_SAMPLE_INDEX, len(batch_features) - 1)
                single_image_feature = utils.exchange_channel(batch_features[preview_index])
                grid_images = make_grid(single_image_feature, nrow=8, padding=5, pad_value=125)
                writer.add_image("feature_image_in_step_%d_in_stage_%d" % (step, stage), grid_images)

            if stage + 1 < args.stages:
                # Intermediate stage: cache the outputs and feed them to the next stage.
                utils.save_feature([batch_features, train_labels], save_filename)
                pcanet.unrolled_stage(batch_features, stage + 1)
            else:
                # Last stage: binarize, build histograms and collect them on the CPU
                # as the classifier's input features.
                decimal_features = pcanet.binary_mapping(batch_features, stage)
                final_features = pcanet.generate_histogram(decimal_features)
                final_features = final_features.cpu()
                total_features = torch.cat((total_features, final_features), dim=0)

            if step % args.log_freq == 0:
                logger.log("circulate training step: %d" % step)

        grid_kernels = make_grid(pcanet.kernel[stage], nrow=args.stages_channels[stage], padding=5, pad_value=125)
        writer.add_image("kernel_in_stage_%d" % stage, grid_kernels)

    writer.close()
    logger.log('extract feature completely, start training classifier')

    # train classifier
    classifier = LinearSVC()
    # classifier = SVC()
    # total_features = total_features.cpu()
    classifier.fit(total_features, total_train_labels)
    logger.log('classifier trained completely')

    # save model
    utils.save_model(pcanet, stage_save_path + "/pcanet.pkl")
    utils.save_model(classifier, stage_save_path + "/classifier.pkl")

    # Accuracy on the training data itself (not a held-out score).
    train_score = classifier.score(total_features, total_train_labels)
    logger.log("score of training is %s" % train_score)

# Evaluation section

In [4]:
import os
import sys
import time
import glob
import torch
import utils
import logging
import argparse
from dataset_mnist import load_test_mnist
from sklearn.metrics import accuracy_score
from utils import MyLogger

# Only GPU 0 is made visible to CUDA for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Evaluation options as (flag, type, default, help) rows, registered in this order.
_EVAL_OPTIONS = (
    ('--gpu', int, 0, 'gpu device id'),
    ('--dataset_name', str, 'mnist', 'mnist or cifar10'),
    ('--dataset_path', str, '/dataset/', 'location of the data corpus'),
    ('--batch_size', int, 256, 'batch size'),
    ('--stages', int, 2, 'the number of stages'),
    # Must name the experiment directory of the training run whose
    # pcanet.pkl / classifier.pkl should be evaluated.
    ('--pretrained_path', str, 'search-EXP-20230814-135920', 'pretrained_path'),
    ('--log_freq', int, 30, 'record log frequency'),
    ('--save', str, 'EXP', 'experiment name'),
)

parser = argparse.ArgumentParser("PCANet")
# Accept the "-f <connection file>" argument that Jupyter passes to the kernel, so
# parse_args() does not exit inside IPython.
# https://stackoverflow.com/questions/42249982/systemexit-2-error-when-calling-parse-args-within-ipython
parser.add_argument('-f')
for _flag, _type, _default, _help in _EVAL_OPTIONS:
    parser.add_argument(_flag, type=_type, default=_default, help=_help)
args = parser.parse_args()

# Timestamped experiment directory name for this evaluation run.
_timestamp = time.strftime("%Y%m%d-%H%M%S")
args.save = 'search-{}-{}'.format(args.save, _timestamp)
# Presumably creates the directory and keeps a copy of the *.py scripts with the
# results — confirm in utils.create_exp_dir.
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

logger = MyLogger("my_log_eval.log")


Experiment dir : search-EXP-20230814-153131


In [5]:

# Evaluation cell: load the trained PCANet and classifier, run every test batch
# through all stages, and report per-batch and overall accuracy.
if not torch.cuda.is_available():
    logger.log('no gpu device available')
    sys.exit(1)

logger.log('gpu device = %d' % args.gpu)
# Format the message before logging. The previous `"args = %s", args` built a
# (str, Namespace) tuple, which is what showed up in the log output.
logger.log("args = %s" % (args,))

# NOTE(review): if utils.load_model unpickles these files, only load models from
# trusted experiment directories (unpickling can execute arbitrary code).
pcanet = utils.load_model(args.pretrained_path + "/pcanet.pkl")
classifier = utils.load_model(args.pretrained_path + "/classifier.pkl")
logger.log("load PCANet and SVM completely")

test_queue, num_test = load_test_mnist(args)       # load dataset
logger.log("load testing dataset completely")

# Index of the final stage, named explicitly instead of relying on the inner loop
# variable leaking out of the `for stage` loop below.
last_stage = args.stages - 1

with torch.no_grad():
    num_of_correct_samples = 0
    for global_step, (test_images, test_labels) in enumerate(test_queue):
        batch_features = test_images.cuda()

        # execute convolution in different stages
        for stage in range(args.stages):
            batch_features = pcanet.pca_conv(batch_features, pcanet.kernel[stage])

        # build binary quantization mapping and generate histogram
        decimal_features = pcanet.binary_mapping(batch_features, last_stage)
        final_features = pcanet.generate_histogram(decimal_features)

        # The classifier works on CPU data, so move the features off the GPU first.
        final_features = final_features.cpu()
        predict_class = classifier.predict(final_features)
        # accuracy_score takes (y_true, y_pred); the value is the same either way.
        batch_accuracy = accuracy_score(test_labels, predict_class)

        if global_step % args.log_freq == 0:
            logger.log("global_step %d, stage %d, batch accuracy %f" % (global_step, last_stage, batch_accuracy))

        # Accumulate the number of correct predictions for the overall accuracy.
        batch_num_of_correct_samples = utils.total_accuracy(predict_class, test_labels)
        num_of_correct_samples += batch_num_of_correct_samples

    logger.log("total accuracy %f" % (num_of_correct_samples / num_test))
    logger.log("test completely")


gpu device = 0
('args = %s', Namespace(f='C:\\Users\\PC\\AppData\\Roaming\\jupyter\\runtime\\kernel-cc11951f-281c-47ba-8812-123026cb34da.json', gpu=0, dataset_name='mnist', dataset_path='/dataset/', batch_size=256, stages=2, pretrained_path='search-EXP-20230814-135920', log_freq=30, save='search-EXP-20230814-153131'))
load PCANet and SVM completely
load testing dataset completely
global_step 0, stage 1, batch accuracy 0.726562
global_step 30, stage 1, batch accuracy 0.769531
total accuracy 0.750200
test completely
