In [1]:
#imports
import torch
import numpy as np
import pandas as pd
import torchvision.models as models
from torch.utils.tensorboard import SummaryWriter
import seaborn as sns
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset
import time
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import datetime
import copy
import os
import pickle
from tqdm import tqdm
from models import vgg

# setting

In [2]:
#mean and std of cifar100 dataset
CIFAR100_TRAIN_MEAN = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
CIFAR100_TRAIN_STD = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)

BATCH_SIZE = 16
WARM = False # typically used in new training

DATE_FORMAT = '%A_%d_%B_%Y_%Hh_%Mm_%Ss'
#time of we run the script
TIME_NOW = datetime.datetime.now().strftime(DATE_FORMAT)
#data settings
subset = False #for local running
k = 10 #number of samples needed to each class in validation set, because we need to split train and validation

#model settings
USE_TENSORBOARD = False
if USE_TENSORBOARD:
    foo = SummaryWriter()
use_gpu = True

#lr scheduler
BASE_LR = 0.001
EPOCH_DECAY = 4
DECAY_WEIGHT = 0.5

DEVICE = 'cpu'
if use_gpu and torch.cuda.is_available():
    DEVICE = 'cuda'

In [3]:
#read files
def unpickle(file):
    
    with open(file, 'rb') as fo:
        dictionary = pickle.load(fo, encoding='bytes')
    return dictionary

In [4]:
def compute_mean_std(cifar100_dataset):
    """compute the mean and std of cifar100 dataset
    Args:
        cifar100_training_dataset or cifar100_test_dataset
        witch derived from class torch.utils.data

    Returns:
        a tuple contains mean, std value of entire dataset
    """

    data_r = numpy.dstack([cifar100_dataset[i][1][:, :, 0] for i in range(len(cifar100_dataset))])
    data_g = numpy.dstack([cifar100_dataset[i][1][:, :, 1] for i in range(len(cifar100_dataset))])
    data_b = numpy.dstack([cifar100_dataset[i][1][:, :, 2] for i in range(len(cifar100_dataset))])
    mean = numpy.mean(data_r), numpy.mean(data_g), numpy.mean(data_b)
    std = numpy.std(data_r), numpy.std(data_g), numpy.std(data_b)

    return mean, std

In [5]:
#data processing
def reshape_images(data_dict):
    reshaped = data_dict.numpy().reshape(len(data_dict), 1024, 3, order = 'F').reshape(len(data_dict), 32,32,3)
    reshaped_processed = torch.from_numpy(reshaped).float().permute(0, 3, 1, 2)
    return reshaped_processed

In [6]:
def get_training_val_dataloader(mean, std, batch_size=16, num_workers=2, shuffle=True):
    """ return training dataloader
    Args:
        mean: mean of cifar100 training dataset
        std: std of cifar100 training dataset
        path: path to cifar100 training python dataset
        batch_size: dataloader batchsize
        num_workers: dataloader num_works
        shuffle: whether to shuffle
    Returns: train_data_loader:torch dataloader object
    """

    transform_train = transforms.Compose([
        #transforms.ToPILImage(),
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    #cifar100_training = CIFAR100Train(path, transform=transform_train)
    cifar100_training = torchvision.datasets.CIFAR100(root='', train=True, download=True, transform=transform_train)
    
    try:
        random_index = pickle.load(open("random_index.pkl", 'rb'))
    except:
        random_index = np.random.permutation([i for i in range(50000)])
        pickle.dump(random_index, open("random_index.pkl", 'wb'))
    
    train_index = random_index[:45000]
    validation_index = random_index[45000:]
    train_dataset = torch.utils.data.Subset(cifar100_training, train_index)
    validation_dataset = torch.utils.data.Subset(cifar100_training, validation_index)
    
    cifar100_training_loader = DataLoader(
        train_dataset, shuffle=shuffle, num_workers=num_workers, batch_size=batch_size)
    
    cifar100_validation_loader = DataLoader(
        validation_dataset, shuffle=shuffle, num_workers=num_workers, batch_size=batch_size)

    return cifar100_training_loader, cifar100_validation_loader

In [7]:
def get_training_dataloader(mean, std, batch_size=16, num_workers=2, shuffle=True):
    """ return training dataloader
    Args:
        mean: mean of cifar100 training dataset
        std: std of cifar100 training dataset
        path: path to cifar100 training python dataset
        batch_size: dataloader batchsize
        num_workers: dataloader num_works
        shuffle: whether to shuffle
    Returns: train_data_loader:torch dataloader object
    """

    transform_train = transforms.Compose([
        #transforms.ToPILImage(),
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    #cifar100_training = CIFAR100Train(path, transform=transform_train)
    cifar100_training = torchvision.datasets.CIFAR100(root='', train=True, download=True, transform=transform_train)
    cifar100_training_loader = DataLoader(
        cifar100_training, shuffle=shuffle, num_workers=num_workers, batch_size=batch_size)

    return cifar100_training_loader

In [8]:
def get_test_dataloader(mean, std, batch_size=16, num_workers=2, shuffle=True):
    """ return training dataloader
    Args:
        mean: mean of cifar100 test dataset
        std: std of cifar100 test dataset
        path: path to cifar100 test python dataset
        batch_size: dataloader batchsize
        num_workers: dataloader num_works
        shuffle: whether to shuffle
    Returns: cifar100_test_loader:torch dataloader object
    """

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    #cifar100_test = CIFAR100Test(path, transform=transform_test)
    cifar100_test = torchvision.datasets.CIFAR100(root='', train=False, download=True, transform=transform_test)
    cifar100_test_loader = DataLoader(
        cifar100_test, shuffle=shuffle, num_workers=num_workers, batch_size=batch_size)

    return cifar100_test_loader

In [9]:
class ClipWeightCallBack():
    
    def __init__(self):
        self.zeros_mask = None
    
    # on batch begin
    def get_zeros_mask(self, model):
        
        self.zeros_mask= []

        for weights_matrix in model.parameters():
            self.zeros_mask.append(torch.where(weights_matrix == 0, \
                                     torch.zeros(weights_matrix.data.shape).to(DEVICE), \
                                     torch.ones(weights_matrix.data.shape).to(DEVICE)))
    # on batch end
    def apply_zeros_mask(self, model):
        
        for index, weights_matrix in enumerate(model.parameters()):
            weights_matrix.data = weights_matrix.data * self.zeros_mask[index].to(DEVICE)

In [10]:
def train(model, epoch, train_dataloader, optimizer, loss_function, callbacks = None):

    start = time.time()
    model.to(DEVICE)
    model.train()
    # keep track of the zero mask
    if callbacks != None:
        callbacks.get_zeros_mask(model)
    
    for batch_index, (images, labels) in enumerate(train_dataloader):
        '''
        if epoch <= WARM:
            warmup_scheduler.step()
        '''
            
        if use_gpu:
            labels = labels.to(DEVICE)
            images = images.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        if callbacks != None:
            callbacks.apply_zeros_mask(model)
            
        n_iter = (epoch - 1) * len(train_dataloader) + batch_index + 1

        last_layer = list(model.children())[-1]
        for name, para in last_layer.named_parameters():
            if 'weight' in name:
                writer.add_scalar('LastLayerGradients/grad_norm2_weights', para.grad.norm(), n_iter)
            if 'bias' in name:
                writer.add_scalar('LastLayerGradients/grad_norm2_bias', para.grad.norm(), n_iter)
                
        if batch_index % 1000 == 0:
            print('Training Epoch: {epoch} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.6f}'.format(
                loss.item(),
                optimizer.param_groups[0]['lr'],
                epoch=epoch,
                trained_samples=batch_index * BATCH_SIZE + len(images),
                total_samples=len(train_dataloader.dataset)
            ))
        

        #update training loss for each iteration
        writer.add_scalar('Train/loss', loss.item(), n_iter)

    for name, param in model.named_parameters():
        layer, attr = os.path.splitext(name)
        attr = attr[1:]
        writer.add_histogram("{}/{}".format(layer, attr), param, epoch)

    finish = time.time()

    print('epoch {} training time consumed: {:.2f}s'.format(epoch, finish - start))

In [11]:
def evaluate_model(model, val_dataloader):
    # for validation set or testing set
    start = time.time()
    model.to(DEVICE)
    model.eval()
    
    total_preds = 0
    total_corrects = 0
    
    for batch_index, (images, labels) in enumerate(val_dataloader):
        if use_gpu:
            # labels = labels.to(DEVICE)
            images = images.to(DEVICE)
            
        outputs = model(images)
        _, preds = torch.max(outputs.data, 1)
        
        
        total_preds += len(labels)
        total_corrects += np.sum(preds.cpu().numpy() == labels.numpy())
    
    print("Accuracy is {:.5f}".format(total_corrects/total_preds))
    
    return total_corrects/total_preds

In [12]:
'''
cifar100_training_loader, cifar100_validation_loader = get_training_dataloader(
    CIFAR100_TRAIN_MEAN,
    CIFAR100_TRAIN_STD,
    num_workers = 4,
    batch_size = BATCH_SIZE,
    shuffle = True
)
'''
cifar100_training_loader = get_training_dataloader(
    CIFAR100_TRAIN_MEAN,
    CIFAR100_TRAIN_STD,
    num_workers = 4,
    batch_size = BATCH_SIZE,
    shuffle = True
)


cifar100_test_loader = get_test_dataloader(
    CIFAR100_TRAIN_MEAN,
    CIFAR100_TRAIN_STD,
    num_workers = 4,
    batch_size = BATCH_SIZE,
    shuffle = True
)

Files already downloaded and verified
Files already downloaded and verified


In [13]:
def load_vgg(path):
    model = vgg.vgg16_bn()
    weights = torch.load(path)
    model.load_state_dict(weights)
    model.to(DEVICE)
    
    return model

In [14]:
writer = SummaryWriter(log_dir=os.path.join(
            'logs', 'vgg', TIME_NOW))

INFO:tensorflow:Using local port 15990
INFO:tensorflow:Using local port 18575
INFO:tensorflow:Using local port 22566
INFO:tensorflow:Using local port 21206
INFO:tensorflow:Using local port 18859
INFO:tensorflow:Using local port 20928
INFO:tensorflow:Using local port 21844
INFO:tensorflow:Using local port 21075
INFO:tensorflow:Using local port 21384
INFO:tensorflow:Using local port 21286


In [15]:
# train block #
'''
sgd_optimizer = optim.SGD(model.parameters(), lr= 0.0001, momentum=0.9, weight_decay=5e-4)
crossEntropyLoss_function = nn.CrossEntropyLoss()
train(model, train_dataloader = cifar100_training_loader, epoch = 10, optimizer = sgd_optimizer, \
                                                          loss_function = crossEntropyLoss_function)
'''

'\nsgd_optimizer = optim.SGD(model.parameters(), lr= 0.0001, momentum=0.9, weight_decay=5e-4)\ncrossEntropyLoss_function = nn.CrossEntropyLoss()\ntrain(model, train_dataloader = cifar100_training_loader, epoch = 10, optimizer = sgd_optimizer,                                                           loss_function = crossEntropyLoss_function)\n'

# Pruning

In [16]:
def model_report(model, dataloader):
    # local final score on validation data
    num_zeros = sum([(i.detach().cpu().numpy() == 0).sum() for i in model.parameters()])
    total_parameters = sum([np.prod(i.shape) for i in model.parameters()])
    accuracy = evaluate_model(model, dataloader)
    result = (accuracy + num_zeros/total_parameters)/2
    print("num_zeros / total_parameters ratio is ", num_zeros/total_parameters)
    print("accuracy is ", accuracy)
    print("overall score is ", result)
    return num_zeros/total_parameters, accuracy

In [17]:
def prune_network(model, threshold = 0.01):
    
    # vgg has classifier and features
    for weights_matrix in model.parameters():
        weights_matrix.data = torch.where(torch.abs(weights_matrix.data) >= threshold, \
                                          weights_matrix.data, torch.zeros(weights_matrix.data.shape).to(DEVICE))

In [18]:
def finetune_prune(model, rounds, epoches, train_dataloader, test_dataloader, lr = 0.00001, threshold = 0.001):
    print("Model performance at the beginning...")
    model_report(model, test_dataloader)
    sgd_optimizer = optim.SGD(model.parameters(), lr = lr, momentum = 0.9, weight_decay = 5e-4)
    crossEntropyLoss_function = nn.CrossEntropyLoss()
    prune_callback = ClipWeightCallBack()
    
    sparsity = []
    accuracy = []
    
    print("Start pruning..")
    for i in range(rounds):
        print("Round {}/{}:".format(i + 1, rounds))
        for epoch in range(1 + epoches):
            prune_network(model)
            train(model, epoch, train_dataloader, sgd_optimizer, crossEntropyLoss_function, callbacks = prune_callback)
            zeros_percentage, test_acc = model_report(model, test_dataloader)
        sparsity.append(zeros_percentage)
        accuracy.append(test_acc)
        pickle.dump([sparsity, accuracy], open("prune_hist/sparsity_accuracy.pkl", "wb"))
    
    print("Done pruning.")
    
    return sparsity, accuracy

In [19]:
model = load_vgg("checkpoints/vgg72.pth")

In [52]:
finetune_prune(model, 10, 4, cifar100_training_loader, cifar100_test_loader, lr = 0.00001, threshold = 0.01)

Pruning threshold:

Sparsity:[0.8850553143641191,
  0.8854645702199087,
  0.8858486316019958,
  0.8862241968313407,
  0.8865842396778212,
  0.8869207049654809,
  0.8872260078936021,
  0.8874967382417068,
  0.8877426562959903,
  0.8879710528726462]
  
Accuracy: [0.7039,
  0.7166,
  0.7147,
  0.7208,
  0.7244,
  0.7241,
  0.7257,
  0.7267,
  0.7249,
  0.7296]

In [49]:
torch.save(model, open("checkpoints/vgg_sparsity8897_acc7296.pth", "wb"))

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


In [56]:
finetune_prune(model, 10, 4, cifar100_training_loader, cifar100_test_loader, lr = 0.00001, threshold = 0.1)

Model performance at the beginning...
Accuracy is 0.72770
num_zeros / total_parameters ratio is  0.8880168556614775
accuracy is  0.7277
overall score is  0.8078584278307388
Start pruning..
Round 1/10:
Training Epoch: 0 [16/50000]	Loss: 0.3550	LR: 0.000010
Training Epoch: 0 [16016/50000]	Loss: 0.3648	LR: 0.000010
Training Epoch: 0 [32016/50000]	Loss: 0.6225	LR: 0.000010
Training Epoch: 0 [48016/50000]	Loss: 0.2215	LR: 0.000010
epoch 0 training time consumed: 473.99s
Accuracy is 0.72550
num_zeros / total_parameters ratio is  0.8880264395569583
accuracy is  0.7255
overall score is  0.8067632197784791
Training Epoch: 1 [16/50000]	Loss: 0.5021	LR: 0.000010
Training Epoch: 1 [16016/50000]	Loss: 0.5632	LR: 0.000010
Training Epoch: 1 [32016/50000]	Loss: 0.3459	LR: 0.000010
Training Epoch: 1 [48016/50000]	Loss: 0.5723	LR: 0.000010
epoch 1 training time consumed: 479.13s
Accuracy is 0.72660
num_zeros / total_parameters ratio is  0.8880666566398345
accuracy is  0.7266
overall score is  0.80733332

num_zeros / total_parameters ratio is  0.8894789876913384
accuracy is  0.7245
overall score is  0.8069894938456692
Round 9/10:
Training Epoch: 0 [16/50000]	Loss: 0.5407	LR: 0.000010
Training Epoch: 0 [16016/50000]	Loss: 0.0529	LR: 0.000010
Training Epoch: 0 [32016/50000]	Loss: 0.2671	LR: 0.000010
Training Epoch: 0 [48016/50000]	Loss: 0.3269	LR: 0.000010
epoch 0 training time consumed: 478.69s
Accuracy is 0.72410
num_zeros / total_parameters ratio is  0.8895120021533779
accuracy is  0.7241
overall score is  0.8068060010766889
Training Epoch: 1 [16/50000]	Loss: 0.4614	LR: 0.000010
Training Epoch: 1 [16016/50000]	Loss: 0.1238	LR: 0.000010
Training Epoch: 1 [32016/50000]	Loss: 0.3913	LR: 0.000010
Training Epoch: 1 [48016/50000]	Loss: 0.2598	LR: 0.000010
epoch 1 training time consumed: 478.80s
Accuracy is 0.72610
num_zeros / total_parameters ratio is  0.8895452812014889
accuracy is  0.7261
overall score is  0.8078226406007445
Training Epoch: 2 [16/50000]	Loss: 0.2361	LR: 0.000010
Training E

([0.8881966565963247,
  0.8883970658463008,
  0.8885877148100819,
  0.8887782167816008,
  0.8889578413257337,
  0.8891400823321298,
  0.8893094174179245,
  0.8894789876913384,
  0.8896440600015357,
  0.8898016945032773],
 [0.7261,
  0.7237,
  0.7263,
  0.7263,
  0.7245,
  0.7297,
  0.7267,
  0.7245,
  0.7257,
  0.7249])

In [20]:
finetune_prune(model, 10, 4, cifar100_training_loader, cifar100_test_loader, lr = 0.00001, threshold = 0.3)

Model performance at the beginning...
Accuracy is 0.72010
num_zeros / total_parameters ratio is  0.0
accuracy is  0.7201
overall score is  0.36005
Start pruning..
Round 1/10:
Training Epoch: 0 [16/50000]	Loss: 3.8473	LR: 0.000010
Training Epoch: 0 [16016/50000]	Loss: 3.6601	LR: 0.000010
Training Epoch: 0 [32016/50000]	Loss: 3.7558	LR: 0.000010
Training Epoch: 0 [48016/50000]	Loss: 3.6114	LR: 0.000010
epoch 0 training time consumed: 459.04s
Accuracy is 0.66020
num_zeros / total_parameters ratio is  0.8846256559823675
accuracy is  0.6602
overall score is  0.7724128279911837
Training Epoch: 1 [16/50000]	Loss: 3.6772	LR: 0.000010
Training Epoch: 1 [16016/50000]	Loss: 3.7415	LR: 0.000010
Training Epoch: 1 [32016/50000]	Loss: 3.6665	LR: 0.000010
Training Epoch: 1 [48016/50000]	Loss: 3.6257	LR: 0.000010
epoch 1 training time consumed: 466.05s
Accuracy is 0.67870
num_zeros / total_parameters ratio is  0.8847516577493321
accuracy is  0.6787
overall score is  0.781725828874666
Training Epoch: 2 

num_zeros / total_parameters ratio is  0.8862214039783632
accuracy is  0.72
overall score is  0.8031107019891817
Round 5/10:
Training Epoch: 0 [16/50000]	Loss: 1.8818	LR: 0.000010
Training Epoch: 0 [16016/50000]	Loss: 2.1643	LR: 0.000010
Training Epoch: 0 [32016/50000]	Loss: 1.8017	LR: 0.000010
Training Epoch: 0 [48016/50000]	Loss: 1.7938	LR: 0.000010
epoch 0 training time consumed: 465.68s
Accuracy is 0.72340
num_zeros / total_parameters ratio is  0.8862974871731613
accuracy is  0.7234
overall score is  0.8048487435865807
Training Epoch: 1 [16/50000]	Loss: 1.9446	LR: 0.000010
Training Epoch: 1 [16016/50000]	Loss: 2.4282	LR: 0.000010
Training Epoch: 1 [32016/50000]	Loss: 2.1207	LR: 0.000010
Training Epoch: 1 [48016/50000]	Loss: 1.8270	LR: 0.000010
epoch 1 training time consumed: 465.85s
Accuracy is 0.72300
num_zeros / total_parameters ratio is  0.8863725414221254
accuracy is  0.723
overall score is  0.8046862707110627
Training Epoch: 2 [16/50000]	Loss: 1.8197	LR: 0.000010
Training Epoc

accuracy is  0.7283
overall score is  0.8078971343858528
Round 9/10:
Training Epoch: 0 [16/50000]	Loss: 0.5926	LR: 0.000010
Training Epoch: 0 [16016/50000]	Loss: 0.5832	LR: 0.000010
Training Epoch: 0 [32016/50000]	Loss: 1.1431	LR: 0.000010
Training Epoch: 0 [48016/50000]	Loss: 0.5699	LR: 0.000010
epoch 0 training time consumed: 472.30s
Accuracy is 0.72700
num_zeros / total_parameters ratio is  0.8875436875701814
accuracy is  0.727
overall score is  0.8072718437850908
Training Epoch: 1 [16/50000]	Loss: 0.5999	LR: 0.000010
Training Epoch: 1 [16016/50000]	Loss: 0.4665	LR: 0.000010
Training Epoch: 1 [32016/50000]	Loss: 0.6951	LR: 0.000010
Training Epoch: 1 [48016/50000]	Loss: 0.4735	LR: 0.000010
epoch 1 training time consumed: 471.41s
Accuracy is 0.72800
num_zeros / total_parameters ratio is  0.8875916070475851
accuracy is  0.728
overall score is  0.8077958035237925
Training Epoch: 2 [16/50000]	Loss: 0.5053	LR: 0.000010
Training Epoch: 2 [16016/50000]	Loss: 0.6837	LR: 0.000010
Training Epo

([0.8850564903022149,
  0.8854681568311008,
  0.8858511892673541,
  0.8862214039783632,
  0.8865832401304398,
  0.8869235272169108,
  0.8872239500019344,
  0.8874942687717056,
  0.8877407159981321,
  0.8879726991859803],
 [0.7013, 0.7152, 0.7165, 0.72, 0.7217, 0.722, 0.7277, 0.7283, 0.7258, 0.7242])

In [21]:
torch.save(model, open("checkpoints/vgg_sparsity8897_acc7242_3e-1.pth", "wb"))

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


In [None]:
finetune_prune(model, 10, 4, cifar100_training_loader, cifar100_test_loader, lr = 0.00001, threshold = 0.5)

Model performance at the beginning...
Accuracy is 0.72420
num_zeros / total_parameters ratio is  0.8879726991859803
accuracy is  0.7242
overall score is  0.8060863495929902
Start pruning..
Round 1/10:
Training Epoch: 0 [16/50000]	Loss: 0.3515	LR: 0.000010
Training Epoch: 0 [16016/50000]	Loss: 0.8406	LR: 0.000010
Training Epoch: 0 [32016/50000]	Loss: 0.5008	LR: 0.000010
Training Epoch: 0 [48016/50000]	Loss: 0.5588	LR: 0.000010
epoch 0 training time consumed: 461.21s
Accuracy is 0.72500
num_zeros / total_parameters ratio is  0.8880180903964782
accuracy is  0.725
overall score is  0.8065090451982391
Training Epoch: 1 [16/50000]	Loss: 0.2265	LR: 0.000010
Training Epoch: 1 [16016/50000]	Loss: 0.2165	LR: 0.000010
Training Epoch: 1 [32016/50000]	Loss: 0.8522	LR: 0.000010
Training Epoch: 1 [48016/50000]	Loss: 0.5311	LR: 0.000010
epoch 1 training time consumed: 472.91s
Accuracy is 0.72510
num_zeros / total_parameters ratio is  0.8880603359725696
accuracy is  0.7251
overall score is  0.806580167

# Huffman Coding

In [67]:
from dahuffman import *

In [68]:
from dahuffman import HuffmanCodec

In [69]:
codec = HuffmanCodec.from_frequencies({'e': 100, 'n':20, 'x':1, 'i': 40, 'q':3})

In [71]:
encoded

b'\x86|%\x13i@'

In [72]:
codec.decode(encoded)

'exeneeeexniqneieini'

In [74]:
from scipy import sparse

In [76]:
sparse.csr_matrix([[1,3,0], [2,3,0], [0, 0, 2]])

<3x3 sparse matrix of type '<class 'numpy.intc'>'
	with 5 stored elements in Compressed Sparse Row format>

In [79]:
temp_sparse_weight = model.classifier[0].weight

In [84]:
sp_mt = sparse.csr_matrix(temp_sparse_weight.detach().cpu().numpy())

In [88]:
import sys

In [89]:
sys.getsizeof(temp_sparse_weight)

72

In [91]:
sys.getsizeof(sp_mt)

56

In [92]:
print(sp_mt)

  (0, 4)	0.023186345
  (0, 20)	0.02247533
  (0, 21)	0.034118313
  (0, 26)	0.014310435
  (0, 32)	0.022853471
  (0, 36)	0.030668179
  (0, 37)	-0.041748084
  (0, 39)	0.042770255
  (0, 45)	0.027494656
  (0, 46)	-0.022704057
  (0, 47)	0.028912688
  (0, 50)	0.02269156
  (0, 59)	0.01317652
  (0, 65)	0.026181713
  (0, 75)	0.021841025
  (0, 76)	-0.030036248
  (0, 83)	0.04323151
  (0, 88)	-0.023606148
  (0, 98)	0.021143986
  (0, 104)	0.020611063
  (0, 108)	0.053362876
  (0, 117)	0.019666748
  (0, 120)	0.021380028
  (0, 130)	0.02132387
  (0, 132)	0.022016082
  :	:
  (4095, 363)	-0.013786285
  (4095, 364)	0.025842268
  (4095, 365)	0.022444727
  (4095, 377)	0.043030553
  (4095, 385)	0.014561667
  (4095, 388)	0.017477114
  (4095, 406)	0.020886062
  (4095, 413)	0.022315985
  (4095, 420)	-0.012988817
  (4095, 435)	0.013630478
  (4095, 436)	0.019695008
  (4095, 439)	0.019828018
  (4095, 455)	0.016520282
  (4095, 456)	0.0142295435
  (4095, 461)	0.016456155
  (4095, 465)	0.028237121
  (4095, 474)	-0.0204

In [95]:
from modules.coding import Codec

ImportError: No module named 'bitarray'