In [1]:
import dagshub
# Initialise DagsHub for the leocus4/TinyFFF repository with the MLflow option enabled.
dagshub.init(
    repo_owner='leocus4',
    repo_name='TinyFFF',
    mlflow=True,
)

In [2]:
# IMPORT FOR COMPRESSION
import sys
sys.path.insert(0, './Compression-Fast-Inf-FFF/')
from sparsecompFFF import compress_FF_models, print_size_model
from save_torch_model_to_c import main

In [3]:
import torch
# Use the GPU when PyTorch reports one as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Training on {DEVICE}")

Training on cuda


In [4]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay

In [5]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay

In [6]:
from fastfeedforward import FFF

def train(net, trainloader, epochs, norm_weight=0.0):
    """Fit ``net`` on ``trainloader`` with Adam (lr=0.001) and cross-entropy loss.

    Args:
        net: Module to optimise. When ``norm_weight`` is non-zero it must expose
            ``net.fff.w1s`` and ``net.fff.w2s``.
        trainloader: Iterable yielding ``(images, labels)`` batches.
        epochs: Number of full passes over ``trainloader``.
        norm_weight: Coefficient of the penalty added to the loss: the sum of the
            squared entries of ``net.fff.w1s`` and of ``net.fff.w2s``, each scaled
            by this value. ``0`` disables the penalty.

    Every batch is moved to the notebook-level ``DEVICE`` before the forward
    pass. Nothing is returned; ``net`` is updated in place.
    """
    loss_fn = torch.nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=0.001)
    penalise_leaves = norm_weight != 0

    for _ in range(epochs):
        for batch_images, batch_labels in trainloader:
            inputs = batch_images.to(DEVICE)
            targets = batch_labels.to(DEVICE)

            adam.zero_grad()
            batch_loss = loss_fn(net(inputs), targets)
            if penalise_leaves:
                # Squared-magnitude penalty on both leaf weight tensors.
                batch_loss += norm_weight * net.fff.w1s.pow(2).sum()
                batch_loss += norm_weight * net.fff.w2s.pow(2).sum()
            batch_loss.backward()
            adam.step()


def test(net, testloader):
    """Validate the network on the entire test set."""
    # Define loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    correct, total, loss = 0, 0, 0.0
    # Train the network for the given number of epochs
    with torch.no_grad():
        # Iterate over data
        for data in testloader:
            images, labels = data[0].to(DEVICE), data[1].to(DEVICE)
            outputs = net(images)
            loss += criterion(outputs, labels).item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = correct / total
    return loss, accuracy


class Net(torch.nn.Module):
    """Classifier made of a single ``FFF`` layer followed by a softmax.

    The ``FFF`` layer is built with a ReLU activation and ``train_hardened=True``;
    the remaining constructor arguments are forwarded to it unchanged.

    NOTE(review): ``forward`` returns softmax probabilities, while ``train`` and
    ``test`` in this notebook pass the result to ``CrossEntropyLoss`` — confirm
    that applying the loss on already-normalised outputs is intended.
    """

    def __init__(self, input_width, leaf_width, output_width, depth, dropout, region_leak):
        super().__init__()
        relu = torch.nn.ReLU()
        self.fff = FFF(
            input_width, leaf_width, output_width, depth, relu, dropout,
            train_hardened=True,
            region_leak=region_leak,
        )

    def forward(self, x):
        """Flatten each sample to a vector, apply the FFF layer, then softmax over the last axis."""
        flat = x.view(x.shape[0], -1)
        scores = self.fff(flat)
        return torch.nn.functional.softmax(scores, dim=-1)

    def parameters(self):
        """Return the parameter iterator of the wrapped FFF layer."""
        fff_layer = self.fff
        return fff_layer.parameters()


class FF(torch.nn.Module):
    """Two-layer fully connected baseline: Linear -> ReLU -> Linear -> softmax."""

    def __init__(self, input_width, layer_width, output_width):
        super().__init__()
        self.fc1 = torch.nn.Linear(input_width, layer_width)
        self.fc2 = torch.nn.Linear(layer_width, output_width)

    def forward(self, x):
        """Flatten each sample to a vector and return softmax scores over the last axis."""
        flat = x.view(x.shape[0], -1)
        hidden = torch.nn.functional.relu(self.fc1(flat))
        scores = self.fc2(hidden)
        return torch.nn.functional.softmax(scores, dim=-1)

    def parameters(self):
        """Return a list with the parameters of ``fc1`` followed by those of ``fc2``."""
        first = list(self.fc1.parameters())
        second = list(self.fc2.parameters())
        return first + second


def compute_n_params(input_width: int, l_w: int, depth: int, output_width: int):
    """Print the parameter count of an FFF ``Net`` next to that of an ``FF`` baseline.

    A ``Net`` (dropout and region leak set to 0) and an ``FF`` whose hidden width
    equals ``l_w`` are instantiated. The shape of every ``Net`` parameter tensor
    is printed, followed by both totals. Nothing is returned.
    """
    fff_model = Net(input_width, l_w, output_width, depth, 0, 0)
    ff_model = FF(input_width, l_w, output_width)

    ff_total = sum(param.numel() for param in ff_model.parameters())

    fff_total = 0
    for index, param in enumerate(fff_model.parameters()):
        print(f"[{index}-th layer]: {param.shape}")
        fff_total += param.numel()

    print(f"FFF: {fff_total}\nFF: {ff_total}")

In [7]:
import pickle
import mlflow
import numpy as np
import pandas as pd
from time import time
from matplotlib import pyplot as plt

def get_dist(net, testloader):
    """Collect, for every sample in ``testloader``, its label and the node reported by ``net``.

    ``net.forward(images, return_nodes=True)`` is called on each batch (moved to
    the notebook-level ``DEVICE``) with gradients disabled; the second element of
    its result is kept.

    Returns:
        ``(labels, nodes)``: the per-batch labels and per-batch node tensors,
        each concatenated along dimension 0 in loader order.
    """
    label_batches = []
    node_batches = []
    with torch.no_grad():
        for batch in testloader:
            images = batch[0].to(DEVICE)
            labels = batch[1].to(DEVICE)
            _, nodes = net.forward(images, return_nodes=True)
            label_batches.append(labels)
            node_batches.append(nodes)
    all_labels = torch.concat(label_batches, 0)
    all_nodes = torch.concat(node_batches, 0)
    return all_labels, all_nodes


class FFFWrapper(torch.nn.Module):
    """Wrap a network exposing an FFF layer as ``.fff`` so leaves can be tracked and simplified.

    ``forward`` re-implements hard-decision inference over the wrapped layer's
    tensors (``node_weights``, ``node_biases``, ``w1s``, ``b1s``, ``w2s``) and
    can additionally report which leaf every sample reached. Leaves listed in
    ``_fastinference`` return a stored constant vector instead of being
    evaluated.

    NOTE: the next cell of this notebook defines ``FFFWrapper`` again (adding
    ``simplify_leaves_full``); once that cell runs, it replaces this definition.
    """

    def __init__(self, fff):
        super().__init__()
        self._fff = fff
        # One slot per leaf (2 ** depth). ``None`` means "evaluate the leaf";
        # a tensor is the constant output returned for that leaf.
        self._fastinference = [None] * (2 ** self._fff.fff.depth.item())

    def forward(self, x, return_nodes=False):
        """Run hard-decision inference and optionally return the reached leaves.

        Args:
            x: Batch of inputs; each sample is flattened to a vector.
            return_nodes: When true, also return the leaf index of every sample.

        Returns:
            A ``(batch, output_width)`` tensor, or ``(outputs, leaves)`` when
            ``return_nodes`` is true.

        NOTE(review): no ``b2s`` term is added to the leaf output, although the
        training log in this notebook lists a ``_fff.fff.b2s`` parameter —
        confirm this is intended.
        """
        tree = self._fff.fff
        depth = tree.depth.item()
        x = x.view(len(x), -1)
        original_shape = x.shape
        batch_size = x.shape[0]

        # Walk down the tree one level at a time; a non-negative plane score
        # selects the second child of the current node.
        current_nodes = torch.zeros((batch_size,), dtype=torch.long, device=x.device)
        for level in range(depth):
            plane_coeffs = tree.node_weights.index_select(dim=0, index=current_nodes)
            plane_offsets = tree.node_biases.index_select(dim=0, index=current_nodes)
            plane_coeff_score = torch.bmm(x.unsqueeze(1), plane_coeffs.unsqueeze(-1))
            plane_score = plane_coeff_score.squeeze(-1) + plane_offsets
            plane_choices = (plane_score.squeeze(-1) >= 0).long()

            platform = 2 ** level - 1
            next_platform = 2 ** (level + 1) - 1
            current_nodes = (current_nodes - platform) * 2 + plane_choices + next_platform

        # Computed from ``depth`` rather than from the loop variable so that a
        # depth-0 tree (single leaf, loop never entered) is handled as well.
        leaves = current_nodes - (2 ** depth - 1)
        new_logits = torch.empty((batch_size, tree.output_width), dtype=torch.float, device=x.device)

        for i, leaf_index in enumerate(leaves.tolist()):
            constant = self._fastinference[leaf_index]
            if constant is not None:
                new_logits[i] = constant
            else:
                logits = torch.matmul(x[i].unsqueeze(0), tree.w1s[leaf_index])
                logits += tree.b1s[leaf_index].unsqueeze(-2)
                activations = tree.activation(logits)
                new_logits[i] = torch.matmul(activations, tree.w2s[leaf_index]).squeeze(-2)

        outputs = new_logits.view(*original_shape[:-1], tree.output_width)
        if return_nodes:
            return outputs, leaves
        return outputs

    def simplify_leaves(self, trainloader, threshold=0.7):
        """Replace leaves dominated by one class with a constant one-hot output.

        For every leaf reached by at least one sample of ``trainloader``, the
        share of each class among the samples routed to it is computed. When the
        largest share is strictly greater than ``threshold`` the leaf's slot in
        ``_fastinference`` is set to the one-hot vector of that class. Passing
        ``threshold=None`` replaces every reached leaf.

        The one-hot vector has ``output_width`` entries so that it always
        matches the rows written by ``forward``, even when the highest class
        index does not occur in ``trainloader``.
        """
        y, leaves = get_dist(self, trainloader)
        y = y.cpu().detach().numpy()
        leaves = leaves.cpu().detach().numpy()
        n_outputs = self._fff.fff.output_width
        classes = np.arange(n_outputs)

        for leaf in np.unique(leaves):
            leaf = int(leaf)
            labels_here = y[leaves == leaf]
            counts = (labels_here[:, None] == classes).sum(axis=0)
            ratios = (counts / labels_here.shape[0]).astype(np.float32)
            best = int(np.argmax(ratios))
            if threshold is None or ratios[best] > np.float32(threshold):
                output = torch.zeros(n_outputs)
                output[best] = 1
                self._fastinference[leaf] = output
                print(f"Leaf {leaf} has been replaced with {best}")
        print(self._fastinference)

In [8]:
class FFFWrapper(torch.nn.Module):
    """Wrap a network exposing an FFF layer as ``.fff`` so leaves can be tracked and simplified.

    ``forward`` re-implements hard-decision inference over the wrapped layer's
    tensors (``node_weights``, ``node_biases``, ``w1s``, ``b1s``, ``w2s``) and
    can additionally report which leaf every sample reached. Leaves listed in
    ``_fastinference`` return a stored constant vector instead of being
    evaluated; ``simplify_leaves`` and ``simplify_leaves_full`` fill that list.
    """

    def __init__(self, fff):
        super().__init__()
        self._fff = fff
        # One slot per leaf (2 ** depth). ``None`` means "evaluate the leaf";
        # a tensor is the constant output returned for that leaf.
        self._fastinference = [None] * (2 ** self._fff.fff.depth.item())

    def forward(self, x, return_nodes=False):
        """Run hard-decision inference and optionally return the reached leaves.

        Args:
            x: Batch of inputs; each sample is flattened to a vector.
            return_nodes: When true, also return the leaf index of every sample.

        Returns:
            A ``(batch, output_width)`` tensor, or ``(outputs, leaves)`` when
            ``return_nodes`` is true.

        NOTE(review): no ``b2s`` term is added to the leaf output, although the
        training log in this notebook lists a ``_fff.fff.b2s`` parameter —
        confirm this is intended.
        """
        tree = self._fff.fff
        depth = tree.depth.item()
        x = x.view(len(x), -1)
        original_shape = x.shape
        batch_size = x.shape[0]

        # Walk down the tree one level at a time; a non-negative plane score
        # selects the second child of the current node.
        current_nodes = torch.zeros((batch_size,), dtype=torch.long, device=x.device)
        for level in range(depth):
            plane_coeffs = tree.node_weights.index_select(dim=0, index=current_nodes)
            plane_offsets = tree.node_biases.index_select(dim=0, index=current_nodes)
            plane_coeff_score = torch.bmm(x.unsqueeze(1), plane_coeffs.unsqueeze(-1))
            plane_score = plane_coeff_score.squeeze(-1) + plane_offsets
            plane_choices = (plane_score.squeeze(-1) >= 0).long()

            platform = 2 ** level - 1
            next_platform = 2 ** (level + 1) - 1
            current_nodes = (current_nodes - platform) * 2 + plane_choices + next_platform

        # Computed from ``depth`` rather than from the loop variable so that a
        # depth-0 tree (single leaf, loop never entered) is handled as well.
        leaves = current_nodes - (2 ** depth - 1)
        new_logits = torch.empty((batch_size, tree.output_width), dtype=torch.float, device=x.device)

        for i, leaf_index in enumerate(leaves.tolist()):
            constant = self._fastinference[leaf_index]
            if constant is not None:
                new_logits[i] = constant
            else:
                logits = torch.matmul(x[i].unsqueeze(0), tree.w1s[leaf_index])
                logits += tree.b1s[leaf_index].unsqueeze(-2)
                activations = tree.activation(logits)
                new_logits[i] = torch.matmul(activations, tree.w2s[leaf_index]).squeeze(-2)

        outputs = new_logits.view(*original_shape[:-1], tree.output_width)
        if return_nodes:
            return outputs, leaves
        return outputs

    def simplify_leaves(self, trainloader, threshold=0.7):
        """Replace leaves whose dominant class share is strictly above ``threshold``.

        See ``_simplify_leaves`` for how the shares are computed.
        """
        self._simplify_leaves(trainloader, threshold)

    def simplify_leaves_full(self, trainloader):
        """Replace every leaf reached by ``trainloader`` with its most frequent class."""
        self._simplify_leaves(trainloader, None)

    def _simplify_leaves(self, trainloader, threshold):
        """Shared implementation of ``simplify_leaves`` and ``simplify_leaves_full``.

        For every leaf reached by at least one sample of ``trainloader``, the
        share of each class among the samples routed to it is computed. The
        leaf's slot in ``_fastinference`` is set to the one-hot vector of the
        most frequent class when ``threshold`` is ``None`` or when that class's
        share is strictly greater than ``threshold``.

        The one-hot vector has ``output_width`` entries so that it always
        matches the rows written by ``forward``, even when the highest class
        index does not occur in ``trainloader``.
        """
        y, leaves = get_dist(self, trainloader)
        y = y.cpu().detach().numpy()
        leaves = leaves.cpu().detach().numpy()
        n_outputs = self._fff.fff.output_width
        classes = np.arange(n_outputs)

        for leaf in np.unique(leaves):
            leaf = int(leaf)
            labels_here = y[leaves == leaf]
            counts = (labels_here[:, None] == classes).sum(axis=0)
            ratios = (counts / labels_here.shape[0]).astype(np.float32)
            best = int(np.argmax(ratios))
            if threshold is None or ratios[best] > np.float32(threshold):
                output = torch.zeros(n_outputs)
                output[best] = 1
                self._fastinference[leaf] = output
                print(f"Leaf {leaf} has been replaced with {best}")
        print(self._fastinference)

In [9]:
from tqdm import trange
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

# Load MNIST (training and test set). Images are converted to tensors and
# normalised with mean 0.1307 and standard deviation 0.3081.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)
trainset = MNIST("../data", train=True, download=True, transform=transform)
testset = MNIST("../data", train=False, download=True, transform=transform)

# Loaders over the complete training and test sets (no class filtering is applied).
trainloader = DataLoader(trainset, batch_size=1024, shuffle=True)
testloader = DataLoader(testset, batch_size=1024)

num_examples = {"trainset": len(trainset), "testset": len(testset)}

print(num_examples)

{'trainset': 60000, 'testset': 10000}


In [10]:
# Training variables.
# NOTE(review): `learning_rate`, `num_epochs` and `criterion` are not referenced by
# any other cell visible in this notebook; kept in case other code relies on them.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
learning_rate = 0.001
num_epochs = 7
criterion = nn.CrossEntropyLoss()

# Dataset: hold out `val_size` training samples for validation.
batch_size = 1024
val_size = 5000
train_size = len(trainset) - val_size
# A dedicated, seeded generator makes the train/validation split identical on
# every run instead of depending on the global RNG state.
SPLIT_SEED = 0
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_ds, val_ds = random_split(trainset, [train_size, val_size], generator=split_generator)
train_loader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_ds, batch_size, num_workers=4)
test_loader = DataLoader(testset, batch_size, num_workers=4)

In [11]:
# MLflow run ids whose `truncated_model.pkl` artifact is inspected and compressed below.
list_of_run = ['27f4eafb191340f592dfab6992d3700d']

# mlflow.artifacts.download_artifacts(run_id=run_id, dst_path=".")
# wrapped_model = pickle.load(open("./truncated_model.pkl", "rb"))
# wrapped_model._fff.fff.depth.item()
# wrapped_model._fff.fff.input_width
# wrapped_model._fff.fff.leaf_width
# wrapped_model._fff.fff.output_width

In [14]:
# For every listed run: download its artifacts, load the pickled wrapped model,
# print its FFF dimensions and record its uncompressed size in `result`.
result = []
for run_id in list_of_run:
    run = mlflow.get_run(run_id)
    starting_run_id = run.data.params['starting_run']
    starting_run = mlflow.get_run(starting_run_id)
    norm_weight = starting_run.data.params['norm_weight']

    mlflow.artifacts.download_artifacts(run_id=run_id, dst_path="./baselines")
    # NOTE(review): unpickling executes code stored in the file; only load
    # artifacts from runs you trust.
    with open("./baselines/truncated_model.pkl", "rb") as model_file:
        wrapped_model = pickle.load(model_file)
    depth = wrapped_model._fff.fff.depth.item()
    input_width = wrapped_model._fff.fff.input_width
    leaf_width = wrapped_model._fff.fff.leaf_width
    output_width = wrapped_model._fff.fff.output_width
    buffer_size = 2*(leaf_width + output_width + 3)
    print("Run:\t", run_id)
    print("Depth:\t", depth)
    print("Input:\t", input_width)
    print("Output:\t", output_width)
    print("Leaf:\t", leaf_width)
    print("Buffer:\t", buffer_size)

    # Settings for reducing the sparsity and training only below certain thresholds.
    # NOTE(review): none of these values is read by any cell visible in this
    # notebook; kept in case other code relies on them.
    list_of_sizes = [100, 90, 80, 70, 60, 50]
    checked_sizes = [False] * len(list_of_sizes)
    current_size_index = 0

    start = 0.5
    a = start
    b = start
    sizes = []
    before_trunc_sizes = []
    trunc_sizes = []

    model = wrapped_model.to(device)

    # Trainable parameter tensors with more than one dimension.
    layers_list = [
        p for _, p in model.named_parameters()
        if p.dim() > 1 and p.requires_grad
    ]
    un, comp, layers = print_size_model(model, layers_list, [1,1,1,1,1,1])

    result.append({'run_id': run_id, 'depth': depth,'input_width':input_width,
                   'output': output_width, 'leaf_width':leaf_width,
                   'buffer_size': buffer_size, 'sizes': un})

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Run:	 27f4eafb191340f592dfab6992d3700d
Depth:	 3
Input:	 784
Output:	 10
Leaf:	 4
Buffer:	 34


In [15]:
# Print one summary dictionary per processed run, each on its own line.
if result:
    print("\n".join(str(r) for r in result))

{'run_id': '27f4eafb191340f592dfab6992d3700d', 'depth': 3, 'input_width': 784, 'output': 10, 'leaf_width': 4, 'buffer_size': 34, 'sizes': 124068}


In [17]:
# Compress `model` with a requested target size of 50 (reported as KB in the cell output).
# NOTE(review): unlike the following compression cell, no `device` argument is
# passed here — confirm the function's default is the intended device.
compress_FF_models(
    model,
    50,
    train_loader,
    test_loader,
    val_loader=val_loader,
    num_epochs=8,
)

Target size requested: 50 KB
Starting Density of model's parameters: [1, 1, 1, 1, 1, 1]
Starting size of the model: 124.068 KB
1 iteration -  Size: 114064.8 [1, 1, 0.45, 1, 1, 1]
2 iteration -  Size: 105033.12000000001 [1, 1, 0.405, 1, 1, 1]
3 iteration -  Size: 96904.60800000001 [1, 1, 0.36450000000000005, 1, 1, 1]
4 iteration -  Size: 89588.94720000001 [1, 1, 0.32805000000000006, 1, 1, 1]
5 iteration -  Size: 83004.85248 [1, 1, 0.29524500000000004, 1, 1, 1]
6 iteration -  Size: 77079.167232 [1, 1, 0.2657205, 1, 1, 1]
7 iteration -  Size: 71746.05050879999 [1, 1, 0.23914845, 1, 1, 1]
8 iteration -  Size: 66946.24545792 [1, 1, 0.215233605, 1, 1, 1]
9 iteration -  Size: 62626.420912128 [1, 1, 0.1937102445, 1, 1, 1]
10 iteration -  Size: 58738.5788209152 [1, 1, 0.17433922005, 1, 1, 1]
11 iteration -  Size: 55239.52093882368 [1, 1, 0.156905298045, 1, 1, 1]
12 iteration -  Size: 52090.36884494132 [1, 1, 0.1412147682405, 1, 1, 1]
Disabling: _fff.fff.depth
Disabling: _fff.fff.node_weights
Di

49256.13196044718

In [18]:
# Compress `model` further, to a requested target size of 40 (reported as KB in the
# cell output), with the fast-inference loss enabled.
compress_FF_models(
    model,
    40,
    train_loader,
    test_loader,
    val_loader=val_loader,
    fastInfLoss=True,
    fastInfNormWeight=0.0001,
    num_epochs=8,
    device=device,
)

Target size requested: 40 KB
Starting Density of model's parameters: [1, 1, 0.12711256377551017, 1, 1, 1]
Starting size of the model: 49.227999999999994 KB
1 iteration -  Size: 46708.79999999999 [1, 1, 0.11440130739795915, 1, 1, 1]
2 iteration -  Size: 44412.719999999994 [1, 1, 0.10296117665816323, 1, 1, 1]
3 iteration -  Size: 42346.24799999999 [1, 1, 0.09266505899234691, 1, 1, 1]
4 iteration -  Size: 40486.42319999999 [1, 1, 0.08339855309311221, 1, 1, 1]
Disabling: _fff.fff.depth
Disabling: _fff.fff.node_weights
Disabling: _fff.fff.node_biases
Disabling: _fff.fff.w1s
Disabling: _fff.fff.b1s
Disabling: _fff.fff.w2s
Disabling: _fff.fff.b2s
Activating: torch.Size([7, 784])
Activating: torch.Size([7, 1])
Activating: torch.Size([8, 784, 4])
Activating: torch.Size([8, 4])
Activating: torch.Size([8, 4, 10])
Activating: torch.Size([8, 10])
Epoch [1/8], Step[54/54], Loss: 0.6405
Epoch[1]: v_loss: 0.6699 v_acc: 86.45
Epoch [2/8], Step[54/54], Loss: 0.7913
Epoch[2]: v_loss: 0.81798 v_acc: 79.11

38812.580879999994

In [22]:
# Encode every leaf slot as the arg-max index of its stored constant output,
# or -1 when the slot is empty (None), then pass the list as a string to the exporter.
leaf_codes = [
    int(slot.argmax()) if slot is not None else -1
    for slot in model._fastinference
]
original_fastinference = str(leaf_codes)
main(model, "model_to_c", original_fastinference)