In [1]:
from cnns_classes import t1_quantizedCNN, t2_quantizedCNN

In [2]:
#https://www.kaggle.com/code/vmarkin/advatt
# carregar as bibliotecas básicas necessárias
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
from tqdm import tqdm
from sklearn.metrics import accuracy_score

# gerar os gráficos logo abaixo dos comandos de plot
%matplotlib inline
torch.manual_seed(4)

<torch._C.Generator at 0x7fc65c330290>

In [3]:
train_qtdy = 324000
test_qtdy = 81000
batch_size = 512
npy_data_size = 1000 #for simulation

train_data_path = './dataset/X_train_sat6.csv'
train_label_path = './dataset/y_train_sat6.csv'
test_data_path = './dataset/X_test_sat6.csv'
test_label_path = './dataset/y_test_sat6.csv'

t1_quantizations=[2,4,8]
t2_quantizations=[2,4,8]

device = torch.device('cpu')

In [4]:
def data_read(data_path, nrows):
    data=pd.read_csv(data_path, header=None, nrows=nrows, dtype=np.uint8)
    data=data.values ## converting the data into numpy array
    return data

In [5]:
class SatImgDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.transform = T.ToTensor()
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, index):
        x = self.transform(self.X[index])
        y = torch.FloatTensor(self.y[index])
        return {'x':x, 'y':y}

In [6]:
train_data=data_read(train_data_path, nrows=train_qtdy)
print("Train data shape:" + str(train_data.shape))

##Read training data labels
train_data_label=data_read(train_label_path,nrows=train_qtdy)
print("Train data label shape:" + str(train_data_label.shape))

##Read test data
test_data=data_read(test_data_path, nrows=test_qtdy)
print("Test data shape:" + str(test_data.shape))

##Read test data labels
test_data_label=data_read(test_label_path,nrows=test_qtdy)
print("Test data label shape:" + str(test_data_label.shape))

train_data_reshaped = train_data.reshape(train_qtdy,28,28,4)
test_data_reshaped = test_data.reshape(test_qtdy,28,28,4) 

final_train_data = np.zeros((train_qtdy, 32, 32, 4),dtype=np.float32)
final_train_data[:, :28, :28, :] = train_data_reshaped;

final_test_data = np.zeros((test_qtdy, 32, 32, 4),dtype=np.float32)
final_test_data[:, :28, :28, :] = test_data_reshaped;

output_tensor = []

for label in test_data_label:
    output_tensor.append(label.argmax())

input_tensor = torch.from_numpy(final_test_data[:npy_data_size])
output_tensor = torch.Tensor(output_tensor[:npy_data_size])

np.save("input.npy", input_tensor)
np.save("expected_output.npy", output_tensor)

dataset_train = SatImgDataset(final_train_data, train_data_label)
dataset_test = SatImgDataset(final_test_data, test_data_label)

loader_train = DataLoader(dataset_train, batch_size, shuffle=True)
loader_test = DataLoader(dataset_test, batch_size, shuffle=False)

Train data shape:(324000, 3136)
Train data label shape:(324000, 6)
Test data shape:(81000, 3136)
Test data label shape:(81000, 6)


In [7]:
def get_model_output_filename (topology,quant):
    return f"./pytorch_models/sat6-cnn-t{topology}w{quant}.pt"

In [8]:
def train_model(epochs,lr,topology,topology_class,quant):
    print(f"training t{topology}w{quant}")
    model = topology_class(bit_quantization=quant)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    epochs = epochs
    criterion = nn.CrossEntropyLoss()
    model.train()
    i=0
    for e in range(epochs):
        if i % 5 == 0:
            print(f"epoch ({i})") 
        for batch in tqdm(loader_train):
            pred = model(batch['x'].to(device))
            loss = criterion(pred, batch['y'].to(device))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        i+=1

    print("saving the pytorch model...")    
    torch.save(model.state_dict(), get_model_output_filename(topology=topology,quant=quant))
    print("finishing training")
    return get_model_output_filename(topology=topology,quant=quant)

In [9]:
def test_model(topology,topology_class,model_filename,quant):
    print(f"testing t{topology}w{quant}")
    model = topology_class(bit_quantization=quant)
    model.load_state_dict(torch.load(model_filename))
    with torch.no_grad():
        correct = 0
        i=0
        for batch in loader_test:
            pred = model(batch['x'].to(device))
            predicted = torch.max(pred, 1)[1]
            real_class = torch.max(batch['y'].to(device), 1)[1]
            correct += (predicted == real_class).sum()    
    accuracy = correct.item()/len(dataset_test)*100
    print(f"accuracy of this model: {accuracy}% ({len(dataset_test)} test cases)")
    print(f"finishing testing")

In [10]:
def make_model(topology_class,topology,quant):
    print(f"starting to generate pytorch model t{topology}w{quant}")
    model_filename = train_model(epochs=30,topology=topology,lr=3e-4,topology_class=topology_class,quant=quant)
    test_model(topology=topology,topology_class=topology_class,model_filename=model_filename,quant=quant)
    print(f"finishing model generate")
    print(f"------------------------")

In [11]:
for quant in t1_quantizations:
    make_model(topology_class=t1_quantizedCNN,topology=1,quant=quant)

for quant in t2_quantizations:
    make_model(topology_class=t2_quantizedCNN,topology=2,quant=quant)

starting to generate pytorch model t1w2
training t1w2
epoch (0)


100%|█████████████████████████████████████████| 633/633 [01:08<00:00,  9.22it/s]
100%|█████████████████████████████████████████| 633/633 [00:54<00:00, 11.55it/s]
100%|█████████████████████████████████████████| 633/633 [00:57<00:00, 10.93it/s]
100%|█████████████████████████████████████████| 633/633 [00:55<00:00, 11.47it/s]
100%|█████████████████████████████████████████| 633/633 [00:54<00:00, 11.57it/s]


epoch (5)


100%|█████████████████████████████████████████| 633/633 [00:55<00:00, 11.43it/s]
100%|█████████████████████████████████████████| 633/633 [00:57<00:00, 11.06it/s]
100%|█████████████████████████████████████████| 633/633 [00:53<00:00, 11.78it/s]
100%|█████████████████████████████████████████| 633/633 [00:53<00:00, 11.86it/s]
100%|█████████████████████████████████████████| 633/633 [00:56<00:00, 11.30it/s]


epoch (10)


100%|█████████████████████████████████████████| 633/633 [00:57<00:00, 11.08it/s]
100%|█████████████████████████████████████████| 633/633 [00:58<00:00, 10.89it/s]
100%|█████████████████████████████████████████| 633/633 [00:57<00:00, 10.98it/s]
100%|█████████████████████████████████████████| 633/633 [01:09<00:00,  9.05it/s]
100%|█████████████████████████████████████████| 633/633 [01:07<00:00,  9.45it/s]


epoch (15)


100%|█████████████████████████████████████████| 633/633 [01:00<00:00, 10.51it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.15it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.11it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.45it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.44it/s]


epoch (20)


100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.54it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.14it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.34it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.47it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.48it/s]


epoch (25)


100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.21it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.34it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.44it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.44it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.37it/s]


saving the pytorch model...
finishing training
testing t1w2
accuracy of this model: 4.5851851851851855% (81000 test cases)
finishing testing
finishing model generate
------------------------
starting to generate pytorch model t1w4
training t1w4
epoch (0)


100%|█████████████████████████████████████████| 633/633 [00:50<00:00, 12.59it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.26it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.58it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.43it/s]
100%|█████████████████████████████████████████| 633/633 [00:43<00:00, 14.65it/s]


epoch (5)


100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.09it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.30it/s]
100%|█████████████████████████████████████████| 633/633 [00:43<00:00, 14.59it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.27it/s]
100%|█████████████████████████████████████████| 633/633 [00:42<00:00, 14.86it/s]


epoch (10)


100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.11it/s]
100%|█████████████████████████████████████████| 633/633 [00:42<00:00, 15.04it/s]
100%|█████████████████████████████████████████| 633/633 [00:42<00:00, 15.01it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.21it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.50it/s]


epoch (15)


100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.42it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.28it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.51it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.08it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.52it/s]


epoch (20)


100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.60it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.36it/s]
100%|█████████████████████████████████████████| 633/633 [00:42<00:00, 15.00it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.53it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.42it/s]


epoch (25)


100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.12it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.19it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.42it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.41it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.29it/s]


saving the pytorch model...
finishing training
testing t1w4
accuracy of this model: 86.68148148148148% (81000 test cases)
finishing testing
finishing model generate
------------------------
starting to generate pytorch model t1w8
training t1w8
epoch (0)


100%|█████████████████████████████████████████| 633/633 [00:51<00:00, 12.35it/s]
100%|█████████████████████████████████████████| 633/633 [00:42<00:00, 14.98it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.43it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.36it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.39it/s]


epoch (5)


100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.48it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.50it/s]
100%|█████████████████████████████████████████| 633/633 [00:42<00:00, 14.96it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.55it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.52it/s]


epoch (10)


100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.64it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.20it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.53it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.52it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.48it/s]


epoch (15)


100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.56it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.64it/s]
100%|█████████████████████████████████████████| 633/633 [00:42<00:00, 15.05it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.54it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.63it/s]


epoch (20)


100%|█████████████████████████████████████████| 633/633 [00:43<00:00, 14.52it/s]
100%|█████████████████████████████████████████| 633/633 [01:00<00:00, 10.39it/s]
100%|█████████████████████████████████████████| 633/633 [00:45<00:00, 13.99it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.77it/s]
100%|█████████████████████████████████████████| 633/633 [00:39<00:00, 15.86it/s]


epoch (25)


100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.77it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.44it/s]
100%|█████████████████████████████████████████| 633/633 [00:41<00:00, 15.16it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.69it/s]
100%|█████████████████████████████████████████| 633/633 [00:40<00:00, 15.72it/s]


saving the pytorch model...
finishing training
testing t1w8
accuracy of this model: 98.58148148148148% (81000 test cases)
finishing testing
finishing model generate
------------------------
starting to generate pytorch model t2w2
training t2w2
epoch (0)


100%|█████████████████████████████████████████| 633/633 [00:37<00:00, 16.90it/s]
100%|█████████████████████████████████████████| 633/633 [00:30<00:00, 20.75it/s]
100%|█████████████████████████████████████████| 633/633 [00:32<00:00, 19.40it/s]
100%|█████████████████████████████████████████| 633/633 [00:35<00:00, 17.98it/s]
100%|█████████████████████████████████████████| 633/633 [00:34<00:00, 18.57it/s]


epoch (5)


100%|█████████████████████████████████████████| 633/633 [00:32<00:00, 19.45it/s]
100%|█████████████████████████████████████████| 633/633 [00:30<00:00, 20.45it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.33it/s]
100%|█████████████████████████████████████████| 633/633 [00:37<00:00, 16.94it/s]
100%|█████████████████████████████████████████| 633/633 [00:34<00:00, 18.17it/s]


epoch (10)


100%|█████████████████████████████████████████| 633/633 [00:33<00:00, 18.66it/s]
100%|█████████████████████████████████████████| 633/633 [00:34<00:00, 18.48it/s]
100%|█████████████████████████████████████████| 633/633 [00:32<00:00, 19.23it/s]
100%|█████████████████████████████████████████| 633/633 [00:30<00:00, 20.43it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.35it/s]


epoch (15)


100%|█████████████████████████████████████████| 633/633 [00:30<00:00, 20.58it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.40it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.39it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.02it/s]
100%|█████████████████████████████████████████| 633/633 [00:32<00:00, 19.31it/s]


epoch (20)


100%|█████████████████████████████████████████| 633/633 [00:32<00:00, 19.62it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.21it/s]
100%|█████████████████████████████████████████| 633/633 [00:32<00:00, 19.60it/s]
100%|█████████████████████████████████████████| 633/633 [00:33<00:00, 18.85it/s]
100%|█████████████████████████████████████████| 633/633 [00:32<00:00, 19.35it/s]


epoch (25)


100%|█████████████████████████████████████████| 633/633 [00:34<00:00, 18.42it/s]
100%|█████████████████████████████████████████| 633/633 [00:34<00:00, 18.55it/s]
100%|█████████████████████████████████████████| 633/633 [00:34<00:00, 18.52it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 19.98it/s]
100%|█████████████████████████████████████████| 633/633 [00:34<00:00, 18.50it/s]


saving the pytorch model...
finishing training
testing t2w2
accuracy of this model: 89.18518518518519% (81000 test cases)
finishing testing
finishing model generate
------------------------
starting to generate pytorch model t2w4
training t2w4
epoch (0)


100%|█████████████████████████████████████████| 633/633 [00:38<00:00, 16.46it/s]
100%|█████████████████████████████████████████| 633/633 [00:30<00:00, 20.50it/s]
100%|█████████████████████████████████████████| 633/633 [00:30<00:00, 20.59it/s]
100%|█████████████████████████████████████████| 633/633 [00:30<00:00, 20.43it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.26it/s]


epoch (5)


100%|█████████████████████████████████████████| 633/633 [00:30<00:00, 20.61it/s]
100%|█████████████████████████████████████████| 633/633 [00:30<00:00, 20.63it/s]
100%|█████████████████████████████████████████| 633/633 [00:30<00:00, 20.51it/s]
100%|█████████████████████████████████████████| 633/633 [00:30<00:00, 20.55it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.04it/s]


epoch (10)


100%|█████████████████████████████████████████| 633/633 [00:35<00:00, 17.70it/s]
100%|█████████████████████████████████████████| 633/633 [00:38<00:00, 16.56it/s]
100%|█████████████████████████████████████████| 633/633 [00:33<00:00, 18.99it/s]
100%|█████████████████████████████████████████| 633/633 [00:34<00:00, 18.24it/s]
100%|█████████████████████████████████████████| 633/633 [00:36<00:00, 17.26it/s]


epoch (15)


100%|█████████████████████████████████████████| 633/633 [00:34<00:00, 18.19it/s]
100%|█████████████████████████████████████████| 633/633 [00:36<00:00, 17.41it/s]
100%|█████████████████████████████████████████| 633/633 [00:38<00:00, 16.42it/s]
100%|█████████████████████████████████████████| 633/633 [00:36<00:00, 17.32it/s]
100%|█████████████████████████████████████████| 633/633 [00:33<00:00, 18.99it/s]


epoch (20)


100%|█████████████████████████████████████████| 633/633 [00:35<00:00, 17.63it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.24it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.04it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.23it/s]
100%|█████████████████████████████████████████| 633/633 [00:34<00:00, 18.15it/s]


epoch (25)


100%|█████████████████████████████████████████| 633/633 [00:38<00:00, 16.32it/s]
100%|█████████████████████████████████████████| 633/633 [00:37<00:00, 16.77it/s]
100%|█████████████████████████████████████████| 633/633 [00:37<00:00, 16.93it/s]
100%|█████████████████████████████████████████| 633/633 [00:52<00:00, 12.15it/s]
100%|█████████████████████████████████████████| 633/633 [00:35<00:00, 17.76it/s]


saving the pytorch model...
finishing training
testing t2w4
accuracy of this model: 96.69876543209877% (81000 test cases)
finishing testing
finishing model generate
------------------------
starting to generate pytorch model t2w8
training t2w8
epoch (0)


100%|█████████████████████████████████████████| 633/633 [00:45<00:00, 13.88it/s]
100%|█████████████████████████████████████████| 633/633 [00:33<00:00, 18.83it/s]
100%|█████████████████████████████████████████| 633/633 [00:36<00:00, 17.12it/s]
100%|█████████████████████████████████████████| 633/633 [00:36<00:00, 17.12it/s]
100%|█████████████████████████████████████████| 633/633 [00:36<00:00, 17.27it/s]


epoch (5)


100%|█████████████████████████████████████████| 633/633 [00:37<00:00, 16.83it/s]
100%|█████████████████████████████████████████| 633/633 [00:37<00:00, 17.01it/s]
100%|█████████████████████████████████████████| 633/633 [00:36<00:00, 17.18it/s]
100%|█████████████████████████████████████████| 633/633 [00:32<00:00, 19.32it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.24it/s]


epoch (10)


100%|█████████████████████████████████████████| 633/633 [00:30<00:00, 20.59it/s]
100%|█████████████████████████████████████████| 633/633 [00:32<00:00, 19.71it/s]
100%|█████████████████████████████████████████| 633/633 [00:38<00:00, 16.53it/s]
100%|█████████████████████████████████████████| 633/633 [00:36<00:00, 17.55it/s]
100%|█████████████████████████████████████████| 633/633 [00:34<00:00, 18.18it/s]


epoch (15)


100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.04it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.36it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.28it/s]
100%|█████████████████████████████████████████| 633/633 [00:33<00:00, 19.13it/s]
100%|█████████████████████████████████████████| 633/633 [00:36<00:00, 17.47it/s]


epoch (20)


100%|█████████████████████████████████████████| 633/633 [00:39<00:00, 16.01it/s]
100%|█████████████████████████████████████████| 633/633 [00:35<00:00, 17.66it/s]
100%|█████████████████████████████████████████| 633/633 [00:32<00:00, 19.51it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.13it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.08it/s]


epoch (25)


100%|█████████████████████████████████████████| 633/633 [00:35<00:00, 18.07it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 19.91it/s]
100%|█████████████████████████████████████████| 633/633 [00:31<00:00, 20.08it/s]
100%|█████████████████████████████████████████| 633/633 [00:34<00:00, 18.22it/s]
100%|█████████████████████████████████████████| 633/633 [00:36<00:00, 17.32it/s]


saving the pytorch model...
finishing training
testing t2w8
accuracy of this model: 98.79506172839505% (81000 test cases)
finishing testing
finishing model generate
------------------------


Hardware generator

In [12]:
from finn.util.basic import make_build_dir
from finn.util.visualization import showInNetron
import os
    
build_dir = os.environ["FINN_BUILD_DIR"]

In [13]:
from qonnx.core.datatype import DataType
import torch
import onnx
from finn.util.test import get_test_model_trained
from brevitas.export import export_qonnx
from qonnx.util.cleanup import cleanup as qonnx_cleanup
from qonnx.core.modelwrapper import ModelWrapper
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.fold_constants import FoldConstants
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs
from qonnx.transformation.insert_topk import InsertTopK
from qonnx.transformation.infer_datatypes import InferDataTypes
from finn.transformation.streamline import Streamline
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from qonnx.custom_op.registry import getCustomOp
from qonnx.transformation.infer_data_layouts import InferDataLayouts
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
import os
import shutil

In [14]:
def get_onnx_output_filename (topology,quant):
    return f"./hardware_onnxs/sat6-cnn-t{topology}w{quant}.onnx"

In [19]:
def make_onnx(cnv,quant,topology):
    cnv.load_state_dict(torch.load(get_model_output_filename(topology=topology,quant=quant)))
    onnx_output_filename = get_onnx_output_filename(topology=topology,quant=quant)
    
    export_onnx_path = build_dir + f"/end2end_cnv_t{topology}w{quant}_export.onnx"
    #tidy up
    export_qonnx(cnv, torch.randn(1, 4, 32, 32), export_onnx_path)    
    qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(ConvertQONNXtoFINN())
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(RemoveStaticGraphInputs())

    #preprocessing
    global_inp_name = model.graph.input[0].name
    model.set_tensor_datatype(global_inp_name, DataType["UINT8"])

    # postprocessing: insert Top-1 node at the end
    model = model.transform(InsertTopK(k=1))
    # tidy-up again
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RemoveStaticGraphInputs())

    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(Streamline())
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    # absorb final add-mul nodes into TopK
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataLayouts())
    model = model.transform(RemoveUnusedTensors())

    # choose the memory mode for the MVTU units, decoupled or const
    mem_mode = "const"    
    model = model.transform(to_hls.InferBinaryMatrixVectorActivation(mem_mode))
    model = model.transform(to_hls.InferQuantizedMatrixVectorActivation(mem_mode))
    # TopK to LabelSelect
    model = model.transform(to_hls.InferLabelSelectLayer())
    # input quantization (if any) to standalone thresholding
    model = model.transform(to_hls.InferThresholdingLayer())
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())
    # get rid of Reshape(-1, 1) operation between hlslib nodes
    model = model.transform(RemoveCNVtoFCFlatten())
    # get rid of Tranpose -> Tranpose identity seq
    model = model.transform(absorb.AbsorbConsecutiveTransposes())
    # infer tensor data layouts
    model = model.transform(InferDataLayouts())
    parent_model = model.transform(CreateDataflowPartition())
    parent_model.save(build_dir + f"/end2end_cnv_t{topology}w{quant}_dataflow_parent.onnx")
    sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
    sdp_node = getCustomOp(sdp_node)
    dataflow_model_filename = sdp_node.get_nodeattr("model")
    # save the dataflow partition with a different name for easier access
    dataflow_model = ModelWrapper(dataflow_model_filename)
    dataflow_model.save(onnx_output_filename)
    return onnx_output_filename

In [23]:
def generate_hardware(topology,quant,target_fps):
    model_file = get_onnx_output_filename(topology,quant)
    
    estimates_output_dir = f"./builds/build_t{topology}w{quant}"
    
    #Delete previous run results if exist
    if os.path.exists(estimates_output_dir):
        shutil.rmtree(estimates_output_dir)
        print("Previous run results deleted!")
    
    cfg_estimates = build.DataflowBuildConfig(
        output_dir          = estimates_output_dir,
        mvau_wwidth_max     = 1000000, #tinha usado 80
        target_fps          = target_fps, #tinha usado 100
        synth_clk_period_ns = 10.0,
        rtlsim_batch_size   = npy_data_size,
        folding_config_file = "/home/artti/Desktop/finn/notebooks/sat6_cnn/folding.json",
        verify_input_npy    = "input.npy",
        stitched_ip_gen_dcp = True,
        verify_expected_output_npy = "expected_output.npy",
        # verify_save_rtlsim_waveforms = True,
        board = "Pynq-Z1",
        shell_flow_type = build_cfg.ShellFlowType.VIVADO_ZYNQ,
        default_mem_mode = build_cfg.ComputeEngineMemMode.CONST,
        generate_outputs=[
            build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
            build_cfg.DataflowOutputType.STITCHED_IP,
            build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
            build_cfg.DataflowOutputType.OOC_SYNTH,
            build_cfg.DataflowOutputType.BITFILE,
            build_cfg.DataflowOutputType.PYNQ_DRIVER,
            build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
        ]
        # ,
        # verify_steps=[
        #     build_cfg.VerificationStepType.STITCHED_IP_RTLSIM,
        # ]
    )    
    
    build.build_dataflow_cfg(model_file, cfg_estimates)

In [24]:
def build_hardware(topology,topology_class,quant):
    cnv = topology_class(bit_quantization=quant)
    onnx_filename = make_onnx(cnv=cnv,quant=quant,topology=topology)
    generate_hardware(target_fps=1000,quant=quant,topology=topology)    

In [22]:
%%time
for quant in t1_quantizations:
    build_hardware(1,t1_quantizedCNN,quant)

for quant in t2_quantizations:
    build_hardware(2,t2_quantizedCNN,quant)



Previous run results deleted!
Building dataflow accelerator from ./hardware_onnxs/sat6-cnn-t1w2.onnx
Intermediate outputs will be generated in /tmp/finn_dev_artti
Final outputs will be generated in ./builds/build_t1w2
Build log is at ./builds/build_t1w2/build_dataflow.log
Running step: step_qonnx_to_finn [1/18]
Running step: step_tidy_up [2/18]
Running step: step_streamline [3/18]
Running step: step_convert_to_hls [4/18]
Running step: step_create_dataflow_partition [5/18]
Running step: step_target_fps_parallelization [6/18]
Running step: step_apply_folding_config [7/18]
Running step: step_minimize_bit_width [8/18]
Running step: step_generate_estimate_reports [9/18]
Running step: step_hls_codegen [10/18]
Running step: step_hls_ipgen [11/18]
Running step: step_set_fifo_depths [12/18]
Running step: step_create_stitched_ip [13/18]


Traceback (most recent call last):
  File "/home/artti/Desktop/finn/src/finn/builder/build_dataflow.py", line 158, in build_dataflow_cfg
    model = transform_step(model, cfg)
  File "/home/artti/Desktop/finn/src/finn/builder/build_dataflow_steps.py", line 643, in step_measure_rtlsim_performance
    DataflowOutputType.STITCHED_IP in cfg.generate_outputs
AssertionError: rtlsim_perf needs stitched IP


Running step: step_measure_rtlsim_performance [14/18]
> [0;32m/home/artti/Desktop/finn/src/finn/builder/build_dataflow_steps.py[0m(643)[0;36mstep_measure_rtlsim_performance[0;34m()[0m
[0;32m    641 [0;31m    [0;32mif[0m [0mDataflowOutputType[0m[0;34m.[0m[0mRTLSIM_PERFORMANCE[0m [0;32min[0m [0mcfg[0m[0;34m.[0m[0mgenerate_outputs[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    642 [0;31m        assert (
[0m[0;32m--> 643 [0;31m            [0mDataflowOutputType[0m[0;34m.[0m[0mSTITCHED_IP[0m [0;32min[0m [0mcfg[0m[0;34m.[0m[0mgenerate_outputs[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    644 [0;31m        ), "rtlsim_perf needs stitched IP"
[0m[0;32m    645 [0;31m        [0mreport_dir[0m [0;34m=[0m [0mcfg[0m[0;34m.[0m[0moutput_dir[0m [0;34m+[0m [0;34m"/report"[0m[0;34m[0m[0;34m[0m[0m
[0m
--KeyboardInterrupt--

KeyboardInterrupt: Interrupted by user
Build failed



KeyboardInterrupt

