<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Load-functions" data-toc-modified-id="Load-functions-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Load functions</a></span></li><li><span><a href="#Training-a-first-simple-NN" data-toc-modified-id="Training-a-first-simple-NN-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Training a first simple NN</a></span></li><li><span><a href="#Looping-over-different-configuration-for-1D" data-toc-modified-id="Looping-over-different-configuration-for-1D-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Looping over different configuration for 1D</a></span></li><li><span><a href="#Looping-over-different-configuration-for-2D" data-toc-modified-id="Looping-over-different-configuration-for-2D-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Looping over different configuration for 2D</a></span></li><li><span><a href="#Looping-over-different-configurations-for-2D---ACM" data-toc-modified-id="Looping-over-different-configurations-for-2D---ACM-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Looping over different configurations for 2D - ACM</a></span></li><li><span><a href="#ACTIVATION-MAP-Manually" data-toc-modified-id="ACTIVATION-MAP-Manually-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>ACTIVATION MAP Manually</a></span><ul class="toc-item"><li><span><a href="#Tests-et-débug" data-toc-modified-id="Tests-et-débug-6.1"><span class="toc-item-num">6.1&nbsp;&nbsp;</span>Tests et débug</a></span></li><li><span><a href="#From-ACM-python-file-adapted" data-toc-modified-id="From-ACM-python-file-adapted-6.2"><span class="toc-item-num">6.2&nbsp;&nbsp;</span>From ACM python file adapted</a></span></li></ul></li><li><span><a href="#Filter-visualisation-map" data-toc-modified-id="Filter-visualisation-map-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Filter visualisation map</a></span></li><li><span><a href="#Get-model-structure-image" data-toc-modified-id="Get-model-structure-image-8"><span 
class="toc-item-num">8&nbsp;&nbsp;</span>Get model structure image</a></span></li></ul></div>

# CNN Notebook

## Load functions

from models.cnn_model import classifier_GD_1, classifier_GD_2
from models.cnn_preprocessing import main_preprocessing_cnn
from models.cnn_train_test import train, test
from models.cnn_model_statistics import main_cnn_stats_model
from models.loading_model_data import main_loading_model_data

import torch.nn as nn 
import torch.nn.functional as F
import torch
import numpy as np

## Training a first simple NN

# Hyper-parameters, kept as notebook-level names. The trailing comments are the
# values the original author left next to each setting (presumably earlier trials).
out_channel_1, out_channel_2 = 30, 30  # 10, 20
kernel_size_1_W, kernel_size_2_W = 7, 7  # 7, 7
ratio_fc_1 = 1 / 2  # 1 / 2
n_epochs = 50
learning_rate = 1e-3

# Dataset selection shared by the loaders and by the statistics report.
primer_and_level = {'selected_primer': 'V4', 'taxonomy_level': 1}
data_origin = {'sequence_origin': 'DairyDB', 'primers_origin': 'DairyDB'}

# The preprocessing call only receives the primer and the taxonomy level;
# the raw-data loader additionally receives the two origins.
(train_loader, test_loader,
 dict_class_to_id, dict_id_to_class) = main_preprocessing_cnn(**primer_and_level)
X_train, X_test, y_train, y_test = main_loading_model_data(**data_origin, **primer_and_level)
n_out_features = len(dict_class_to_id)  # number of entries in the class mapping

# Model, loss and optimiser.
conv_class = classifier_GD_2(n_out_features)
loss_fn = nn.CrossEntropyLoss()
optimizer_cl = torch.optim.Adam(conv_class.parameters(), lr=learning_rate)

# Train, then run one more evaluation pass to collect targets and predictions.
loss_train, acc_train, loss_test, acc_test = train(
    conv_class, train_loader, test_loader, loss_fn, optimizer_cl, n_epochs=n_epochs
)
_, _, y_test_torch, y_pred_torch = test(conv_class, test_loader, loss_fn)

# Hand everything to the project's statistics/report helper.
main_cnn_stats_model(
    y_train, y_test_torch, y_pred_torch, dict_id_to_class,
    loss_train, loss_test, acc_train, acc_test,
    make_plot=True,
    model_name='CNN - Aoki - 2D',
    model_class=conv_class,
    model_preprocessing='OHE of letters in 4 dimensions (k=1)',
    **data_origin,
    **primer_and_level
)

## Looping over different configuration for 1D

from models.cnn_preprocessing import main_preprocessing_cnn
from models.cnn_train_test import train, test
from models.cnn_model_statistics import main_cnn_stats_model
from models.loading_model_data import main_loading_model_data

import torch.nn as nn 
import torch.nn.functional as F
import torch
import numpy as np
from utils.utils import save_update_cnn

class classifier_GD_1(nn.Module):
    """1D convolutional classifier.

    Two ``Conv1d -> BatchNorm1d -> ReLU -> max-pool`` stages followed by two
    fully connected layers that return raw class scores (no softmax).

    Hyper-parameters may be passed explicitly as keyword arguments. Any of
    them left as ``None`` is read from the module-level variable of the same
    name, which is how the notebook's configuration loop supplies them.

    Args:
        n_out_features: output width of the last linear layer.
        k_mer: the first convolution expects ``4 ** k_mer`` input channels.
        max_size: input length used to pre-compute the flattened feature
            size; tensors given to ``forward`` must have this length.
        out_channel_1: output channels of the first convolution.
        out_channel_2: output channels of the second convolution.
        kernel_size_1: kernel width used by both convolutions and both
            max-pool layers.
        max_pool_stride_1: stride of the first max-pool.
        max_pool_stride_2: stride of the second max-pool.
        ratio_fc_1: width of the first linear layer's output, as a fraction
            of its input width.

    Raises:
        NameError: a hyper-parameter was neither passed nor defined at
            module level.
        ValueError: the configuration leaves no position after the second
            max-pool (input too short for the kernels/strides).
    """

    def __init__(self, n_out_features: int, k_mer: int = 1, max_size: int = 300, *,
                 out_channel_1=None, out_channel_2=None, kernel_size_1=None,
                 max_pool_stride_1=None, max_pool_stride_2=None, ratio_fc_1=None):
        # Initialise nn.Module first so every later attribute goes through a
        # fully set-up module.
        super(classifier_GD_1, self).__init__()
        self.k_mer = k_mer
        self.max_size = max_size
        # PARAMETERS (explicit argument wins, otherwise the module-level name)
        self.out_channel_1 = self._hyper_parameter('out_channel_1', out_channel_1)
        self.out_channel_2 = self._hyper_parameter('out_channel_2', out_channel_2)
        self.kernel_size_1 = self._hyper_parameter('kernel_size_1', kernel_size_1)
        self.max_pool_stride_1 = self._hyper_parameter('max_pool_stride_1', max_pool_stride_1)
        self.max_pool_stride_2 = self._hyper_parameter('max_pool_stride_2', max_pool_stride_2)
        self.ratio_fc_1 = self._hyper_parameter('ratio_fc_1', ratio_fc_1)
        # COPIED PARAMETERS: pooling window and second kernel reuse kernel_size_1
        self.kernel_size_max_pool_1 = self.kernel_size_1
        self.kernel_size_2 = self.kernel_size_1
        # SIZE COMPUTATION (no padding, stride-1 convolutions)
        self.L_out_conv_1 = max_size - self.kernel_size_1 + 1
        self.L_out_max_pool_1 = int((self.L_out_conv_1 - self.kernel_size_1) // self.max_pool_stride_1) + 1
        self.L_out_conv_2 = self.L_out_max_pool_1 - self.kernel_size_2 + 1
        self.L_out_max_pool_2 = int((self.L_out_conv_2 - self.kernel_size_2) // self.max_pool_stride_2) + 1
        if self.L_out_max_pool_2 < 1:
            raise ValueError(
                'max_size={} is too small for kernel_size_1={} with pooling strides ({}, {})'.format(
                    max_size, self.kernel_size_1, self.max_pool_stride_1, self.max_pool_stride_2))
        self.L_out_fc_1 = int(self.out_channel_2 * self.L_out_max_pool_2 * self.ratio_fc_1)

        # First convolutional stage
        self.conv1 = nn.Conv1d(in_channels=4 ** k_mer, out_channels=self.out_channel_1,
                               kernel_size=self.kernel_size_1, padding=0)
        self.bn1 = nn.BatchNorm1d(self.out_channel_1)
        self.ReLU1 = nn.ReLU()
        # Second convolutional stage
        self.conv2 = nn.Conv1d(in_channels=self.out_channel_1, out_channels=self.out_channel_2,
                               kernel_size=self.kernel_size_2, padding=0)
        self.bn2 = nn.BatchNorm1d(self.out_channel_2)
        self.ReLU2 = nn.ReLU()

        # Hidden part
        self.fc1 = nn.Linear(in_features=self.out_channel_2 * self.L_out_max_pool_2,
                             out_features=self.L_out_fc_1)
        self.ReLU3 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=self.L_out_fc_1,
                             out_features=n_out_features)

    @staticmethod
    def _hyper_parameter(name, value):
        """Return ``value``, or the module-level variable ``name`` when ``value`` is None."""
        if value is not None:
            return value
        try:
            return globals()[name]
        except KeyError:
            raise NameError(
                "hyper-parameter '{}' was neither passed to the constructor "
                "nor defined at module level".format(name)) from None

    def forward(self, x):
        """Return raw class scores for ``x`` of shape (batch, 4 ** k_mer, max_size)."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.ReLU1(x)
        x = F.max_pool1d(x, kernel_size=self.kernel_size_1, stride=self.max_pool_stride_1)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.ReLU2(x)
        x = F.max_pool1d(x, kernel_size=self.kernel_size_2, stride=self.max_pool_stride_2)
        # Flatten (channels, length) for the fully connected layers.
        x = x.view(-1, self.out_channel_2 * self.L_out_max_pool_2)
        x = self.fc1(x)
        x = self.ReLU3(x)
        # F.dropout defaults to training=True; tie it to the module mode so
        # that model.eval() really disables dropout.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.fc2(x)
        return x  # With CrossEntropyLoss directly

# Column names for the 1D results table: every configuration key plus 'accuracy'.
colnames = [
    'sequence_origin', 'primers_origin', 'selected_primer', 'taxonomy_level',
    'dimension', 'k_mer', 'vector_max_size',
    'out_channel_1', 'out_channel_2', 'kernel_size_1',
    'max_pool_stride_1', 'max_pool_stride_2', 'ratio_fc_1',
    'n_epochs', 'learning_rate', 'accuracy',
]

# One configuration per input length; every other setting is identical.
# A fresh dict is built for each length because the training loop adds
# results to the configuration it is working on.
parameter_config_list = [
    {
        'sequence_origin': 'DairyDB',
        'primers_origin': 'DairyDB',
        'selected_primer': 'V4',
        'taxonomy_level': 1,
        'dimension': 1,
        'k_mer': 1,
        'vector_max_size': vector_max_size,
        'out_channel_1': 32,
        'out_channel_2': 64,
        'kernel_size_1': 6,
        'max_pool_stride_1': 2,
        'max_pool_stride_2': 2,
        'ratio_fc_1': 2 / 3,
        'n_epochs': 50,
        'learning_rate': 1e-3,
    }
    for vector_max_size in (50, 100, 150, 200, 250, 300)
]

for parameter_config in parameter_config_list:
    # Settings reused several times in this iteration.
    dim, k_mer, max_size = (
        parameter_config[field] for field in ('dimension', 'k_mer', 'vector_max_size')
    )
    file_path = 'results/models/CNN {}D - CNN({}) - accuracies.csv'.format(dim, k_mer)
    print('Working with the following parameter configuration: \n {}'.format(parameter_config))

    # Dataset selection handed to both loaders and to the report.
    data_selection = {
        field: parameter_config[field]
        for field in ('sequence_origin', 'primers_origin', 'selected_primer', 'taxonomy_level')
    }
    train_loader, test_loader, dict_class_to_id, dict_id_to_class = main_preprocessing_cnn(
        max_size=max_size, k_mer=k_mer, **data_selection
    )
    X_train, X_test, y_train, y_test = main_loading_model_data(**data_selection)
    n_out_features = len(dict_class_to_id)

    # classifier_GD_1 picks these hyper-parameters up from notebook-level names,
    # so they have to be (re)bound before the model is built.
    (out_channel_1, out_channel_2, kernel_size_1,
     max_pool_stride_1, max_pool_stride_2, ratio_fc_1) = (
        parameter_config[field]
        for field in ('out_channel_1', 'out_channel_2', 'kernel_size_1',
                      'max_pool_stride_1', 'max_pool_stride_2', 'ratio_fc_1')
    )
    n_epochs = parameter_config['n_epochs']
    learning_rate = parameter_config['learning_rate']

    conv_class = classifier_GD_1(n_out_features, k_mer, max_size)
    loss_fn = nn.CrossEntropyLoss()
    optimizer_cl = torch.optim.Adam(conv_class.parameters(), lr=learning_rate)

    # Evaluate once before training, train, then evaluate the trained model.
    _, _, _, _ = test(conv_class, test_loader, loss_fn)
    loss_train, acc_train, loss_test, acc_test = train(
        conv_class, train_loader, test_loader, loss_fn, optimizer_cl, n_epochs=n_epochs
    )
    final_test_loss, accuracy, y_test_torch, y_pred_torch = test(conv_class, test_loader, loss_fn)

    # The final accuracy is stored in the configuration so it is saved with it.
    parameter_config['accuracy'] = accuracy

    main_cnn_stats_model(
        y_train, y_test_torch, y_pred_torch, dict_id_to_class,
        loss_train, loss_test, acc_train, acc_test,
        make_plot=True,
        model_name='CNN - Aoki - {}D'.format(dim),
        model_class=conv_class,
        model_preprocessing='OHE of letters in {} dimensions (k={}) - max size = {}'.format(4**k_mer, k_mer, max_size),
        **data_selection
    )

    # Record this configuration (keys and values, in insertion order).
    save_update_cnn(file_path, parameter_config.keys(), parameter_config.values())

## Looping over different configuration for 2D

from models.cnn_preprocessing import main_preprocessing_cnn
from models.cnn_train_test import train, test
from models.cnn_model_statistics import main_cnn_stats_model
from models.loading_model_data import main_loading_model_data

import torch.nn as nn 
import torch.nn.functional as F
import torch
import numpy as np
from utils.utils import save_update_cnn
import time

class classifier_GD_2(nn.Module):
    """2D convolutional classifier operating on a (4 ** k_mer) x max_size map.

    The input is reshaped to one channel of height ``4 ** k_mer`` and width
    ``max_size``. The first convolution spans the full height, so every later
    layer works on height 1. Two ``Conv2d -> BatchNorm2d -> ReLU -> max-pool``
    stages are followed by two fully connected layers that return raw class
    scores (no softmax).

    Hyper-parameters may be passed explicitly as keyword arguments. Those
    defaulting to ``None`` are read from the module-level variable of the
    same name when omitted, which is how the notebook's configuration loop
    supplies them.

    Args:
        n_out_features: output width of the last linear layer.
        k_mer: the input map has ``4 ** k_mer`` rows.
        max_size: width of the input map.
        out_channel_1: output channels of the first convolution.
        out_channel_2: output channels of the second convolution.
        kernel_size_1_W: width of the first convolution kernel and of the
            first max-pool window.
        kernel_size_2_W: width of the second convolution kernel and of the
            second max-pool window.
        ratio_fc_1: width of the first linear layer's output, as a fraction
            of its input width.
        max_pool_stride_1_W: horizontal stride of the first max-pool.
        max_pool_stride_2_W: horizontal stride of the second max-pool.

    Raises:
        NameError: a hyper-parameter was neither passed nor defined at
            module level.
        ValueError: the configuration leaves no position after the second
            max-pool (input too narrow for the kernels/strides).
    """

    def __init__(self, n_out_features: int, k_mer: int = 1, max_size: int = 300, *,
                 out_channel_1=None, out_channel_2=None,
                 kernel_size_1_W=None, kernel_size_2_W=None, ratio_fc_1=None,
                 max_pool_stride_1_W: int = 8, max_pool_stride_2_W: int = 8):
        super(classifier_GD_2, self).__init__()
        self.k_mer = k_mer
        self.max_size = max_size
        # PARAMETERS (explicit argument wins, otherwise the module-level name)
        self.out_channel_1 = self._hyper_parameter('out_channel_1', out_channel_1)
        self.out_channel_2 = self._hyper_parameter('out_channel_2', out_channel_2)
        self.kernel_size_1_W = self._hyper_parameter('kernel_size_1_W', kernel_size_1_W)
        self.kernel_size_2_W = self._hyper_parameter('kernel_size_2_W', kernel_size_2_W)
        self.ratio_fc_1 = self._hyper_parameter('ratio_fc_1', ratio_fc_1)
        # FIXED PARAMETERS
        self.kernel_size_1_H = 4 ** k_mer  # first kernel covers the whole height
        self.padding_conv_1_H = 0
        self.padding_conv_1_W = 0
        self.kernel_size_max_pool_1_H = 1
        self.max_pool_stride_1_H = 1
        self.max_pool_stride_1_W = max_pool_stride_1_W
        self.kernel_size_2_H = 1
        self.padding_conv_2_H = 0
        self.padding_conv_2_W = 0
        self.kernel_size_max_pool_2_H = 1
        self.max_pool_stride_2_H = 1
        self.max_pool_stride_2_W = max_pool_stride_2_W
        # COPIED PARAMETERS: each pooling window is as wide as its convolution kernel
        self.kernel_size_max_pool_1_W = self.kernel_size_1_W
        self.kernel_size_max_pool_2_W = self.kernel_size_2_W
        # SIZE COMPUTATION (stride-1 convolutions)
        self.L_out_conv_1_H = 4 ** k_mer - self.kernel_size_1_H + 2 * self.padding_conv_1_H + 1
        self.L_out_conv_1_W = max_size - self.kernel_size_1_W + 2 * self.padding_conv_1_W + 1
        self.L_out_max_pool_1_H = int((self.L_out_conv_1_H - self.kernel_size_max_pool_1_H) // self.max_pool_stride_1_H) + 1
        self.L_out_max_pool_1_W = int((self.L_out_conv_1_W - self.kernel_size_max_pool_1_W) // self.max_pool_stride_1_W) + 1
        self.L_out_conv_2_H = self.L_out_max_pool_1_H - self.kernel_size_2_H + 2 * self.padding_conv_2_H + 1
        self.L_out_conv_2_W = self.L_out_max_pool_1_W - self.kernel_size_2_W + 2 * self.padding_conv_2_W + 1
        self.L_out_max_pool_2_H = int((self.L_out_conv_2_H - self.kernel_size_max_pool_2_H) // self.max_pool_stride_2_H) + 1
        self.L_out_max_pool_2_W = int((self.L_out_conv_2_W - self.kernel_size_max_pool_2_W) // self.max_pool_stride_2_W) + 1
        if min(self.L_out_max_pool_2_H, self.L_out_max_pool_2_W) < 1:
            raise ValueError(
                'max_size={} is too small for kernel widths ({}, {}) with pooling strides ({}, {})'.format(
                    max_size, self.kernel_size_1_W, self.kernel_size_2_W,
                    self.max_pool_stride_1_W, self.max_pool_stride_2_W))
        self.L_in_fc_1 = int(self.out_channel_2 * self.L_out_max_pool_2_H * self.L_out_max_pool_2_W)
        self.L_out_fc_1 = int(self.out_channel_2 * self.L_out_max_pool_2_H * self.L_out_max_pool_2_W * self.ratio_fc_1)
        # First convolutional stage
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=self.out_channel_1,
                               kernel_size=(self.kernel_size_1_H, self.kernel_size_1_W),
                               padding=(self.padding_conv_1_H, self.padding_conv_1_W))
        self.bn1 = nn.BatchNorm2d(self.out_channel_1)
        self.ReLU1 = nn.ReLU()
        # Second convolutional stage
        self.conv2 = nn.Conv2d(in_channels=self.out_channel_1, out_channels=self.out_channel_2,
                               kernel_size=(self.kernel_size_2_H, self.kernel_size_2_W),
                               padding=(self.padding_conv_2_H, self.padding_conv_2_W))
        self.bn2 = nn.BatchNorm2d(self.out_channel_2)
        self.ReLU2 = nn.ReLU()
        # Hidden part
        self.fc1 = nn.Linear(in_features=self.L_in_fc_1,
                             out_features=self.L_out_fc_1)
        self.ReLU3 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=self.L_out_fc_1,
                             out_features=n_out_features)

    @staticmethod
    def _hyper_parameter(name, value):
        """Return ``value``, or the module-level variable ``name`` when ``value`` is None."""
        if value is not None:
            return value
        try:
            return globals()[name]
        except KeyError:
            raise NameError(
                "hyper-parameter '{}' was neither passed to the constructor "
                "nor defined at module level".format(name)) from None

    def forward(self, x):
        """Return raw class scores; ``x`` is reshaped to (-1, 1, 4 ** k_mer, max_size)."""
        x = x.view(-1, 1, 4 ** self.k_mer, self.max_size)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.ReLU1(x)
        x = F.max_pool2d(x,
                         kernel_size=(self.kernel_size_max_pool_1_H, self.kernel_size_max_pool_1_W),
                         stride=(self.max_pool_stride_1_H, self.max_pool_stride_1_W))
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.ReLU2(x)
        x = F.max_pool2d(x,
                         kernel_size=(self.kernel_size_max_pool_2_H, self.kernel_size_max_pool_2_W),
                         stride=(self.max_pool_stride_2_H, self.max_pool_stride_2_W))
        # Flatten for the fully connected layers.
        x = x.view(-1, self.L_in_fc_1)
        x = self.fc1(x)
        x = self.ReLU3(x)
        # F.dropout defaults to training=True; tie it to the module mode so
        # that model.eval() really disables dropout.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.fc2(x)
        return x  # With CrossEntropyLoss directly


# Column names for the 2D results table: every configuration key plus the two
# result fields.
colnames = [
    'sequence_origin', 'primers_origin', 'selected_primer', 'taxonomy_level',
    'dimension', 'k_mer', 'vector_max_size',
    'out_channel_1', 'out_channel_2',
    'kernel_size_1_W', 'kernel_size_2_W',
    'max_pool_stride_1_W', 'max_pool_stride_2_W',
    'ratio_fc_1', 'n_epochs', 'learning_rate',
    'accuracy', 'training_time',
]

# Single 2D configuration; key order matters because keys and values are
# written out in insertion order.
parameter_config_list = [
    dict(
        sequence_origin='DairyDB',
        primers_origin='DairyDB',
        selected_primer='V4',
        taxonomy_level=1,
        dimension=2,
        k_mer=1,
        vector_max_size=300,
        out_channel_1=32,
        out_channel_2=64,
        kernel_size_1_W=8,
        kernel_size_2_W=8,
        max_pool_stride_1_W=8,
        max_pool_stride_2_W=8,
        ratio_fc_1=1 / 2,
        n_epochs=50,
        learning_rate=1e-3,
    ),
]

for parameter_config in parameter_config_list:
    # Settings reused several times in this iteration.
    dim, k_mer, max_size = (
        parameter_config[field] for field in ('dimension', 'k_mer', 'vector_max_size')
    )
    file_path = 'results/models/CNN {}D - CNN({}) - accuracies.csv'.format(dim, k_mer)
    print('Working with the following parameter configuration: \n {}'.format(parameter_config))

    # Dataset selection handed to both loaders and to the report.
    data_selection = {
        field: parameter_config[field]
        for field in ('sequence_origin', 'primers_origin', 'selected_primer', 'taxonomy_level')
    }
    train_loader, test_loader, dict_class_to_id, dict_id_to_class = main_preprocessing_cnn(
        max_size=max_size, k_mer=k_mer, **data_selection
    )
    X_train, X_test, y_train, y_test = main_loading_model_data(**data_selection)
    n_out_features = len(dict_class_to_id)

    # classifier_GD_2 picks these hyper-parameters up from notebook-level names.
    # NOTE: the 'max_pool_stride_*_W' entries of the configuration are not
    # forwarded to the model here; they only end up in the saved results.
    out_channel_1, out_channel_2, kernel_size_1_W, kernel_size_2_W, ratio_fc_1 = (
        parameter_config[field]
        for field in ('out_channel_1', 'out_channel_2',
                      'kernel_size_1_W', 'kernel_size_2_W', 'ratio_fc_1')
    )
    n_epochs = parameter_config['n_epochs']
    learning_rate = parameter_config['learning_rate']

    conv_class = classifier_GD_2(n_out_features, k_mer, max_size)
    loss_fn = nn.CrossEntropyLoss()
    optimizer_cl = torch.optim.Adam(conv_class.parameters(), lr=learning_rate)

    # Evaluate once before training, then time the training call alone.
    _, _, _, _ = test(conv_class, test_loader, loss_fn)
    begin_time = time.time()
    loss_train, acc_train, loss_test, acc_test = train(
        conv_class, train_loader, test_loader, loss_fn, optimizer_cl, n_epochs=n_epochs
    )
    end_time = time.time()
    final_test_loss, accuracy, y_test_torch, y_pred_torch = test(conv_class, test_loader, loss_fn)

    # Results are stored in the configuration so they are saved with it.
    parameter_config['accuracy'] = accuracy
    parameter_config['training_time'] = end_time - begin_time

    main_cnn_stats_model(
        y_train, y_test_torch, y_pred_torch, dict_id_to_class,
        loss_train, loss_test, acc_train, acc_test,
        make_plot=True,
        model_name='CNN - Aoki - {}D'.format(dim),
        model_class=conv_class,
        model_preprocessing='OHE of letters in {} dimensions (k={}) - max size = {}'.format(4**k_mer, k_mer, max_size),
        **data_selection
    )

    # Record this configuration (keys and values, in insertion order).
    save_update_cnn(file_path, parameter_config.keys(), parameter_config.values())

## Looping over different configurations for 2D - ACM

In [1]:
from models.cnn_model import classifier_GD_2_ACM
from models.cnn_preprocessing import main_preprocessing_cnn
from models.cnn_train_test import train, test
from models.cnn_model_statistics import main_cnn_stats_model
from models.loading_model_data import main_loading_model_data

import torch.nn as nn 
import torch.nn.functional as F
import torch
import numpy as np
from utils.utils import save_update_cnn, slash, taxonomy_levels, folder_paths
import time

  import pandas.util.testing as tm


In [2]:
# Column names for the V_ACM results table: configuration keys followed by the
# two result fields.
colnames = [
    'sequence_origin', 'primers_origin', 'selected_primer', 'taxonomy_level',
    'dimension', 'k_mer', 'vector_max_size',
    'out_channel_1', 'out_channel_2',
    'kernel_size_1_W', 'kernel_size_2_W',
    'max_pool_stride_1_W', 'max_pool_stride_2_W',
    'n_epochs', 'learning_rate',
    'accuracy', 'training_time',
]

In [17]:
# Two V_ACM configurations that differ only in the number of channels, which is
# the same for both convolutions. A fresh dict is built per entry because the
# training loop adds results to the configuration it is working on.
parameter_config_list = [
    {
        'sequence_origin': 'DairyDB',
        'primers_origin': 'DairyDB',
        'selected_primer': 'V4',
        'taxonomy_level': 5,
        'dimension': 2,
        'k_mer': 1,
        'vector_max_size': 270,
        'out_channel_1': n_channels,
        'out_channel_2': n_channels,
        'kernel_size_1_W': 5,
        'kernel_size_2_W': 15,
        'max_pool_stride_1_W': 5,
        'max_pool_stride_2_W': 15,
        'n_epochs': 50,
        'learning_rate': 1e-3,
    }
    for n_channels in (300, 400)
]

In [18]:
for parameter_config in parameter_config_list:
    # Settings reused several times in this iteration.
    dim, k_mer, max_size = (
        parameter_config[field] for field in ('dimension', 'k_mer', 'vector_max_size')
    )
    # `slash` is the path separator imported from utils.utils.
    file_path = 'results{}models{}CNN {}D - V_ACM - CNN({}) - accuracies.csv'.format(
        slash, slash, dim, k_mer
    )
    print('Working with the following parameter configuration: \n {}'.format(parameter_config))

    # Dataset selection handed to both loaders and to the report.
    data_selection = {
        field: parameter_config[field]
        for field in ('sequence_origin', 'primers_origin', 'selected_primer', 'taxonomy_level')
    }
    train_loader, test_loader, dict_class_to_id, dict_id_to_class = main_preprocessing_cnn(
        max_size=max_size, k_mer=k_mer, **data_selection
    )
    X_train, X_test, y_train, y_test = main_loading_model_data(**data_selection)
    n_out_features = len(dict_class_to_id)

    n_epochs = parameter_config['n_epochs']
    learning_rate = parameter_config['learning_rate']

    # This model variant receives the whole configuration dictionary.
    conv_class = classifier_GD_2_ACM(n_out_features, parameter_config)
    loss_fn = nn.CrossEntropyLoss()
    optimizer_cl = torch.optim.Adam(conv_class.parameters(), lr=learning_rate)

    # Evaluate once before training, then time the training call alone.
    _, _, _, _ = test(conv_class, test_loader, loss_fn)
    begin_time = time.time()
    loss_train, acc_train, loss_test, acc_test = train(
        conv_class, train_loader, test_loader, loss_fn, optimizer_cl, n_epochs=n_epochs
    )
    end_time = time.time()
    final_test_loss, accuracy, y_test_torch, y_pred_torch = test(conv_class, test_loader, loss_fn)

    # Results are added before the configuration is passed on and saved.
    parameter_config['accuracy'] = accuracy
    parameter_config['training_time'] = end_time - begin_time

    main_cnn_stats_model(
        y_train, y_test_torch, y_pred_torch, dict_id_to_class,
        loss_train, loss_test, acc_train, acc_test,
        make_plot=True,
        save_model=True,
        parameter_config=parameter_config,
        create_acm=True,
        acm_parameters=[X_test, y_test, 20],
        model_name='CNN - V_ACM - {}D'.format(dim),
        model_class=conv_class,
        model_preprocessing='OHE of letters in {} dimensions (k={}) - max size = {}'.format(4**k_mer, k_mer, max_size),
        **data_selection
    )

    # Record this configuration (keys and values, in insertion order).
    save_update_cnn(file_path, parameter_config.keys(), parameter_config.values())

Working with the following parameter configuration: 
 {'sequence_origin': 'DairyDB', 'primers_origin': 'DairyDB', 'selected_primer': 'V4', 'taxonomy_level': 5, 'dimension': 2, 'k_mer': 1, 'vector_max_size': 270, 'out_channel_1': 300, 'out_channel_2': 300, 'kernel_size_1_W': 5, 'kernel_size_2_W': 15, 'max_pool_stride_1_W': 5, 'max_pool_stride_2_W': 15, 'n_epochs': 50, 'learning_rate': 0.001}
Test - Loss: 0.1182 Acc: 0.0011
Epoch 1 over 50
Train - Loss: 0.0985 Acc: 0.1497
Test - Loss: 0.0865 Acc: 0.2346
Epoch 2 over 50
Train - Loss: 0.0855 Acc: 0.1756
Test - Loss: 0.0828 Acc: 0.2261
Epoch 3 over 50
Train - Loss: 0.0706 Acc: 0.2717
Test - Loss: 0.0747 Acc: 0.2930
Epoch 4 over 50
Train - Loss: 0.0588 Acc: 0.3531
Test - Loss: 0.0669 Acc: 0.3657
Epoch 5 over 50
Train - Loss: 0.0483 Acc: 0.4326
Test - Loss: 0.0606 Acc: 0.4204
Epoch 6 over 50
Train - Loss: 0.0399 Acc: 0.4976
Test - Loss: 0.0566 Acc: 0.4549
Epoch 7 over 50
Train - Loss: 0.0326 Acc: 0.5562
Test - Loss: 0.0554 Acc: 0.4846
Epoch 8

## ACTIVATION MAP Manually

# IPython magics: load the `autoreload` extension and switch it to mode 2.
%load_ext autoreload
%autoreload 2

In [None]:
from models.cnn_acm import create_activation_map
from models.loading_model_data import main_loading_model_data
from models.cnn_preprocessing import main_preprocessing_cnn


In [None]:
# Configuration of a previously trained model, including the accuracy and
# training time that were recorded for it.
parameter_config = dict(
    sequence_origin='DairyDB',
    primers_origin='DairyDB',
    selected_primer='V4',
    taxonomy_level=5,
    dimension=2,
    k_mer=1,
    vector_max_size=300,
    out_channel_1=64,
    out_channel_2=128,
    kernel_size_1_W=5,
    kernel_size_2_W=5,
    max_pool_stride_1_W=5,
    max_pool_stride_2_W=5,
    n_epochs=50,
    learning_rate=0.001,
    accuracy=0.6014234875444839,
    training_time=975.4851434230804,
)


In [None]:
# Pull the two settings reused by the following cells out of the configuration.
max_size, k_mer = parameter_config['vector_max_size'], parameter_config['k_mer']

In [None]:
# Dataset selection shared by the raw-data loader and the CNN preprocessing.
data_selection = {
    field: parameter_config[field]
    for field in ('sequence_origin', 'primers_origin', 'selected_primer', 'taxonomy_level')
}

# Raw train/test data first, then the loaders and class-id mappings.
X_train, X_test, y_train, y_test = main_loading_model_data(**data_selection)
train_loader, test_loader, dict_class_to_id, dict_id_to_class = main_preprocessing_cnn(
    max_size=max_size, k_mer=k_mer, **data_selection
)

In [None]:
# Absolute Windows folder of the analysed run (machine-specific); the trailing
# separator is part of the value.
analysis_path = (
    'D:\\0 - Boulot\\5 - X4\\16. Research Paper\\model_results\\'
    'CNN - V_ACM - 2D\\00008_analysis_V4_5_good\\'
)

In [None]:
# Run the project's activation-map routine on the test data loaded above,
# passing the id-to-class mapping, the configuration and the analysis folder.
# NOTE(review): the meaning of `n` is defined in models.cnn_acm - confirm there.
create_activation_map(X_test, y_test, dict_id_to_class, parameter_config, n=2, analysis_path=analysis_path)

## Filter visualisation map

In [None]:
from models.loading_model_data import main_loading_model_data
from models.cnn_preprocessing import main_preprocessing_cnn
from models.cnn_preprocessing import get_homogenous_vector
from models.cnn_acm import get_kernel_activation_map, create_one_activation_map_with_return, get_letters

from models.cnn_model import classifier_GD_2_ACM
import torch
import numpy as np
import matplotlib.pyplot as plt
import cv2

In [None]:
# Configuration of the trained model whose filters are visualised, including
# the accuracy and training time that were recorded for it.
parameter_config = dict(
    sequence_origin='DairyDB',
    primers_origin='DairyDB',
    selected_primer='V4',
    taxonomy_level=5,
    dimension=2,
    k_mer=1,
    vector_max_size=300,
    out_channel_1=64,
    out_channel_2=128,
    kernel_size_1_W=5,
    kernel_size_2_W=5,
    max_pool_stride_1_W=5,
    max_pool_stride_2_W=5,
    n_epochs=50,
    learning_rate=0.001,
    accuracy=0.6014234875444839,
    training_time=975.4851434230804,
)

# Windows path separator used when building paths by hand in this section.
slash = '\\'

# Absolute Windows folder of the analysed run (machine-specific) and the
# model file inside it.
analysis_path = (
    'D:\\0 - Boulot\\5 - X4\\16. Research Paper\\model_results\\'
    'CNN - V_ACM - 2D\\00008_analysis_V4_5_good\\'
)
model_path = analysis_path + 'model.pt'

In [None]:
max_size, k_mer = parameter_config['vector_max_size'], parameter_config['k_mer']

# Dataset selection shared by the raw-data loader and the CNN preprocessing.
data_selection = {
    field: parameter_config[field]
    for field in ('sequence_origin', 'primers_origin', 'selected_primer', 'taxonomy_level')
}
X_train, X_test, y_train, y_test = main_loading_model_data(**data_selection)
train_loader, test_loader, dict_class_to_id, dict_id_to_class = main_preprocessing_cnn(
    max_size=max_size, k_mer=k_mer, **data_selection
)

n_out_features = len(dict_class_to_id)

# Rebuild the architecture from the configuration, then restore the saved
# weights and switch the model to evaluation mode.
conv_class = classifier_GD_2_ACM(
    n_out_features=n_out_features,
    parameter_config=parameter_config,
)

model_path = analysis_path + 'model.pt'
acm_path = analysis_path + 'ACM{}'.format(slash)

saved_state = torch.load(model_path)
conv_class.load_state_dict(saved_state)
conv_class.eval()

# Second column of each table, selected by position.
X_train_col, y_train_col = X_train.iloc[:, 1], y_train.iloc[:, 1]
X_test_col, y_test_col = X_test.iloc[:, 1], y_test.iloc[:, 1]

# One transposed fixed-size vector per sample. `column[row]` is the same
# bracket lookup as before (not `.iloc`), so samples are selected exactly as
# they were.
new_X_test = np.array([
    get_homogenous_vector(X_test_col[row], max_size).transpose()
    for row in range(len(X_test))
])
new_X_train = np.array([
    get_homogenous_vector(X_train_col[row], max_size).transpose()
    for row in range(len(X_train))
])

# Weights of the first convolution as a NumPy array.
first_conv = conv_class.conv.conv1
weight = first_conv.weight.data.numpy()

In [None]:
# Build one image: the sample's activation map on top, two separator rows,
# then one activation map per first-layer kernel with a separator row between
# consecutive kernel maps.

# Separator: a single row of height 1, width 15350 // 5 and 3 channels, filled
# with 256 (presumably rendered as white once written - TODO confirm).
gap = np.zeros((1, 15350 // 5, 3)) + 256
all_acm = {}

test_id = 3
n_kernels = weight.shape[0]

# One activation map per kernel, keyed by get_letters(...) of that kernel.
# Kernels that map to the same key overwrite each other.
for i in range(n_kernels):
    print('Computing kernel {} over {}'.format(i, n_kernels), end='\r')
    all_acm[get_letters(weight[i][0])] = get_kernel_activation_map(X_test_col, new_X_test, test_id, weight, i, max_size=300)


all_kernels, dict_sample_pred, probs = create_one_activation_map_with_return(X_test_col, new_X_test, y_test_col, test_id, dict_id_to_class, parameter_config, analysis_path, max_size=300)

# Collect every piece and concatenate once instead of growing the array step
# by step.
image_rows = [all_kernels, gap, gap]
kernel_keys = sorted(all_acm)
for index, key in enumerate(kernel_keys):
    image_rows.append(all_acm[key])
    # The condition used to test the stale loop variable `i` left over from the
    # loop above, which always equals the last kernel index, so no separator
    # was ever inserted. Compare the position in this loop instead.
    if index != len(kernel_keys) - 1:
        image_rows.append(gap)
all_kernels = np.concatenate(image_rows)

cv2.imwrite('model_results{}00008_analysis_V4_5_good{}ACM_with_kernels{}real_{}_predicted_{}_proba_{}%.png'.
            format(slash, slash, slash, dict_sample_pred['real_class'], dict_sample_pred['prediction'], int(probs*100)),
            all_kernels)

## Get model structure image

In [None]:
import torch
from torchviz import make_dot
from models.cnn_model import classifier_GD_2_ACM

In [None]:
# Settings used only to instantiate a model for drawing its structure.
n_out_features = 44
parameter_config = dict(
    sequence_origin='DairyDB',
    primers_origin='DairyDB',
    selected_primer='V4',
    taxonomy_level=1,
    dimension=2,
    k_mer=1,
    vector_max_size=300,
    out_channel_1=128,
    out_channel_2=128,
    kernel_size_1_W=14,
    kernel_size_2_W=14,
    max_pool_stride_1_W=6,
    max_pool_stride_2_W=6,
    n_epochs=30,
    learning_rate=1e-3,
)

In [None]:
# Instantiate the ACM variant of the 2D classifier from the class count and the
# hyper-parameter dictionary defined in the previous cell.
conv_class = classifier_GD_2_ACM(n_out_features, parameter_config)

In [None]:
# Random input of shape (1, 4, 300) with gradient tracking switched on in
# place, so it is part of the autograd graph.
x = torch.randn(1, 4, 300)
x.requires_grad_(True)
y = conv_class(x)

# Name the model parameters and the input tensor for the graph, then render it
# to a PNG named "attached".
graph_params = dict(conv_class.named_parameters())
graph_params['x'] = x
graph = make_dot(y, params=graph_params)
graph.render("attached", format="png")