# Implémenter GradCAM pour le CNN à la maille caractère

cf. le travail de Khaled : GracCam.ipynb

In [1]:
from pathlib import Path

# Locate the project root relative to the directory the notebook is run from.
current_dir = Path.cwd()  # working directory of the running notebook
# Two levels up from the working directory; in the recorded run this printed
# the repository root (".../nlp_understanding").
proj_path = current_dir.parent.parent 
print(proj_path)

C:\Users\wenceslas\Documents\cours\ENSAE\2A\Normal\statapp\nlp_understanding


In [4]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, TensorDataset
import pickle
import pandas as pd
from torch.autograd import Variable
import numpy as np
# from deep_nlp.cnncharclassifier import CNNCharClassifier
from deep_nlp.cnncharclassifier.char_to_tensor import charToTensor

In [19]:
# Load the train / test / validation CSV files. The paths are relative to the
# current working directory (two levels up, then data/01_raw/).
train_df= pd.read_csv(r"../../data/01_raw/allocine_train.csv")
test_df= pd.read_csv(r"../../data/01_raw/allocine_test.csv")
valid_df= pd.read_csv(r"../../data/01_raw/allocine_valid.csv")

In [20]:
# Hyperparameters; the names mirror the CNNCharClassifier constructor
# parameters (presumably passed to it later -- the call is not in this chunk).
# NOTE(review): feature_num=256 input channels with feature_size=83 filters
# looks swapped compared with the usual char-CNN setup -- confirm against the
# alphabet size produced by charToTensor.
sequence_len= 1014  # number of characters per input sequence
feature_num= 256  # in_channels of the first Conv1d
feature_size= 83  # out_channels of every Conv1d
kernel_one= 7  # kernel size of conv1 and conv2
kernel_two= 3  # kernel size of conv3..conv6 and of the max-pool layers
stride_one= 1  # stride of the convolutions
stride_two= 3  # stride of the max-pool layers
output_linear= 1024  # width of the two hidden fully connected layers
num_class= 2  # size of the output layer
dropout= 0.5  # dropout probability after fc1 and fc2

In [None]:
import torch.nn as nn


class CNNCharClassifier(nn.Module):
    """Character-level 1D CNN classifier instrumented for Grad-CAM.

    The network is six ``Conv1d`` blocks (max-pooling after blocks 1, 2 and 6)
    followed by three fully connected layers and a ``LogSoftmax``. It is split
    into three stages so the activations of the last convolution can be
    inspected:

    * ``before_conv``: conv1 .. conv6 + ReLU (up to the last conv activation),
    * ``pool``: the final max-pooling,
    * ``after_conv``: fc1, fc2, fc3 and the log-softmax.

    Args:
        sequence_len: Length of the input sequences (last input dimension).
        feature_num: Number of input channels of the first convolution.
        feature_size: Number of output channels of every convolution.
        kernel_one: Kernel size of conv1 and conv2.
        kernel_two: Kernel size of conv3..conv6 and of the max-pool layers.
        stride_one: Stride of every convolution.
        stride_two: Stride of every max-pool layer.
        output_linear: Width of the two hidden fully connected layers.
        num_class: Number of output classes.
        dropout: Dropout probability applied after fc1 and fc2.

    Raises:
        ValueError: If ``sequence_len`` is too short to survive the
            convolution / pooling stack.
    """

    def __init__(self, sequence_len, feature_num, feature_size, kernel_one, kernel_two, stride_one, stride_two
                 , output_linear, num_class, dropout):

        super().__init__()
        self.sequence_len = sequence_len
        self.feature_num = feature_num
        self.feature_size = feature_size
        self.kernel_one = kernel_one
        self.kernel_two = kernel_two
        self.stride_one = stride_one
        self.stride_two = stride_two
        # Flattened size fed to fc1. Computed layer by layer instead of the
        # closed form (sequence_len - 96) / 27, which is only exact for
        # kernels 7/3, strides 1/3 and lengths where it divides evenly.
        self.input_linear = self._flattened_length() * self.feature_size
        self.output_linear = output_linear
        self.num_class = int(num_class)
        self.dropout = dropout

        self.conv1 = nn.Sequential(
            nn.Conv1d(self.feature_num, self.feature_size, kernel_size=self.kernel_one, stride=self.stride_one),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=self.kernel_two, stride=self.stride_two)
        )

        self.conv2 = nn.Sequential(
            nn.Conv1d(self.feature_size, self.feature_size, kernel_size=self.kernel_one, stride=self.stride_one),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=self.kernel_two, stride=self.stride_two)
        )

        self.conv3 = nn.Sequential(
            nn.Conv1d(self.feature_size, self.feature_size, kernel_size=self.kernel_two, stride=self.stride_one),
            nn.ReLU()
        )

        self.conv4 = nn.Sequential(
            nn.Conv1d(self.feature_size, self.feature_size, kernel_size=self.kernel_two, stride=self.stride_one),
            nn.ReLU()
        )

        self.conv5 = nn.Sequential(
            nn.Conv1d(self.feature_size, self.feature_size, kernel_size=self.kernel_two, stride=self.stride_one),
            nn.ReLU()
        )

        # The sixth block is kept as three separate modules so that the
        # activation before the last max-pool can be exposed for Grad-CAM.
        self.conv6_conv = nn.Conv1d(self.feature_size, self.feature_size
                                    , kernel_size=self.kernel_two, stride=self.stride_one)
        self.conv6_relu = nn.ReLU()
        self.conv6_maxpool = nn.MaxPool1d(kernel_size=self.kernel_two, stride=self.stride_two)

        self.fc1 = nn.Sequential(
            nn.Linear(self.input_linear, self.output_linear),
            nn.ReLU(),
            nn.Dropout(p=self.dropout)
        )

        self.fc2 = nn.Sequential(
            nn.Linear(self.output_linear, self.output_linear),
            nn.ReLU(),
            nn.Dropout(p=self.dropout)
        )

        self.fc3 = nn.Linear(self.output_linear, self.num_class)
        self.log_softmax = nn.LogSoftmax(dim=1)

        self.weight_init()

        # Stage 1: everything up to (and including) the last conv activation.
        self.before_conv = nn.Sequential()
        self.before_conv.add_module("conv1", self.conv1)
        self.before_conv.add_module("conv2", self.conv2)
        self.before_conv.add_module("conv3", self.conv3)
        self.before_conv.add_module("conv4", self.conv4)
        self.before_conv.add_module("conv5", self.conv5)
        self.before_conv.add_module("conv6_conv", self.conv6_conv)
        self.before_conv.add_module("conv6_relu", self.conv6_relu)

        # Stage 2: the max-pool applied to the last conv activation.
        self.pool = nn.Sequential()
        self.pool.add_module("conv6_maxpool", self.conv6_maxpool)

        # Stage 3: the classifier head.
        self.after_conv = nn.Sequential()
        self.after_conv.add_module("fc1", self.fc1)
        self.after_conv.add_module("fc2", self.fc2)
        self.after_conv.add_module("fc3", self.fc3)
        self.after_conv.add_module("log_softmax", self.log_softmax)

        # Filled by ``activations_hook`` during the backward pass.
        self.gradients = None

    @staticmethod
    def _out_len(length, kernel, stride):
        """Return the output length of an unpadded Conv1d / MaxPool1d layer."""
        return (length - kernel) // stride + 1

    def _flattened_length(self):
        """Return the sequence length left after the conv / pooling stack."""
        length = int(self.sequence_len)
        # conv1 and conv2: convolution with kernel_one, then max-pool.
        for _ in range(2):
            length = self._out_len(length, self.kernel_one, self.stride_one)
            length = self._out_len(length, self.kernel_two, self.stride_two)
        # conv3 .. conv6: convolution with kernel_two, no pooling in between.
        for _ in range(4):
            length = self._out_len(length, self.kernel_two, self.stride_one)
        # Final max-pool after conv6.
        length = self._out_len(length, self.kernel_two, self.stride_two)
        if length <= 0:
            raise ValueError(
                "sequence_len={} is too short for the convolution stack".format(self.sequence_len)
            )
        return length

    def weight_init(self):
        """Draw every Conv1d / Linear weight from N(0, 0.05).

        Biases keep PyTorch's default initialisation.
        """
        for module in self.modules():
            if isinstance(module, (nn.Conv1d, nn.Linear)):
                nn.init.normal_(module.weight, mean=0.0, std=0.05)

    def activations_hook(self, grad):
        """Tensor hook: store the gradient flowing into the last conv activation."""
        self.gradients = grad

    def get_activations_gradient(self):
        """Return the gradient captured by the last backward pass (or ``None``)."""
        return self.gradients

    def get_activations(self, x):
        """Return the activations of the last convolution (before pooling)."""
        return self.before_conv(x)

    def forward(self, x):
        """Return class log-probabilities of shape (batch, num_class).

        ``x`` is expected as (batch, feature_num, sequence_len). When autograd
        is recording, a hook is attached to the last conv activation so that
        ``get_activations_gradient`` is populated by ``backward()``.
        """
        x = self.get_activations(x)
        # register_hook raises on tensors that do not require grad
        # (e.g. under torch.no_grad()), so only attach it when tracking.
        if x.requires_grad:
            x.register_hook(self.activations_hook)
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = self.after_conv(x)
        return x

In [None]:
from abc import abstractmethod

class GradCamBaseModel(nn.Module):
    """Base class for models that expose Grad-CAM activations and gradients.

    Subclasses are expected to fill the three (initially empty) stages and to
    implement ``forward``:

    * ``before_conv``: layers up to the activation to explain,
    * ``pool``: pooling applied to that activation,
    * ``after_conv``: the remaining layers.
    """

    def __init__(self):
        super().__init__()

        self.before_conv = nn.Sequential()
        self.pool = nn.Sequential()
        self.after_conv = nn.Sequential()
        # Filled by ``activations_hook`` during the backward pass.
        self.gradients = None

    def get_activations(self, x):
        """Return the output of the ``before_conv`` stage for ``x``."""
        return self.before_conv(x)

    def activations_hook(self, grad):
        """Tensor hook: store the gradient it receives in ``self.gradients``."""
        self.gradients = grad

    def get_activations_gradient(self):
        """Return the gradient captured by the last backward pass (or ``None``)."""
        return self.gradients

    def get_hook(self, x):
        """Register ``activations_hook`` on tensor ``x`` and return the handle.

        ``x`` must require grad; ``Tensor.register_hook`` raises otherwise.
        """
        return x.register_hook(self.activations_hook)

    # The decorator only documents intent: without an ABC metaclass it is not
    # enforced, so the body raises explicitly instead of returning None.
    @abstractmethod
    def forward(self, *args):
        """Compute the model output; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(
            "{} must implement forward()".format(type(self).__name__)
        )

In [28]:
# Scratch check of torch.mean with a list-valued `dim`: averaging a 4x4
# tensor over dimension 0 yields one mean per column (see recorded output).
a = torch.randn(4, 4)
print(a)
torch.mean(a, dim= [0])

tensor([[-1.1615,  0.0471, -0.7090,  0.1361],
        [ 0.2502,  0.3742, -0.5975,  2.1507],
        [ 0.6829, -0.2397,  0.2912,  0.0701],
        [-0.5083,  1.3377, -1.2722, -0.4939]])


tensor([-0.1842,  0.3798, -0.5719,  0.4658])