In [1]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import functions
import models
import embedder
import training_functions
from torch.utils import data
import glob
import dataset
from preprocessing import linear_interpolation_collate_fn
import time
import samplers
import frontier
import pandas as pd

# Select the compute device: the first CUDA GPU when one is available,
# otherwise the CPU. (A hard-coded "cuda:0" here would break CPU-only runs.)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Device in use : '+str(device))

# Shared settings dictionary; further entries are added to it below.
parameters = dict()
parameters['device'] = device
# Wall-clock timestamp taken at setup time.
parameters['tmps_form_last_step'] = time.time()

# All data-loader settings live in this dictionary.
# TODO: to be reviewed for the tweet data.
dataloader_params = {
    'dataset': None,  # placeholder, replaced once the dataset object exists
    'batch_size': 2,
    'shuffle': False,
    'batch_sampler': samplers.OppositeSameSizeTwoSentenceBatchSampler,
    'sampler': None,
    'num_workers': 0,
    'collate_fn': linear_interpolation_collate_fn,
    'pin_memory': False,
    'drop_last': False,
    'timeout': 0,
    'worker_init_fn': None,
    # NOTE(review): the meaning of divide_by / divide_at is not visible here;
    # presumably consumed by the batch sampler - confirm.
    'divide_by': [1, 2, 5, 20],
    'divide_at': [0, 20, 30, 50],
}

# All embedder settings live in this dictionary.
embedder_params = {
    'path': './data/model_embedding/fine_tune_W2V.model',
    'padding_idx': None,
    'max_norm': None,
    'norm_type': 2.0,
    'scale_grad_by_freq': False,
    'sparse': False,
    '_weight': None,
}

# Instantiate the embedder from those settings and move it to the chosen device.
parameters['embedder'] = embedder.W2VCustomEmbedding(**embedder_params).to(parameters['device'])

# Create the Yelp review dataset and store it in the data-loader settings.
dataloader_params['dataset'] = dataset.YelpTweetDataset(
    path='../Data/Yelp/', file_name='20review_binary', file_type='csv',
    text_column='text', label_column='target',
    return_id=True,
    device=parameters['device'],
)

# Hand the shared settings (which hold the embedder) to the dataset.
dataloader_params['dataset'].set_embedder(parameters)

# Vocabulary index of the '<pad>' token, looked up in the embedder.
parameters['pad_token'] = parameters['embedder'].word2index['<pad>']

# Largest entry of the dataset's `size` collection, computed once and reused
# for both the log line and the encoder configuration.
# NOTE(review): presumably these are per-sentence lengths - confirm.
max_sentence_length = max(dataloader_params['dataset'].size)
print('Longer sentence in data : '+str(max_sentence_length))

# All encoder settings live in this dictionary.
encoder_params = dict(
    embedder=parameters['embedder'],
    dropout_p=0.1,
    device=parameters['device'],
    teacher_forcing_ratio=0,  # not training
    num_layers=2,
    bidirectional=False,
    encode_size=512,
    max_length=max_sentence_length
)

# classifier_params = dict(
#     embedder=parameters['embedder'],
#     dropout=0.5,
#     layer_dropout=0.3,
#     device=parameters['device'], # TODO: decide whether to keep this
#     n_layers=2,
#     bidirectional=False,
#     n_hidden=512,
#     n_out=2 # TODO: derive the number of classes from the dataset
# )

# Model settings: the number of target classes is read from the dataset.
model_params = {'num_class': dataloader_params['dataset'].num_class}

# Build the attention auto-encoder, then wrap it in the combined
# encoder / classifier / decoder model and move everything to the device.
parameters['encoder_model'] = models.AttnAutoEncoderRNN(**encoder_params).to(parameters['device'])
parameters['model'] = models.EncoderClassifierDecoder(
    parameters['encoder_model'],
    parameters['embedder'],
    model_params['num_class'],
    parameters['device'],
).to(parameters['device'])

# Expose the sub-modules of the combined model under their own keys.
parameters['encoder_model'] = parameters['model'].encoder
parameters['classifier_model'] = parameters['model'].classifier

name_execution = 'FromGPU4_EncoderUnique'  # TODO: change for each run

# Locate the checkpoint to restore. Fail loudly when it is missing: silently
# skipping the load would leave the model with untrained weights.
checkpoint_pattern = "./executions/" + str(name_execution) + "/models/Model_Epoch_5.pt"
checkpoint_paths = glob.glob(checkpoint_pattern)
if not checkpoint_paths:
    raise FileNotFoundError('No model checkpoint found at : ' + checkpoint_pattern)

for checkpoint_path in checkpoint_paths:
    print('model import : '+str(checkpoint_path))
    parameters['model'].load_state_dict(
        torch.load(str(checkpoint_path), map_location=parameters['device']))

# Switch to evaluation mode; as the last expression of the cell this also
# displays the model structure.
parameters['model'].eval()

Device in use : cuda:0
sizes
104364
104364
Longer sentence in data : 20
cuda:0
model import : ./executions/FromGPU4_EncoderUnique/models/Model_Epoch_5.pt


EncoderClassifierDecoder(
  (encoder): AttnAutoEncoderRNN(
    (embedder): W2VCustomEmbedding(192192, 300)
    (encoder): EncoderRNN(
      (embedding): W2VCustomEmbedding(192192, 300)
      (gru): GRU(300, 512, num_layers=2)
    )
    (decoder): AttnDecoderRNN(
      (embedding): W2VCustomEmbedding(192192, 300)
      (attn): Linear(in_features=812, out_features=21, bias=True)
      (attn_combine): Linear(in_features=812, out_features=300, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
      (gru): GRU(300, 512, num_layers=2)
      (out): Linear(in_features=512, out_features=192193, bias=True)
    )
  )
  (embedder): W2VCustomEmbedding(192192, 300)
  (classifier): Linear(in_features=1024, out_features=2, bias=True)
  (sig_out): Softmax(dim=1)
)

In [90]:
# Walk over the model parameters and keep the trainable ones that belong to
# the classifier: the one whose name contains "weight" goes to `weight`,
# any other one goes to `biais`. Both are copied to CPU as NumPy arrays.
for name, param in parameters['model'].named_parameters():
    if not param.requires_grad or "classifier" not in name:
        continue
    if "weight" not in name:
        biais = param.data.cpu().data.numpy()
    else:
        weight = param.data.cpu().data.numpy()
    print(name, param.data.shape)

classifier.weight torch.Size([2, 1024])
classifier.bias torch.Size([2])


In [91]:
import pandas as pd
import numpy as np

In [92]:
# Display the classifier weight array extracted from the model parameters.
weight

array([[-0.09971751, -0.07739043, -0.3053918 , ...,  0.01417913,
         0.17426673, -0.05969966],
       [ 0.02784912,  0.01768062,  0.13319671, ..., -0.0218176 ,
        -0.03291409,  0.1052188 ]], dtype=float32)

In [93]:
# Display the classifier bias array extracted from the model parameters.
biais

array([ 0.00771963, -0.02887552], dtype=float32)

In [110]:
# Empty frame with integer column labels 0..1023 followed by a 'biais' column.
plans = pd.DataFrame(columns=[*range(1024), 'biais'])

In [111]:
# Preview the frame; at this point it has column labels but no rows.
plans.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1015,1016,1017,1018,1019,1020,1021,1022,1023,biais


In [112]:
# Add one row per class to `plans`: the class's weight vector followed by its
# bias. DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so
# the rows are added with pd.concat instead. Each single-row frame keeps its
# own index label 0, exactly as the appended frames did.
# NOTE: re-running this cell without recreating `plans` adds the rows again.
class_rows = [
    pd.DataFrame(data=[np.append(weight[k], biais[k])], columns=plans.columns)
    for k in range(len(weight))
]
plans = pd.concat([plans, *class_rows])

In [113]:
# Preview the frame after the per-class rows have been added.
plans.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1015,1016,1017,1018,1019,1020,1021,1022,1023,biais
0,-0.099718,-0.07739,-0.305392,0.256532,-0.033397,0.220542,0.105602,-0.051412,-0.292714,0.01257,...,0.048517,-0.018064,-0.039292,0.071533,-0.090711,-0.501262,0.014179,0.174267,-0.0597,0.00772
0,0.027849,0.017681,0.133197,-0.098239,0.030657,-0.089705,0.001464,0.136552,0.187572,-0.009032,...,-0.00874,0.002646,0.013864,-0.100081,0.069355,0.321744,-0.021818,-0.032914,0.105219,-0.028876


In [114]:
import pickle as pkl

In [116]:
# Serialize the `plans` DataFrame to last_FC.pkl in the working directory.
with open(r"last_FC.pkl", "wb") as pickle_file:
    pkl.dump(plans, pickle_file)