# Score all data with all models

In [None]:
import pandas as pd
import torch
from torch.utils.data import DataLoader

from deep_nlp.cnncharclassifier import CNNCharClassifier, charToTensor
from src.deep_nlp.embed_cnn.embcnnmodel_gradcam import classifier3F
from deep_nlp.bilstm_cnn.bilstmcnn_gradcam import BilstmCnn


import pickle

from deep_nlp.grad_cam.utils.letter import rebuild_text, prepare_heatmap, LetterToToken
from deep_nlp.grad_cam.plot import plot_bar_heatmap, plot_text_and_heatmap

In [None]:
from pathlib import Path

# Working directory of the running kernel.
# NOTE(review): the original comment said this points to the 'notebooks/' folder,
# yet the project root below is taken two levels up — confirm where this notebook lives.
current_dir = Path.cwd()
# Project root: two directory levels above the working directory.
proj_path = current_dir.parent.parent 
print(proj_path)

### CNN character level

In [None]:
# Hyper-parameters of the character-level CNN. Every value except
# `cnn_cuda_allow` is forwarded to CNNCharClassifier through `parameters`.
cnn_sequence_len = 1014
cnn_feature_num = 87
cnn_feature_size = 256
cnn_kernel_one = 7
cnn_kernel_two = 3
cnn_stride_one = 1
cnn_stride_two = 3
cnn_output_linear = 1024
cnn_num_class = 2
cnn_dropout = 0.5
# When True, the models wrapped in DataParallel are moved to the GPU.
cnn_cuda_allow = True

In [None]:
# Load the pickled weights of the character-level CNN; the result is later
# handed to `load_state_dict`.
model_path_saved= "data/06_models/cnn_char_classifier/cnn_char_model/cnn_char_model.pt"

# Join with pathlib rather than a hard-coded "\\" so the path also resolves on
# non-Windows hosts.
# NOTE: pickle.load can execute arbitrary code — only load trusted project artifacts.
with open(proj_path / model_path_saved, 'rb') as f:
    model_saved= pickle.load(f)

#### Engineering process

In [None]:
# Load the raw test set. The path is joined with pathlib rather than a
# hard-coded "\\" so it also resolves on non-Windows hosts.
data_df= pd.read_csv(proj_path / "data/01_raw/allocine_test.csv")

# Wrap the dataframe in the project's character-level dataset, using the same
# maximum sequence length as the model.
test_data= charToTensor(data_df= data_df, sentence_max_size= cnn_sequence_len)

# One review per batch, loaded by four worker processes.
test_load = DataLoader(test_data, batch_size= 1
                       , num_workers=4)

#### Load trained model

In [None]:
# Keyword arguments for the CNNCharClassifier constructor, assembled from the
# hyper-parameters defined in the params cell.
parameters = dict(
    sequence_len=cnn_sequence_len,
    feature_num=cnn_feature_num,
    feature_size=cnn_feature_size,
    kernel_one=cnn_kernel_one,
    kernel_two=cnn_kernel_two,
    stride_one=cnn_stride_one,
    stride_two=cnn_stride_two,
    output_linear=cnn_output_linear,
    num_class=cnn_num_class,
    dropout=cnn_dropout,
)

In [None]:
# Build the character-level CNN from the hyper-parameter dict.
model = CNNCharClassifier(**parameters)

# The model is wrapped in DataParallel whether or not CUDA is used.
# Presumably the saved state dict was produced from a DataParallel model
# ("module."-prefixed keys) — TODO confirm.
model = torch.nn.DataParallel(model)

# Move to the GPU only when one is actually present, so a CPU-only host does
# not crash on .cuda() while `cnn_cuda_allow` is left at True.
if cnn_cuda_allow and torch.cuda.is_available():
    model = model.cuda()

model.load_state_dict(model_saved)

# To obtain a plain (non-DataParallel) CPU model instead, load
# `model.module.state_dict()` into a fresh `CNNCharClassifier(**parameters).cpu()`.

# Switch to inference mode; as the last expression of the cell this also
# displays the model.
model.eval()

#### Score

In [None]:
# Accumulators for the per-batch outputs and labels of the char-level CNN.
pred_test, lab, reviews = [], [], []
alphabet = test_data.get_alphabet() + " "

# Inference only: no gradients are tracked.
with torch.no_grad():
    for review, label in test_load:
        # exp() is applied to the raw model output — presumably the model
        # returns log-probabilities; TODO confirm.
        pred_test.append(model(review).exp())
        lab.append(label.float())

# Stack the per-batch tensors into one tensor each.
pred_test = torch.cat(pred_test)
lab = torch.cat(lab)

In [None]:
# Raw review texts, in the row order of the test dataframe.
text_review_all= data_df["review"].values

In [None]:
# Result table: one row per review, with its label and the char-CNN output
# for class index 1.
# Tensors are converted to NumPy explicitly instead of relying on pandas'
# implicit handling of torch tensors, which is not guaranteed across versions.
# NOTE(review): this assumes the loader yields rows in the same order as
# `data_df` — confirm.
results= pd.DataFrame({
    "review": text_review_all
    , "label": lab.cpu().numpy()
    , "cnn_char_proba_1": pred_test[:, 1].cpu().numpy()
})

In [None]:
# Display the result table built from the char-level CNN predictions.
results

### Embedding CNN (5 filters)

#### Load Test loader

In [None]:
# Pickled test iterator for the embedding CNN.
test_iterator_cnn_embed_path= "data/02_intermediate/test_iterator_cnn_embed.pkl"

# Join with pathlib rather than a hard-coded "\\" so the path also resolves on
# non-Windows hosts.
# NOTE: pickle.load can execute arbitrary code — only load trusted project artifacts.
with open(proj_path / test_iterator_cnn_embed_path, 'rb') as f:
    test_iterator_cnn_embed= pickle.load(f)

print(type(test_iterator_cnn_embed))

#### Load Embedding

In [None]:
# Pickled embedding object, passed to the embedding CNN as its `wv` argument.
embed_for_torch_path= "data/04_feature/w2v_torch.pkl"

# Join with pathlib rather than a hard-coded "\\" so the path also resolves on
# non-Windows hosts.
# NOTE: pickle.load can execute arbitrary code — only load trusted project artifacts.
with open(proj_path / embed_for_torch_path, 'rb') as f:
    embed_for_torch= pickle.load(f)

print(type(embed_for_torch))

#### Load vocabulary

In [None]:
# Pickled vocabulary object.
word_ind_dict_path= "data/04_feature/voc.pkl"

# Join with pathlib rather than a hard-coded "\\" so the path also resolves on
# non-Windows hosts.
# NOTE: pickle.load can execute arbitrary code — only load trusted project artifacts.
with open(proj_path / word_ind_dict_path, 'rb') as f:
    word_ind_dict= pickle.load(f)

print(type(word_ind_dict))

#### Load model

In [None]:
# Constructor keyword arguments for the embedding CNN (`classifier3F`).
params_models = dict(
    wv=embed_for_torch,
    no_words=67,
    embedding_dim=200,
    nb_filter=200,
    height_filter=(1, 2, 3, 4, 5),
    output_dim=2,
    dropout=0.8,
    padded=True,
)

In [None]:
# Pickled state dict of the embedding CNN, later handed to `load_state_dict`.
embed_cnn_model_for_save_path= "data/06_models/embed_cnn/embed_cnn_classifier/embed_cnn.pt"

# Join with pathlib rather than a hard-coded "\\" so the path also resolves on
# non-Windows hosts.
# NOTE: pickle.load can execute arbitrary code — only load trusted project artifacts.
with open(proj_path / embed_cnn_model_for_save_path, 'rb') as f:
    embed_cnn_model_for_save= pickle.load(f)

print(type(embed_cnn_model_for_save))

In [None]:
# Instantiate the embedding CNN from `params_models`. This rebinds `model`,
# replacing the char-level CNN. Unlike the other two models, it is neither
# wrapped in DataParallel nor moved to the GPU.
model = classifier3F(**params_models)

# Restore the unpickled weights, then switch to inference mode.
model.load_state_dict(embed_cnn_model_for_save)
model.eval()

In [None]:
# Fresh accumulators for the embedding CNN outputs and labels.
pred_test, lab, reviews = [], [], []

# Inference only: no gradients are tracked.
with torch.no_grad():
    for review, label in test_iterator_cnn_embed:
        # NOTE(review): unlike the char-level CNN cell, no exp() is applied here —
        # confirm that this model already returns probabilities.
        batch_output = model(review)
        pred_test.append(batch_output)
        lab.append(label.float())

# Stack the per-batch tensors into one tensor each.
pred_test = torch.cat(pred_test)
lab = torch.cat(lab)

In [None]:
# Append the embedding-CNN output for class index 1 to the result table.
# The tensor is converted to NumPy explicitly instead of relying on pandas'
# implicit handling of torch tensors.
# NOTE(review): this assumes the pickled iterator yields reviews in the same
# order as the rows of `results` — confirm.
results["embed_cnn_proba_1"]= pred_test[:, 1].cpu().numpy()

### BiLSTM CNN 

#### Load embedding

In [None]:
# Pickled embedding matrix, passed as the first argument of BilstmCnn.
embed_matrix_path= "data/02_intermediate/bilstm_cnn/embed_matrix.pkl"

# Join with pathlib rather than a hard-coded "\\" so the path also resolves on
# non-Windows hosts.
# NOTE: pickle.load can execute arbitrary code — only load trusted project artifacts.
with open(proj_path / embed_matrix_path, 'rb') as f:
    embed_matrix= pickle.load(f)

print(type(embed_matrix))

#### Load Test Loader

In [None]:
# Pickled test iterator for the BiLSTM-CNN model.
test_batch_bilstm_path= "data/02_intermediate/test_iterator_cnn_bilstm.pkl"

# Join with pathlib rather than a hard-coded "\\" so the path also resolves on
# non-Windows hosts.
# NOTE: pickle.load can execute arbitrary code — only load trusted project artifacts.
with open(proj_path / test_batch_bilstm_path, 'rb') as f:
    test_batch_bilstm= pickle.load(f)

print(type(test_batch_bilstm))

#### Load Model

In [None]:
# BiLSTM + CNN settings.
# The first four values are not referenced by the scoring cells visible in
# this notebook.
num_epochs = 50
batch_size = 32
patience = 5
lr = 0.001

# Values passed positionally to the BilstmCnn constructor.
input_dim = 200
hidden_dim = 128
layer_dim = 2
feature_size = 256
output_dim = 2
kernel_size = 3
dropout_rate = 0.5
sentence_size = 67
padded = True

In [None]:
# Pickled state dict of the BiLSTM-CNN, later handed to `load_state_dict`.
bilstm_cnn_model_for_save_path= "data/06_models/bilstm_cnn/bilstm_cnn_classifier/bilstm_cnn.pt"

# Join with pathlib rather than a hard-coded "\\" so the path also resolves on
# non-Windows hosts.
# NOTE: pickle.load can execute arbitrary code — only load trusted project artifacts.
with open(proj_path / bilstm_cnn_model_for_save_path, 'rb') as f:
    bilstm_cnn_model_for_save= pickle.load(f)

print(type(bilstm_cnn_model_for_save))

In [None]:
# Build the BiLSTM-CNN. This rebinds `model`, replacing the embedding CNN.
model = BilstmCnn(embed_matrix, sentence_size, input_dim, hidden_dim
                  , layer_dim, output_dim, feature_size, kernel_size, dropout_rate, padded)

# The model is wrapped in DataParallel whether or not CUDA is used.
# Presumably the saved state dict was produced from a DataParallel model
# ("module."-prefixed keys) — TODO confirm.
model = torch.nn.DataParallel(model)

# Move to the GPU only when one is actually present, so a CPU-only host does
# not crash on .cuda() while `cnn_cuda_allow` is left at True.
if cnn_cuda_allow and torch.cuda.is_available():
    model = model.cuda()

# Restore the unpickled weights, then switch to inference mode.
model.load_state_dict(bilstm_cnn_model_for_save)
model.eval()

In [None]:
# Fresh accumulators for the BiLSTM-CNN outputs and labels.
pred_test, lab, reviews = [], [], []

# Inference only: no gradients are tracked.
with torch.no_grad():
    for review, label in test_batch_bilstm:
        # The inputs are cast to int64 before being fed to the model.
        test_reviews = review.long()
        # NOTE(review): no exp() is applied to the output — confirm that the
        # model already returns probabilities.
        pred_test.append(model(test_reviews))
        lab.append(label.float())

# Stack the per-batch tensors into one tensor each.
pred_test = torch.cat(pred_test)
lab = torch.cat(lab)

In [None]:
# Append the BiLSTM-CNN output for class index 1 to the result table.
# The tensor is converted to NumPy explicitly instead of relying on pandas'
# implicit handling of torch tensors.
# NOTE(review): this assumes the pickled iterator yields reviews in the same
# order as the rows of `results` — confirm.
results["bilstm_cnn_proba_1"]= pred_test[:, 1].cpu().numpy()

In [None]:
# Preview the first 20 rows of the combined predictions.
results.head(20)

In [None]:
# Preview the last 100 rows of the combined predictions.
results.tail(100)

In [None]:
# Sanity check on row alignment: `lab` was last rebuilt in the BiLSTM scoring
# cell, while results["label"] comes from the char-level CNN loader.
# NOTE(review): the expression below yields the number of matching labels, not
# a pass/fail — compare it with len(results) to confirm that every row lines up.
# The embedding-CNN iterator order is not checked here.
a= lab.cpu().numpy()
b= results["label"].values
sum(b == a) # check

In [None]:
# Export all predictions without the index column. The path is relative, so
# the file is written to the current working directory.
results.to_csv("all_model_prediction.csv", index= False)