### Step 1.1 : Data Viewing and Simple Preprocessing

In [1]:
import pandas as pd
import os
import json
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from transformers.data.processors.utils import InputExample

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
raw_data = pd.read_csv('./data/raw_data.csv')
raw_data.head()

Unnamed: 0,text_comments,text_only,comments_only,label,count
0,"Breaking: At least 10 dead, 5 injured after tO...","Breaking: At least 10 dead, 5 injured after tO...",The religion of peace strikes again.\n[SEP]Hi ...,rumour,9
1,France: 10 people dead after shooting at HQ of...,France: 10 people dead after shooting at HQ of...,MT France: 10 dead after shooting at HQ of sat...,rumour,7
2,Ten killed in shooting at headquarters of Fren...,Ten killed in shooting at headquarters of Fren...,must be that peace loving religion again\n[SEP...,rumour,5
3,BREAKING: 10 dead in shooting at headquarters ...,BREAKING: 10 dead in shooting at headquarters ...,WTF &gt; BREAKING 10 dead in shooting at headq...,rumour,13
4,Reuters: 10 people shot dead at headquarters o...,Reuters: 10 people shot dead at headquarters o...,watch yourself in Paris bud\n[SEP]islamist ter...,rumour,16


In [3]:
# These codes are used for data statistics only. No need to uncomment.
# data = raw_data[raw_data['count'] > 0]
# print(data['count'].min())
# print(data['count'].mean())

# raw_data['len_text'] =raw_data.text_comments.apply(lambda x: len(x.split()))

# print(raw_data['len_text'].median())
# bins = [0,50,100,150,200,250,300,350,400,450,500]
# groups = pd.cut(raw_data['len_text'],bins,right=True)
# pd.value_counts(groups).sort_index()

In [4]:
raw_data.sort_values(by='count', inplace=True)
raw_data.head(500)

Unnamed: 0,text_comments,text_only,comments_only,label,count
2484,The black &amp; unarmed group on the left is c...,The black &amp; unarmed group on the left is c...,,nonrumour,0
260,→ Charlie Hebdo attack: Hunt for killers focus...,→ Charlie Hebdo attack: Hunt for killers focus...,,rumour,0
3325,"Plane crashes in southern France, 148 on board...","Plane crashes in southern France, 148 on board...",,rumour,0
729,Terrorists shoot officer in Paris during terro...,Terrorists shoot officer in Paris during terro...,,nonrumour,0
728,It says much that men with guns feel quite so ...,It says much that men with guns feel quite so ...,,nonrumour,0
...,...,...,...,...,...
3506,NEWS Germanwings flight crashes in south of Fr...,NEWS Germanwings flight crashes in south of Fr...,is this true\n[SEP],nonrumour,1
3511,"Just landed, Barcelona to Kaunas. Found out ab...","Just landed, Barcelona to Kaunas. Found out ab...",BREAKING - Germanwings plane crashes in France...,nonrumour,1
3518,#BREAKING: A helicopter has located the Airbus...,#BREAKING: A helicopter has located the Airbus...,RT #BREAKING: A helicopter has located the Ai...,nonrumour,1
1253,.@Maryam_Rajavi message regarding the terroris...,.@Maryam_Rajavi message regarding the terroris...,Cartoons drawn for Charlie Hebdo: #CharlieHeb...,nonrumour,1


In [5]:
raw_data.shape[0]

5802

In [6]:
### IMPORTANT ###
# You may change 'model_path' to save and load different trained models.
# Availiable options: 'text_comments','text_only','commments_only','comments_group1','comments_group2','comments_group3','natural_split','fixed_split'.
# Please make sure that your 'model_path' must match the correspongding data and comments.
# For more details, please check the 'README.md' file.

model_path = 'text_comments'

In [7]:
## Different Number of Comments ##

# Please uncomment the corresponding lines if the 'model_path' is 'comments_groupX'.

# print(raw_data['count'].describe(percentiles=[0.33,0.67]))

# For'comments_group1'.
# raw_data = raw_data[raw_data['count'] <= 7]
# raw_data.shape

# For'comments_group2'.
# raw_data = raw_data[raw_data['count'] > 7]
# raw_data = raw_data[raw_data['count'] <= 18]
# raw_data.shape

# For'comments_group3'.
# raw_data = raw_data[raw_data['count'] > 18]
# raw_data.shape


In [8]:
## Data Selection ##

# You may change 'text_comments' to 'text_only' or 'comments_only' with the corresponding 'model_path' to get more experiment results.

raw_data = raw_data[['text_comments','label']]
raw_data = raw_data.rename(columns = {'text_comments':'text'})

# raw_data = raw_data[['text_only','label']]
# raw_data = raw_data.rename(columns = {'text_only':'text'})

# raw_data = raw_data[['comments_only','label']]
# raw_data = raw_data.rename(columns = {'comments_only':'text'})

raw_data.head()

Unnamed: 0,text,label
2484,The black &amp; unarmed group on the left is c...,nonrumour
260,→ Charlie Hebdo attack: Hunt for killers focus...,rumour
3325,"Plane crashes in southern France, 148 on board...",rumour
729,Terrorists shoot officer in Paris during terro...,nonrumour
728,It says much that men with guns feel quite so ...,nonrumour


In [9]:
raw_data = raw_data.dropna(axis=0)
raw_data.shape

(5802, 2)

In [10]:
raw_data['label'] = LabelEncoder().fit_transform(raw_data['label'])
raw_data.head()

Unnamed: 0,text,label
2484,The black &amp; unarmed group on the left is c...,0
260,→ Charlie Hebdo attack: Hunt for killers focus...,1
3325,"Plane crashes in southern France, 148 on board...",1
729,Terrorists shoot officer in Paris during terro...,0
728,It says much that men with guns feel quite so ...,0


In [11]:
data = raw_data.copy()
data = data.reindex(np.random.permutation(data.index))
data.head(10)

Unnamed: 0,text,label
3297,Reports: Crashed #Germanwings plane was carryi...,1
458,Charlie Hebdo became well known for publishing...,0
2811,"Ferguson, Mo., is 67% black, but black citizen...",0
2879,The live stream from #Ferguson is back up and ...,0
1878,The new world we live in #CharlieHebdo\n[SEP]I...,0
4693,The PM's office releases a statement about #s...,1
5792,A hostage situation at a Sydney cafe has come ...,0
1772,"At 87 years of age, Asterix cartoonist Uderzo ...",0
1078,Updates on #CharlieHebdo shooting from staff &...,0
4735,Map shows Martin Place in Sydney where hostage...,1


In [12]:
train, val = train_test_split(data, test_size=0.2, random_state=35)

In [13]:
train.reset_index(drop=True, inplace=True)
val.reset_index(drop=True, inplace=True)

train.head(10)

Unnamed: 0,text,label
0,All passengers and crew feared dead after A320...,1
1,BREAKING - Germanwings #A320 from Barcelona to...,1
2,→ 41 Charlie Hebdo Paris shooting: New killing...,0
3,Update - AFP reports at least two people kille...,1
4,Let´s get serious about #CharlieHebdo and West...,0
5,"This made me tear up, I am SO proud to be Aust...",0
6,"Incidents occurred at National War Memorial, n...",1
7,Police: 2 hostage situations near Paris believ...,0
8,In sympathy and solidarity with #CharlieHebdo...,0
9,Salman Rushdie: “Respect for religion” has bec...,0


In [14]:
train.shape,val.shape

((4641, 2), (1161, 2))

### Step 1.2 : Split the Dataset into Segments

In [15]:
from utils import get_split,get_natural_split,get_fixed_split

In [16]:
train_tmp = train.copy()

### IMPORTANT ###
# If your 'model_path' is 'natural_split' please use 'get_natural_split' function.
# If your 'model_path' is 'fixed_split' please use 'get_fixed_split' function.

train_tmp['text_split'] = train['text'].apply(get_split)
# train_tmp['text_split'] = train['text'].apply(get_fixed_split)
# train_tmp['text_split'] = train['text'].apply(get_natural_split)
train = train_tmp
train.head()

Unnamed: 0,text,label,text_split
0,All passengers and crew feared dead after A320...,1,[All passengers and crew feared dead after A32...
1,BREAKING - Germanwings #A320 from Barcelona to...,1,[BREAKING - Germanwings #A320 from Barcelona t...
2,→ 41 Charlie Hebdo Paris shooting: New killing...,0,[→ 41 Charlie Hebdo Paris shooting: New killin...
3,Update - AFP reports at least two people kille...,1,[Update - AFP reports at least two people kill...
4,Let´s get serious about #CharlieHebdo and West...,0,[Let´s get serious about #CharlieHebdo and Wes...


In [17]:
val_tmp = val.copy()

### IMPORTANT ###
# If your 'model_path' is 'natural_split' please use 'get_natural_split' function.
# If your 'model_path' is 'fixed_split' please use 'get_fixed_split' function.

val_tmp['text_split'] = val['text'].apply(get_split)
# val_tmp['text_split'] = val['text'].apply(get_fixed_split)
# val_tmp['text_split'] = val['text'].apply(get_natural_split)
val = val_tmp
val.head()

Unnamed: 0,text,label,text_split
0,Grand Mufti Prof. Ibrahim Abu Mohamed has cond...,0,[Grand Mufti Prof. Ibrahim Abu Mohamed has con...
1,"Before his death #MikeBrown told his mother, ""...",1,"[Before his death #MikeBrown told his mother, ..."
2,The reason to have a #JeNeSuisPasCharlie convo...,0,[The reason to have a #JeNeSuisPasCharlie conv...
3,Charlie Hebdo: There is no way they will make ...,0,[Charlie Hebdo: There is no way they will make...
4,French police still haven’t found the #Charlie...,0,[French police still haven’t found the #Charli...


In [18]:
train_l = []  # Segmented Text
label_l = []  # Label of Each Text
index_l =[]   # The Index of Each Text Before Segmentation
for idx,row in train.iterrows():
  for l in row['text_split']:
    train_l.append(l)
    label_l.append(row['label'])
    index_l.append(idx)
len(train_l), len(label_l), len(index_l)

(6791, 6791, 6791)

In [19]:
val_l = []
val_label_l = []
val_index_l = []
for idx,row in val.iterrows():
  for l in row['text_split']:
    val_l.append(l)
    val_label_l.append(row['label'])
    val_index_l.append(idx)
len(val_l), len(val_label_l), len(val_index_l)

(1746, 1746, 1746)

In [20]:
train_df = pd.DataFrame({'text':train_l, 'label':label_l})
train_df.head()

Unnamed: 0,text,label
0,All passengers and crew feared dead after A320...,1
1,BREAKING - Germanwings #A320 from Barcelona to...,1
2,→ 41 Charlie Hebdo Paris shooting: New killing...,0
3,Update - AFP reports at least two people kille...,1
4,Let´s get serious about #CharlieHebdo and West...,0


In [21]:
val_df = pd.DataFrame({'text':val_l, 'label':val_label_l})
val_df.head()

Unnamed: 0,text,label
0,Grand Mufti Prof. Ibrahim Abu Mohamed has cond...,0
1,"Before his death #MikeBrown told his mother, ""...",1
2,The reason to have a #JeNeSuisPasCharlie convo...,0
3,Charlie Hebdo: There is no way they will make ...,0
4,French police still haven’t found the #Charlie...,0


In [22]:
train_InputExamples = train_df.apply(lambda x: InputExample(guid=None,text_a = x['text'], text_b = None, label = x['label']), axis = 1)

val_InputExamples = val_df.apply(lambda x: InputExample(guid=None, text_a = x['text'], text_b = None, label = x['label']), axis = 1)


### Step 2 : Define Models For Bert Classification

In [23]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss, MSELoss
import torch.nn.functional as F

import torch.optim as optim
from torchtext.data import BucketIterator

from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange

from transformers import (
    WEIGHTS_NAME,
    AdamW,
    get_linear_schedule_with_warmup,
    BertConfig,
    BertModel,
    BertPreTrainedModel,
    BertTokenizer,
    BertweetTokenizer,
    AutoModel,
    AutoTokenizer
)

from transformers import glue_convert_examples_to_features as convert_examples_to_features
from transformers import glue_output_modes as output_modes
from transformers import glue_processors as processors
from transformers.data.processors.utils import InputExample, DataProcessor

import logging

logger=logging.getLogger(__name__)


In [24]:
MODEL_CLASSES={
    "bert":(BertConfig,BertTokenizer),
    "bertweet":(BertConfig,BertweetTokenizer)
}

my_label_list=[0, 1]
MAX_SEQ_LENGTH=200

In [25]:
class BertForClassification(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = 2

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.num_labels)

        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
    ):

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
        )

        sequence_output, pooled_output=outputs[:2]

        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        
        outputs = (logits, pooled_output, sequence_output,)

        if labels is not None:
            
            if self.num_labels == 1:
                loss_fct = MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs

        
        return outputs  # loss, logits, pooled_output, sequence_output


### Step 3.1 : Load Pre-training Models & Prepare Training Data

In [25]:
# # Load Pre-training Models
# args={"model_name_or_path": "bert-base-uncased",
#     "config_name": "bert-base-uncased",
#     "tokenizer_name": "bert-base-uncased",
#       }

# config_class, tokenizer_class = MODEL_CLASSES["bert"]
# model_class=BertForClassification


# config = config_class.from_pretrained(
#     args["config_name"],
#     finetuning_task="", 
#     cache_dir=None,
# )
# tokenizer = tokenizer_class.from_pretrained(
#     args["tokenizer_name"],
#     do_lower_case=True,
#     cache_dir=None,
# )
# model = model_class.from_pretrained(
#     args["model_name_or_path"],
#     from_tf=bool(".ckpt" in args["model_name_or_path"]),
#     config=config,
#     cache_dir=None,
# )


# model.to("cuda")

In [26]:
# # Prepare Training Data
# train_features = convert_examples_to_features(train_InputExamples,tokenizer, label_list=my_label_list, 
#                                               output_mode="classification", max_length=MAX_SEQ_LENGTH )

In [27]:
# input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
# attention_mask = torch.tensor([f.attention_mask for f in train_features], dtype=torch.long)
# token_type_ids = torch.tensor([f.token_type_ids for f in train_features], dtype=torch.long)
# the_labels = torch.tensor([f.label for f in train_features], dtype=torch.long)


# dataset = TensorDataset(input_ids, attention_mask, token_type_ids, the_labels)

### Step 3.2 : Train & Save Models

In [28]:
# # Define Train Function For Bert Classification

# def train(train_dataset,model,tokenizer):
#     no_decay=["bias","LayerNorm.weight"]
#     optimizer_grouped_parameters=[
#         {
#             "params":[p for n,p in model.named_parameters() if not any(nd in n for nd in no_decay)],
#             "weight_decay":0.0,

#         },
#         {
#             "params": [p for n,p in model.named_parameters() if any(nd in n for nd in no_decay)],
#             "weight_decay":0.0
#         },
#     ]

    
#     t_total=len(train_dataset)// 5
#     optimizer=AdamW(optimizer_grouped_parameters,lr=2e-5,eps=1e-8)
    
#     scheduler=get_linear_schedule_with_warmup(
#         optimizer,num_warmup_steps=0,num_training_steps=t_total
#         )
    
    
    
#     # *********************
#     logger.info("*****Running training*****")
#     logger.info("  Num examples = %d", len(train_dataset))
#     logger.info("  Num Epochs = %d", 5)


#     epochs_trained=0
#     global_step=0
#     steps_trained_in_current_epoch=0

#     tr_loss,logging_loss=0.0,0.0
#     model.zero_grad()
#     train_iterator=trange(epochs_trained,5,desc="Epoch",disable=False)


#     for k in train_iterator: #5 epoch
    
#         train_sampler=RandomSampler(train_dataset)
#         train_dataloader=DataLoader(train_dataset,sampler=train_sampler,batch_size=16)
#         epoch_iterator=tqdm(train_dataloader,desc="Iteration",disable=False)

#         for step,batch in enumerate(epoch_iterator): 
#             if steps_trained_in_current_epoch>0:
#                 steps_trained_in_current_epoch-=1
#                 continue

#             model.train()
#             batch=tuple(t.to("cuda") for t in batch)
            
#             inputs={"input_ids": batch[0],"attention_mask": batch[1],"token_type_ids": batch[2], "labels": batch[3]}
#             outputs = model(**inputs)
#             loss=outputs[0]
 
#             loss.backward()

#             tr_loss+=loss.item()
#             if (step+1)%1==0:
#                 torch.nn.utils.clip_grad_norm_(model.parameters(),1.0)

#                 optimizer.step()
#                 scheduler.step()
#                 model.zero_grad()
#                 global_step+=1

#         logger.info("average loss:" +str(tr_loss/global_step))


#     return global_step,tr_loss/global_step

In [29]:
# # Start Training

# torch.cuda.empty_cache()
# train(dataset,model,tokenizer)

In [30]:
# # Save Trained Model Parameters

# import os
# model.save_pretrained("./trained_models/classification_models_" + model_path)
# tokenizer.save_pretrained("./trained_models/classification_models_" + model_path)

# torch.save(args,os.path.join("./trained_models/classification_models_" + model_path,"training_args.bin"))

### Step 4.1 : Load the Trained Model & Prepare Data for Bert Model Evaluation

In [31]:
# Start Loading the trained model data

args_eval={"model_name_or_path": "./trained_models/classification_models_" + model_path,
    "config_name": "./trained_models/classification_models_" + model_path,
    "tokenizer_name": "./trained_models/classification_models_" + model_path,
      }

config_class, tokenizer_class = MODEL_CLASSES["bert"]
model_class=BertForClassification


config = config_class.from_pretrained(
    args_eval["config_name"],
    finetuning_task="", 
    cache_dir=None,
)
tokenizer = tokenizer_class.from_pretrained(
    args_eval["tokenizer_name"],
    do_lower_case=True,
    cache_dir=None,
)
model = model_class.from_pretrained(
    args_eval["model_name_or_path"],
    from_tf=bool(".ckpt" in args_eval["model_name_or_path"]),
    config=config,
    cache_dir=None,
)


model.to("cuda")

BertForClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_aff

In [32]:
# Prepare Data for Evaluation

val_features = convert_examples_to_features(val_InputExamples, tokenizer, label_list=my_label_list, output_mode="classification",  max_length=MAX_SEQ_LENGTH )


val_input_ids = torch.tensor([f.input_ids for f in val_features], dtype=torch.long)
val_attention_mask = torch.tensor([f.attention_mask for f in val_features], dtype=torch.long)
val_token_type_ids = torch.tensor([f.token_type_ids for f in val_features], dtype=torch.long)
val_the_labels = torch.tensor([f.label for f in val_features], dtype=torch.long)


eval_dataset = TensorDataset(val_input_ids, val_attention_mask, val_token_type_ids, val_the_labels)



### Step 4.2 : Bert Classification Model Evaluation

In [33]:
from sklearn.metrics import f1_score

In [34]:
def evaluate(model, tokenizer, eval_dataset):


    logger.info("***** Running evaluation  *****")
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", 16)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    eval_sampler =RandomSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=16)

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        model.eval()
        batch = tuple(t.to("cuda") for t in batch)

        with torch.no_grad():
            inputs = {"input_ids": batch[0], "attention_mask": batch[1], "labels": batch[3]}
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps

    preds = np.argmax(preds, axis=1)
    
    accuracy,f1 = acc_and_f1(preds, out_label_ids)


    return accuracy,f1,eval_loss

In [35]:
def simple_accuracy(preds, labels):
    return (preds == labels).mean()

def acc_and_f1(preds, labels):
    acc = simple_accuracy(preds, labels)
    f1 = f1_score(y_true=labels, y_pred=preds)
    return acc, f1


In [36]:
accuracy,f1 ,eval_loss = evaluate(model, tokenizer, eval_dataset)

print("Accuracy: ",accuracy, "F1 Score: ",f1,"Loss: ",eval_loss)

Evaluating: 100%|██████████| 110/110 [00:11<00:00,  9.47it/s]

Accuracy:  0.9379624359704041 F1 Score:  0.8995391705069123 Loss:  0.1821067643859847





### Step 5.1 : Get Text Embeddings & Combine Embeddings with Labels

In [26]:
def get_prediction(model, tokenizer, dataset):

    logger.info("***** Running prediction  *****")
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", 16)

    pooled_outputs = None

    sampler =SequentialSampler(dataset)
    dataloader = DataLoader(dataset, sampler=sampler, batch_size=32)

    for batch in tqdm(dataloader, desc="Evaluating"):
        model.eval()
        batch = tuple(t.to("cpu") for t in batch)

        with torch.no_grad():
            inputs = {"input_ids": batch[0], "attention_mask": batch[1], "labels": batch[3]}
            outputs = model(**inputs)
            pooled_output = outputs[2]

            if pooled_outputs is None:
                pooled_outputs = pooled_output.detach().cpu().numpy()
            else:
                pooled_outputs = np.append(pooled_outputs, pooled_output.detach().cpu().numpy(), axis=0)

    return pooled_outputs

In [27]:
args_eval={"model_name_or_path": "./trained_models/classification_models_" + model_path,
    "config_name": "./trained_models/classification_models_" + model_path,
    "tokenizer_name": "./trained_models/classification_models_" + model_path,
      }


config_class, tokenizer_class = MODEL_CLASSES["bert"]
model_class=BertForClassification


config = config_class.from_pretrained(
    args_eval["config_name"],
    finetuning_task="", 
    cache_dir=None,
)
tokenizer = tokenizer_class.from_pretrained(
    args_eval["tokenizer_name"],
    do_lower_case=True,
    cache_dir=None,
)
model = model_class.from_pretrained(
    args_eval["model_name_or_path"],
    from_tf=bool(".ckpt" in args_eval["model_name_or_path"]),
    config=config,
    cache_dir=None,
)


# model.to("cuda")

In [28]:
train_features = convert_examples_to_features(train_InputExamples,tokenizer, label_list=my_label_list, output_mode="classification", max_length=MAX_SEQ_LENGTH )

val_features = convert_examples_to_features(val_InputExamples, tokenizer, label_list=my_label_list, output_mode="classification",  max_length=MAX_SEQ_LENGTH )



In [29]:
train_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
train_attention_mask = torch.tensor([f.attention_mask for f in train_features], dtype=torch.long)
train_token_type_ids = torch.tensor([f.token_type_ids for f in train_features], dtype=torch.long)
train_the_labels = torch.tensor([f.label for f in train_features], dtype=torch.long)

train_dataset = TensorDataset(train_input_ids, train_attention_mask, train_token_type_ids, train_the_labels)

In [30]:
val_input_ids = torch.tensor([f.input_ids for f in val_features], dtype=torch.long)
val_attention_mask = torch.tensor([f.attention_mask for f in val_features], dtype=torch.long)
val_token_type_ids = torch.tensor([f.token_type_ids for f in val_features], dtype=torch.long)
val_the_labels = torch.tensor([f.label for f in val_features], dtype=torch.long)

val_dataset = TensorDataset(val_input_ids, val_attention_mask, val_token_type_ids, val_the_labels)

In [31]:
train_pooled_outputs = get_prediction(model, tokenizer, train_dataset)
train_pooled_outputs.shape

Evaluating: 100%|█████████████████████████████████████████████████████████████████████| 213/213 [39:37<00:00, 11.16s/it]


(6791, 768)

In [32]:
val_pooled_outputs = get_prediction(model, tokenizer, val_dataset)
val_pooled_outputs.shape

Evaluating: 100%|███████████████████████████████████████████████████████████████████████| 55/55 [09:24<00:00, 10.26s/it]


(1746, 768)

In [33]:
# Method 1: Feature Concatenation
train_x = {}
# print(index_l)
for l, emb in zip(index_l, train_pooled_outputs):
    # print(l)
    if l in train_x.keys():
        # np.vstack on lists represents features concatenation 
        train_x[l]  =np.vstack([train_x[l], emb])
    else:
        train_x[l] = [emb]

train_l_final = []
label_l_final = []
for k in train_x.keys():
    train_l_final.append(train_x[k])
    label_l_final.append(train.loc[k]['label'])

df_train = pd.DataFrame({'emb': train_l_final, 'label': label_l_final})
df_train.head(10)

Unnamed: 0,emb,label
0,"[[-0.59535825, 0.22133827, -0.064959064, 0.286...",1
1,"[[-0.03500444, 0.48017377, -0.0060315444, -0.2...",1
2,"[[0.60248107, -0.36487758, -0.023010079, 0.122...",0
3,"[[-0.54029393, 0.24938576, 0.5865337, -0.04925...",1
4,"[[0.53922117, -0.45679554, 0.12911734, 0.17952...",0
5,"[[0.630705, -0.21441744, 0.8533651, -0.0937718...",0
6,"[[-0.5761065, 0.23694296, 0.040113527, -0.0188...",1
7,"[[0.09709701, -0.20442064, -0.72128963, 0.2803...",0
8,"[[0.48888493, -0.36267853, 0.6068989, 0.070144...",0
9,"[[0.28556317, -0.47153112, 0.69443136, 0.27363...",0


In [34]:
# Method 1: Feature Concatenation
val_x = {}

for l, emb in zip(val_index_l, val_pooled_outputs):
    if l in val_x.keys():
        val_x[l]  =np.vstack([val_x[l], emb])
    else:
        val_x[l] = [emb]


val_l_final = []
vlabel_l_final = []
for k in val_x.keys():
    val_l_final.append(val_x[k])
    vlabel_l_final.append(val.loc[k]['label'])

df_val = pd.DataFrame({'emb': val_l_final, 'label': vlabel_l_final})
df_val.head(10)

Unnamed: 0,emb,label
0,"[[0.68049216, -0.26034823, -0.4906886, -0.0074...",0
1,"[[0.92846686, 0.501028, 0.941066, -0.9366698, ...",1
2,"[[0.4172276, -0.3754966, 0.62748855, 0.3172978...",0
3,"[[0.55138415, -0.105106525, 0.6977908, -0.1001...",0
4,"[[-0.8112874, -0.14650372, 0.7057784, 0.272934...",0
5,"[[0.64057475, -0.3374844, 0.06960024, -0.01647...",0
6,"[[0.5044643, -0.30644172, -0.19273518, 0.30636...",0
7,"[[-0.33219296, 0.0666637, -0.3166317, -0.09060...",1
8,"[[-0.51913524, -0.39834502, 0.31883827, 0.6338...",0
9,"[[0.538575, -0.42830735, -0.1163139, 0.1630735...",0


In [46]:
# # Method 2: Feature Average Pooling
# train_x = {}
# for l, emb in zip(index_l, train_pooled_outputs):
#     if l in train_x.keys():
#         train_x[l]  =np.vstack([train_x[l], emb])
#     else:
#         train_x[l] = [emb]

# for l in train_x.keys():
#     # print(len(train_x[l]))
#     train_x[l] = [np.mean(train_x[l],axis=0)]

# train_l_final = []
# label_l_final = []
# for k in train_x.keys():
#     train_l_final.append(train_x[k])
#     label_l_final.append(train.loc[k]['label'])

# df_train = pd.DataFrame({'emb': train_l_final, 'label': label_l_final})
# df_train.head(10)

In [47]:
# # Method 2: Feature Average Pooling
# val_x = {}

# for l, emb in zip(val_index_l, val_pooled_outputs):
#     if l in val_x.keys():
#         val_x[l]  =np.vstack([val_x[l], emb])
#     else:
#         val_x[l] = [emb]

# for l in val_x.keys():
#     val_x[l] = [np.mean(val_x[l],axis=0)]

# val_l_final = []
# vlabel_l_final = []
# for k in val_x.keys():
#     val_l_final.append(val_x[k])
#     vlabel_l_final.append(val.loc[k]['label'])

# df_val = pd.DataFrame({'emb': val_l_final, 'label': vlabel_l_final})
# df_val.head(10)

In [48]:
# # Method 3: Feature Max Pooling
# train_x = {}
# for l, emb in zip(index_l, train_pooled_outputs):
#     if l in train_x.keys():
#         train_x[l]  =np.vstack([train_x[l], emb])
#     else:
#         train_x[l] = [emb]

# for l in train_x.keys():
#     # print(len(train_x[l]))
#     train_x[l] = [np.max(train_x[l],axis=0)]

# train_l_final = []
# label_l_final = []
# for k in train_x.keys():
#     train_l_final.append(train_x[k])
#     label_l_final.append(train.loc[k]['label'])

# df_train = pd.DataFrame({'emb': train_l_final, 'label': label_l_final})
# df_train.head(10)

In [49]:
# # Method 3: Feature Max Pooling
# val_x = {}

# for l, emb in zip(val_index_l, val_pooled_outputs):
#     if l in val_x.keys():
#         val_x[l]  =np.vstack([val_x[l], emb])
#     else:
#         val_x[l] = [emb]

# for l in val_x.keys():
#     val_x[l] = [np.max(val_x[l],axis=0)]

# val_l_final = []
# vlabel_l_final = []
# for k in val_x.keys():
#     val_l_final.append(val_x[k])
#     vlabel_l_final.append(val.loc[k]['label'])

# df_val = pd.DataFrame({'emb': val_l_final, 'label': vlabel_l_final})
# df_val.head(10)

In [35]:
df_val, df_test = train_test_split(df_val, test_size=0.4, random_state=35)

In [36]:
df_train.shape, df_val.shape, df_test.shape

((4641, 2), (696, 2), (465, 2))

### Step 5.2 : Prepare Data for Classfication Model

In [37]:
batch_dict = {
    'text_comments':[[7,663],[3,232],[5,93]],
    'text_only':[[7,663],[3,232],[5,93]],
    'comments_only':[[4,1088],[4,163],[4,109]],
    'comments_group1':[[4,387],[4,58],[5,31]],
    'comments_group2':[[4,398],[1,239],[4,40]],
    'comments_group3':[[5,300],[5,45],[1,151]],
    'natural_split':[[7,663],[3,232],[5,93]],
    'fixed_split':[[7,663],[3,232],[5,93]],
}

batches = batch_dict[model_path]

In [38]:
def train_generator(df, batch_size = batches[0][0], batches_per_epoch = batches[0][1]):
    num_sequences = len(df['emb'].to_list())
    assert batch_size * batches_per_epoch == num_sequences
    num_features= 768

    x_list= df['emb'].to_list()
    y_list =  df.label.to_list()
    # Generate batches
    while True:
        for b in range(batches_per_epoch):
            longest_index = (b + 1) * batch_size - 1
            timesteps = len(max(df['emb'].to_list()[:(b + 1) * batch_size][-batch_size:], key=len))
            x_train = np.full((batch_size, timesteps, num_features), -99.)
            y_train = np.zeros((batch_size,  1))
            for i in range(batch_size):
                li = b * batch_size + i
                x_train[i, 0:len(x_list[li]), :] = x_list[li]
                y_train[i] = y_list[li]
            yield x_train, y_train

def val_generator(df,batch_size_val=batches[1][0],batches_per_epoch_val=batches[1][1]):
    
    num_sequences_val = len(df['emb'].to_list())
    assert batch_size_val * batches_per_epoch_val == num_sequences_val
    num_features= 768


    x_list= df['emb'].to_list()
    y_list =  df.label.to_list()
    # Generate batches
    while True:
        for b in range(batches_per_epoch_val):
            longest_index = (b + 1) * batch_size_val - 1
            timesteps = len(max(df['emb'].to_list()[:(b + 1) * batch_size_val][-31:], key=len))
            x_val = np.full((batch_size_val, timesteps, num_features), -99.)
            y_val = np.zeros((batch_size_val,  1))
            for i in range(batch_size_val):
                li = b * batch_size_val + i
                x_val[i, 0:len(x_list[li]), :] = x_list[li]
                y_val[i] = y_list[li]
            yield x_val, y_val

def test_generator(df,batch_size_test=batches[2][0],batches_per_epoch_test=batches[2][1]):
    
    num_sequences_test = len(df['emb'].to_list())
    assert batch_size_test * batches_per_epoch_test == num_sequences_test
    num_features= 768


    x_list= df['emb'].to_list()
    y_list =  df.label.to_list()
    # Generate batches
    while True:
        for b in range(batches_per_epoch_test):
            longest_index = (b + 1) * batch_size_test - 1
            timesteps = len(max(df['emb'].to_list()[:(b + 1) * batch_size_test][-31:], key=len))
            # print(len(df_train['emb'].to_list()[:b+batch_size][-7:]))
            x_test = np.full((batch_size_test, timesteps, num_features), -99.)
            y_test = np.zeros((batch_size_test,  1))
            for i in range(batch_size_test):
                li = b * batch_size_test + i
                x_test[i, 0:len(x_list[li]), :] = x_list[li]
                y_test[i] = y_list[li]
            yield x_test, y_test            

In [39]:
train_data = train_generator(df_train)
val_data = val_generator(df_val)
test_data = test_generator(df_test)

In [40]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def cul_all_metrics(y_true, y_pred, pos_label=1):
    return {"accuracy": float("%.5f" % accuracy_score(y_true=y_true, y_pred=y_pred)),
            "precision": float("%.5f" % precision_score(y_true=y_true, y_pred=y_pred, pos_label=pos_label)),
            "recall": float("%.5f" % recall_score(y_true=y_true, y_pred=y_pred, pos_label=pos_label)),
            "f1-score": float("%.5f" % f1_score(y_true=y_true, y_pred=y_pred)),
           }

### Step 6.1 : Train & Save LSTM Model For Classification

In [41]:
import tensorflow as tf
from tensorflow import keras
import h5py

text_input = keras.Input(shape=(None,768,), dtype='float32', name='text')

# keras.layers.Masking(mask_value=0.0)
l_mask = keras.layers.Masking(mask_value=-99.)(text_input) 

# Which we encoded in a single vector via a LSTM
encoded_text = keras.layers.LSTM(100,)(l_mask)
out_dense = keras.layers.Dense(30, activation='relu')(encoded_text)
# And we add a softmax classifier on top
out = keras.layers.Dense(2, activation='softmax')(out_dense)
# At model instantiation, we specify the input and the output:
model = keras.Model(text_input, out)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['acc'])
model.summary()

2025-04-29 13:52:51.508202: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags.
2025-04-29 13:52:51.523212: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3399410000 Hz
2025-04-29 13:52:51.523935: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x30b8c2f0 executing computations on platform Host. Devices:
2025-04-29 13:52:51.523949: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): Host, Default Version
2025-04-29 13:52:51.524404: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
text (InputLayer)            [(None, None, 768)]       0         
_________________________________________________________________
masking (Masking)            (None, None, 768)         0         
_________________________________________________________________
lstm (LSTM)                  (None, 100)               347600    
_________________________________________________________________
dense (Dense)                (None, 30)                3030      
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 62        
Total params: 350,692
Trainable params: 350,692
Non-trainable params: 0
_________________________________________________________________


In [42]:
call_reduce = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_acc', factor=0.95, patience=3, verbose=2,
                                mode='auto', min_delta=0.01, cooldown=0, min_lr=0)

In [43]:
batches_per_epoch = batches[0][1]

batches_per_epoch_val= batches[1][1]

model.fit(train_data, steps_per_epoch=batches_per_epoch, epochs=10,
                    validation_data=val_data, validation_steps=batches_per_epoch_val, callbacks =[call_reduce] )

Train for 663 steps, validate for 232 steps
Epoch 1/10


2025-04-29 13:53:44.087397: W tensorflow/core/grappler/optimizers/implementation_selector.cc:310] Skipping optimization due to error while loading function libraries: Invalid argument: Functions '__inference___backward_cudnn_lstm_with_fallback_5844_7301' and '__inference___backward_standard_lstm_7406_8003_specialized_for_StatefulPartitionedCall_at___inference_distributed_function_8125' both implement 'lstm_3319ed09-f65e-4aa3-b0c6-5b81e84d45df' but their signatures do not match.




2025-04-29 13:53:55.856698: W tensorflow/core/grappler/optimizers/implementation_selector.cc:310] Skipping optimization due to error while loading function libraries: Invalid argument: Functions '__inference_standard_lstm_10344_specialized_for_model_lstm_StatefulPartitionedCall_at___inference_distributed_function_12215' and '__inference_standard_lstm_10344' both implement 'lstm_72f074a0-c42f-4b78-97e4-1f2074838059' but their signatures do not match.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0009500000451225787.
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0009025000152178108.
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0008573750033974647.


<tensorflow.python.keras.callbacks.History at 0x77818d6f34d0>

In [44]:
# save_path = "./trained_models/classification_models_" + model_path + "/LSTM_model/model.h5"

In [45]:
# model.save(save_path)

### Step 6.2 : Evaluate LSTM Model For Classification

In [None]:
# model = tf.keras.models.load_model(save_path)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/angelos/anaconda3/envs/py_env/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_12842/2662434083.py", line 1, in <module>
    model = tf.keras.models.load_model(save_path)
  File "/home/angelos/anaconda3/envs/py_env/lib/python3.7/site-packages/tensorflow_core/python/keras/saving/save.py", line 146, in load_model
    return hdf5_format.load_model_from_hdf5(filepath, custom_objects, compile)
  File "/home/angelos/anaconda3/envs/py_env/lib/python3.7/site-packages/tensorflow_core/python/keras/saving/hdf5_format.py", line 166, in load_model_from_hdf5
    model_config = json.loads(model_config.decode('utf-8'))
AttributeError: 'str' object has no attribute 'decode'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/angelos/anaconda3/envs/py_env/lib/python3.7/site-pac

In [46]:
batches_per_epoch_test = batches[2][1]
pred = model.predict_generator(test_data, steps=batches_per_epoch_test)

ERROR! Session/line number was not unique in database. History logging moved to new session 59


In [47]:
pred = np.argmax(pred,axis=1).tolist()
label = df_test.label.to_list()

cul_all_metrics(label,pred)

{'accuracy': 0.95699,
 'precision': 0.91667,
 'recall': 0.95333,
 'f1-score': 0.93464}

### Step 7.1 : Train & Save Transformer Model For Classification

In [48]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [49]:
class MultiHeadSelfAttention(keras.layers.Layer):
    def __init__(self, embed_dim, num_heads=8):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        assert (
            embed_dim % num_heads == 0
        ), "embedding dimension not divisible by num heads"
        self.projection_dim = embed_dim // num_heads
        self.wq = keras.layers.Dense(embed_dim)
        self.wk = keras.layers.Dense(embed_dim)
        self.wv = keras.layers.Dense(embed_dim)
        self.combine_heads = keras.layers.Dense(embed_dim)

    def attention(self, q, k, v):
        score = tf.matmul(q, k, transpose_b=True)
        dk = tf.cast(tf.shape(k)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dk)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, v)
        return output, weights

    def separate_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, x):
        # x.shape = [batch_size, seq_len, embedding_dim]
        batch_size = tf.shape(x)[0]
        q = self.wq(x)  # (batch_size, seq_len, embed_dim)
        k = self.wk(x)  # (batch_size, seq_len, embed_dim)
        v = self.wv(x)  # (batch_size, seq_len, embed_dim)
        q = self.separate_heads(
            q, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        k = self.separate_heads(
            k, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        v = self.separate_heads(
            v, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        attention, weights = self.attention(q, k, v)
        attention = tf.transpose(
            attention, perm=[0, 2, 1, 3]
        )  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(
            attention, (batch_size, -1, self.embed_dim)
        )  # (batch_size, seq_len, embed_dim)
        output = self.combine_heads(
            concat_attention
        )  # (batch_size, seq_len, embed_dim)
        return output

In [50]:
class TransformerLayer(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerLayer, self).__init__()

        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = keras.Sequential(
            [
                keras.layers.Dense(ff_dim, activation="relu"),
                keras.layers.Dense(embed_dim),
            ]
        )

        self.layernorm1 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def call(self, x, training):
        attn_output = self.att(x)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)

        return out2


In [51]:
embed_dim=768
ff_dim=32
num_heads=1

In [52]:
text_input = keras.Input(shape=(None,768,), dtype='float32', name='text')

l_mask = keras.layers.Masking(mask_value=-99.)(text_input) 

encoded_text = TransformerLayer(embed_dim,num_heads,ff_dim)(l_mask)

out_dense1 = keras.layers.LSTM(100,)(encoded_text)

out_dense = keras.layers.Dense(30, activation='relu')(out_dense1)

out = keras.layers.Dense(2, activation='softmax')(out_dense)

model = keras.Model(text_input, out)

model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['acc'])

model.summary()









Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
text (InputLayer)            [(None, None, 768)]       0         
_________________________________________________________________
masking_1 (Masking)          (None, None, 768)         0         
_________________________________________________________________
transformer_layer (Transform (None, None, 768)         2415392   
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               347600    
_________________________________________________________________
dense_8 (Dense)              (None, 30)                3030      
_________________________________________________________________
dense_9 (Dense)              (None, 2)                 62        
Total params: 2,766,084
Trainable params: 2,766,084
Non-trainable params: 0
_________________________________________________

In [53]:
call_reduce = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_acc', factor=0.95, patience=3, verbose=2,
                                mode='auto', min_delta=0.01, cooldown=0, min_lr=0)

In [54]:
batches_per_epoch = batches[0][1]

batches_per_epoch_val= batches[1][1]

model.fit(train_data, steps_per_epoch=batches_per_epoch, epochs=10,
                    validation_data=val_data, validation_steps=batches_per_epoch_val, callbacks =[call_reduce] )

Train for 663 steps, validate for 232 steps
Epoch 1/10






2025-04-29 14:01:41.414374: W tensorflow/core/grappler/optimizers/implementation_selector.cc:310] Skipping optimization due to error while loading function libraries: Invalid argument: Functions '__inference___backward_standard_lstm_83790_84273_specialized_for_StatefulPartitionedCall_at___inference_distributed_function_84851' and '__inference___backward_cudnn_lstm_with_fallback_83503_83685' both implement 'lstm_c9e2550a-f1de-44f7-a0d3-b7f4f7b9b9ab' but their signatures do not match.




2025-04-29 14:02:06.767111: W tensorflow/core/grappler/optimizers/implementation_selector.cc:310] Skipping optimization due to error while loading function libraries: Invalid argument: Functions '__inference_cudnn_lstm_with_fallback_87415' and '__inference_standard_lstm_87304_specialized_for_model_1_lstm_1_StatefulPartitionedCall_at___inference_distributed_function_87670' both implement 'lstm_6eb6a6ab-648e-48af-a766-7cdc22a22e88' but their signatures do not match.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0009500000451225787.
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0009025000152178108.
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0008573750033974647.


<tensorflow.python.keras.callbacks.History at 0x77814e1076d0>

In [71]:
# save_weight_path = "./trained_models/classification_models_" + model_path + "/Transformer_model/model.h5"

In [72]:
# model.save_weights(save_weight_path)

### Step 7.2 : Evaluate Transformer Model for Classification

In [55]:
test_data = test_generator(df_test)

In [74]:
# model.load_weights(save_weight_path)

In [56]:
batches_per_epoch_test = batches[2][1]

pred = model.predict_generator(test_data, steps=batches_per_epoch_test)

In [57]:
pred = np.argmax(pred,axis=1).tolist()
label = df_test.label.to_list()

cul_all_metrics(label,pred)

{'accuracy': 0.96344,
 'precision': 0.93464,
 'recall': 0.95333,
 'f1-score': 0.94389}