In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
cd /content/gdrive/MyDrive/project/

/content/gdrive/MyDrive/project


In [3]:
import json
dev_set = json.load(open("../project/dev-claims.json"))
evidence_set = json.load(open("../project/evidence.json"))
test_set = json.load(open("../project/test-claims-unlabelled.json"))
train_set = json.load(open("../project/train-claims.json"))

In [4]:
!pip install sentence-transformers
from tqdm import tqdm
import numpy as np
from sentence_transformers import SentenceTransformer, InputExample, losses, models, evaluation
from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation import CEBinaryAccuracyEvaluator
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import math
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sentence-transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting transformers<5.0.0,>=4.6.0 (from sentence-transformers)
  Downloading transformers-4.29.1-py3-none-any.whl (7.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m80.7 MB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece (from sentence-transformers)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m85.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub>=0.4.0 (from sentence-transformers)
  Downloading huggingface_hub-0.14.1-py3-

# Data Prepossessing 1

In [5]:
# prepocessing data
def prepocess(dataset, test=False):
    all_claims, all_claim_idx, all_evid_idx, all_labels, claims, evidences, labels = [], [], [], [], [], [], []
    for id, content in dataset.items():
        claim = content['claim_text']
        all_claims.append(claim)
        all_claim_idx.append(id)
        if test == False:
            all_labels.append(content['claim_label'])
            all_evid_id = []
            for i in content['evidences']:
                claims.append(claim)
                all_evid_id.append(int(i.split('-')[1]))
                evidences.append(evidence_set[i])
                labels.append(content['claim_label'])
            all_evid_idx.append(all_evid_id)
        else:
            claims.append(claim)
              
    if test == False:
        return all_claims, all_claim_idx, all_evid_idx, all_labels, claims, evidences, labels
    else:
        return all_claims, all_claim_idx

        
# find unmatched data and obtain top k and l evidence indices and scores for training
def unmatched_data(claims_data, evid_data, evid_id, evid_num, batch_size=300000, k_=10, l=5, test=False):
    matched = 0
    all_unmatched, all_top_l_idx, all_top_l_scores = [], [], []
    claims_data = claims_data.clone().detach().to('cuda')
    evid_data = evid_data.clone().detach().to('cuda')
    # obtain scores using cosine similarity for all pairs of claims and evidences
    for i, j in enumerate(claims_data):
        j = j.unsqueeze(0)  # Add batch dimension
        scores = []
        # using batch to avoid memory being run out
        for k in range(0, len(evid_data), batch_size):
            batch_evid_data = evid_data[k:k+batch_size]
            batch_scores = torch.nn.functional.cosine_similarity(j, batch_evid_data, dim=1)
            scores.append(batch_scores)
            
        scores = torch.cat(scores) # combine all batches of scores
        top_k_idx = torch.argsort(scores, descending=True)[:k_] # find top k indices
        top_l_idx = torch.argsort(scores, descending=True)[:l] # find top l indices

        if not test:
            evid_id_tensor = torch.tensor(evid_id[i]).to('cuda')
            unmatched = top_k_idx[~torch.isin(top_k_idx, evid_id_tensor)]
            matched += torch.sum(torch.isin(top_l_idx, evid_id_tensor)).item()
        else:
            unmatched = top_l_idx
        
        all_unmatched.append(unmatched.tolist())
        all_top_l_idx.append(top_l_idx.tolist())
        all_top_l_scores.append(torch.index_select(scores, 0, top_l_idx).tolist())
        torch.cuda.empty_cache() # Clear CUDA cache
    if not test:
        matched_ratio = matched / len(evid_num)
        print('Matched evidence num:', matched, ', ratio:', matched_ratio)
    return all_unmatched, all_top_l_idx, all_top_l_scores


all_train_claim, all_train_claim_id, all_train_evid_id, all_train_label, train_claim, train_evid, train_label = prepocess(train_set)
all_val_claim, all_val_claim_id, all_val_evid_id, all_val_label, val_claim, val_evid, val_label = prepocess(dev_set)
test_claim, test_id = prepocess(test_set, True)
evid_content = list(evidence_set.values())

In [6]:
# create the bi-encoder transfomer model
word_embedding_model = models.Transformer('distilroberta-base', max_seq_length=60)
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
base_model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model.to(device)



Downloading (…)lve/main/config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/331M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaModel: ['lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

SentenceTransformer(
  (0): Transformer({'max_seq_length': 60, 'do_lower_case': False}) with Transformer model: RobertaModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
)

# Fine Tuning

In [7]:
#  data propossing forMultipleNegativesRankingLoss training
def mnr_dataloader(claim_data, evid_data):
    examples = []
    for i, claim in enumerate(claim_data):
        evid = evid_data[i]
        example = InputExample(texts=[claim, evid])
        examples.append(example)
    return DataLoader(examples, shuffle=True, batch_size=32)
    
# data prepossing for contrastive loss training
def contrastive_dataloader(claim_data, evid_data, unmatched_data, all_claim, batch_size=16):
    positive_pairs = list(zip(claim_data, evid_data))
    pairs = []
    labels = []
    for claim, pos_evid in positive_pairs:
        pairs.append((claim, pos_evid))
        labels.append(1)
            
    for i, claim in enumerate(all_claim):
        for neg_evid in unmatched_data[i]:
            pairs.append((claim, evid_content[neg_evid]))
            labels.append(0)

    examples = []
    for (anchor, other), label in zip(pairs, labels):
        example = InputExample(texts=[anchor, other], label=label)
        examples.append(example)
    dataloader = DataLoader(examples, shuffle=True, batch_size=batch_size)  
    return dataloader


# train transformer model using all matched training pairs
mnr_loss = losses.MultipleNegativesRankingLoss(model=base_model)
train_dataloader_m = mnr_dataloader(train_claim+val_claim, train_evid+val_evid) # for test 
# train_dataloader_m = mnr_dataloader(train_claim, train_evid)
warmup_steps = math.ceil(len(train_dataloader_m) * 10 * 0.1) 
base_model.fit(train_objectives=[(train_dataloader_m, mnr_loss)],epochs=20, warmup_steps=warmup_steps)

Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

Iteration:   0%|          | 0/145 [00:00<?, ?it/s]

In [8]:
# embedding
evid_embed_1 = base_model.encode(evid_content, convert_to_tensor =True, batch_size=256)
train_embed_1 = base_model.encode(all_train_claim, convert_to_tensor =True)
val_embed_1 =  base_model.encode(all_val_claim, convert_to_tensor =True)
# sort out useful training and evaluating data
train_unmatched_1, train_top5_id_1, train_top5_scores_1 = unmatched_data(train_embed_1, evid_embed_1, all_train_evid_id, train_evid,k_=50)
val_unmatched_50, val_top_id_50, val_top_scores_50 = unmatched_data(val_embed_1, evid_embed_1, all_val_evid_id, val_evid, l=50)

Matched evidence num: 2180 , ratio: 0.5288694808345463
Matched evidence num: 468 , ratio: 0.9531568228105907


In [9]:
# Contrastive training
con_loss = losses.OnlineContrastiveLoss(model=base_model)
train_dataloader_c = contrastive_dataloader(train_claim+val_claim, train_evid+val_evid, train_unmatched_1+val_unmatched_50, all_train_claim+all_val_claim) # for test
#train_dataloader_c = contrastive_dataloader(train_claim, train_evid, train_unmatched_1, all_train_claim)

base_model.fit(train_objectives=[(train_dataloader_c, con_loss)],epochs=5, warmup_steps=warmup_steps)
evid_embed_2 = base_model.encode(evid_content, convert_to_tensor =True, batch_size=256)
train_embed_2 = base_model.encode(all_train_claim, convert_to_tensor =True)
val_embed_2 =  base_model.encode(all_val_claim, convert_to_tensor =True)
test_embed_2 =  base_model.encode(test_claim, convert_to_tensor =True)

train_unmatched_2, train_top5_id_2, train_top5_scores_2 = unmatched_data(train_embed_2, evid_embed_2, all_train_evid_id, train_evid,k_=50)
val_unmatched_50, val_top_id_50, val_top_scores_50 = unmatched_data(val_embed_2, evid_embed_2, all_val_evid_id, val_evid, l=50)
val_unmatched_50_test, val_top_id_50_test, val_top_scores_50_test = unmatched_data(val_embed_2, evid_embed_2, all_val_evid_id, val_evid, l=50, test=True)
test_unmatched_50_test, test_top_id_50_test, test_top_scores_50_test = unmatched_data(test_embed_2, evid_embed_2, all_val_evid_id, val_evid, l=50, test=True)

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1980 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1980 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1980 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1980 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1980 [00:00<?, ?it/s]

Matched evidence num: 3886 , ratio: 0.9427462396894711
Matched evidence num: 491 , ratio: 1.0


#Evidence Prediction

## Evidence Prediction Training

In [10]:
# create another transformer model using CorssEncoder
cross_model = CrossEncoder('distilroberta-base', num_labels=1)
torch.cuda.empty_cache()
bce_loss = nn.BCEWithLogitsLoss()
train_dataloader_c = contrastive_dataloader(train_claim+val_claim, train_evid+val_evid, train_unmatched_2+val_unmatched_50, all_train_claim+all_val_claim) # for test 
# train_dataloader_c = contrastive_dataloader(train_claim, train_evid, train_unmatched_2, all_train_claim)
cross_model.fit(train_dataloader=train_dataloader_c, epochs=5, loss_fct=bce_loss)

Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.bias

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1968 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1968 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1968 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1968 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1968 [00:00<?, ?it/s]

## Predictions

In [11]:
# predict all l top evidences with cross-encoder
def cross_prediction(claim_data, top_evids):
    sent = []
    for i, evid_id in enumerate(top_evids):
        evid_list = [[claim_data[i], evid_content[idx]] for idx in evid_id]
        sent.append(evid_list)
    predictions = [cross_model.predict(i) for i in sent]
    return predictions

# find out the most suitable evidences
def evid_results(top_evids, evid_scores, predictions, thres, test=False):
    matched_num, avg_len = 0, []
    evid_predictions = []
    for i, scores in enumerate(predictions):
        result = []
        
        for j, score in enumerate(scores):
            idx = top_evids[i][j]
            if score*0.7+evid_scores[i][j]*0.3 >=thres:
                result.append(idx)
        final_result = [k for k in result[:5]]

        if len(final_result) == 0:
            final_result.append(top_evids[i][0])
        evid_predictions.append(final_result)

        if not test:
            for m in final_result:
                if m in all_val_evid_id[i]:
                    matched_num += 1
            avg_len.append(len(final_result))
    if not test:
        print('Matched evidence num:', matched_num, ', Average length:', sum(avg_len)/len(all_val_evid_id))
    return evid_predictions

In [19]:
# find the most related evidence by tuning scores from 2 transformer models
evid_prediction = cross_prediction(test_claim, test_unmatched_50_test)
test_evid_pred = evid_results(test_unmatched_50_test, test_top_scores_50_test, evid_prediction, 0.2)


Matched evidence num: 0 , Average length: 4.863636363636363


# Class Predictions

## Data Prepossessing 2

In [20]:
base_model.max_seq_length = 240 # turn model max length longer
# combine all related evidences
def evid_combined(evid_id):
    return [''.join(evid_content[j] for j in i) for i in evid_id]
# convert labels to number/names
def convert_label(data, num=True):
    if num:
        label_dict = {'SUPPORTS': 0,'REFUTES': 1,'NOT_ENOUGH_INFO': 2, 'DISPUTED': 3}
    else:
        label_dict = { 0:'SUPPORTS', 1:'REFUTES', 2:'NOT_ENOUGH_INFO', 3:'DISPUTED'}
    labels = [label_dict.get(label) for label in data]
    return labels



In [22]:
# prepocess data
train_evid_combined = evid_combined(all_train_evid_id)
val_evid_combined_true = evid_combined(all_val_evid_id)
val_evid_combiend_pred = evid_combined(test_evid_pred) # change to test_evid_pred for test
# convert label to number
train_label_num = convert_label(all_train_label)
val_label_num = convert_label(all_val_label)

train_evid_embed = base_model.encode(train_evid_combined)
val_evid_embed_true = base_model.encode(val_evid_combined_true)
val_evid_embed_pred = base_model.encode(val_evid_combiend_pred)

train_embed = base_model.encode(all_train_claim)
val_embed =  base_model.encode(test_claim) # change to test_claim for test

train_claim_f = [np.hstack((train_embed[i], train_evid_embed[i])) for i in range(len(train_evid_embed))]
#val_claim_f_true = [np.hstack((val_embed[i], val_evid_embed_true[i])) for i in range(len(val_evid_embed_true))]
val_claim_f_pred = [np.hstack((val_embed[i], val_evid_embed_pred[i])) for i in range(len(val_evid_embed_pred))]



## SVM Prediction

In [None]:
c = 3
svm_model_l1 = SVC(kernel='linear', class_weight='balanced', C=c)
svm_model_l1.fit(train_embed, train_label_num)
y_pred = svm_model_l1.predict(val_embed)
print("Training kernel: linear")
print("Classification Report: claim only")
print(classification_report(val_label_num, y_pred))
print()
svm_model_r1 = SVC(kernel='rbf', class_weight='balanced', C=c)
svm_model_r1.fit(train_embed, train_label_num)
y_pred = svm_model_r1.predict(val_embed)
print("Training kernel: rbf")
print("Classification Report: claim only")
print(classification_report(val_label_num, y_pred))
print()

svm_model_l2 = SVC(kernel='linear', class_weight='balanced', C=c)
svm_model_l2.fit(train_claim_f, train_label_num)
y_pred = svm_model_l2.predict(val_claim_f_true)
print("Training kernel: linear")
print("Classification Report: claim_evid true")
print(classification_report(val_label_num, y_pred))
print()
svm_model_r2 = SVC(kernel='rbf', class_weight='balanced', C=c)
svm_model_r2.fit(train_claim_f, train_label_num)
y_pred = svm_model_r2.predict(val_claim_f_true)
print("Training kernel: rbf")
print("Classification Report: claim_evid true")
print(classification_report(val_label_num, y_pred))
print()

y_pred = svm_model_l2.predict(val_claim_f_pred)
print("Training kernel: linear")
print("Classification Report: claim_evid pred")
print(classification_report(val_label_num, y_pred))
print()

y_pred = svm_model_r2.predict(val_claim_f_pred)
print("Training kernel: rbf")
print("Classification Report: evid_claim true")
print(classification_report(val_label_num, y_pred))
print()

Training kernel: linear
Classification Report: claim only
              precision    recall  f1-score   support

           0       0.48      0.57      0.52        68
           1       0.36      0.37      0.36        27
           2       0.29      0.24      0.26        41
           3       0.20      0.11      0.14        18

    accuracy                           0.40       154
   macro avg       0.33      0.32      0.32       154
weighted avg       0.37      0.40      0.38       154


Training kernel: rbf
Classification Report: claim only
              precision    recall  f1-score   support

           0       0.51      0.65      0.57        68
           1       0.38      0.22      0.28        27
           2       0.30      0.34      0.32        41
           3       0.33      0.11      0.17        18

    accuracy                           0.43       154
   macro avg       0.38      0.33      0.33       154
weighted avg       0.41      0.43      0.41       154


Training kernel

## LSTM Prediction

In [23]:
from tensorflow.keras import layers, models, optimizers
from keras.utils import to_categorical
# set up lstm model
maxlen = len(train_claim_f[0])
inputs = layers.Input(shape=(1, maxlen))
x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(inputs)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(16, activation='tanh')(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(4, activation='softmax')(x)

lstm_model = Model(inputs=inputs, outputs=outputs)
lstm_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
lstm_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 1, 1536)]         0         
                                                                 
 bidirectional (Bidirectiona  (None, 1, 128)           819712    
 l)                                                              
                                                                 
 global_max_pooling1d (Globa  (None, 128)              0         
 lMaxPooling1D)                                                  
                                                                 
 dense (Dense)               (None, 16)                2064      
                                                                 
 dropout (Dropout)           (None, 16)                0         
                                                                 
 dense_1 (Dense)             (None, 4)                 68    

In [24]:
train_label_cat = to_categorical(train_label_num, num_classes=4)
val_label_num_cat = to_categorical(val_label_num, num_classes=4)
train_claim_f = np.array(train_claim_f)
train_label_num = np.array(train_label_num)
#val_claim_f_true = np.array(val_claim_f_true)
val_label_num = np.array(val_label_num)
train_claim_f_reshaped = train_claim_f.reshape(-1, 1, maxlen)
#val_claim_f_reshaped = val_claim_f_true.reshape(-1, 1, maxlen)
history = lstm_model.fit(train_claim_f_reshaped, train_label_cat, epochs=50, batch_size=128, validation_split=0.2,shuffle=True)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
val_claim_f_true_reshaped = np.array(val_claim_f_true).reshape(-1, 1, maxlen)
val_claim_f_pred_reshaped = np.array(val_claim_f_pred).reshape(-1, 1, maxlen)

y_pred_lstm = (lstm_model.predict(val_claim_f_true_reshaped) > 0.5).astype("int32")
y_pred_classes = np.argmax(y_pred_lstm, axis=1)
print("Classification Report: claim_evid true")
print(classification_report(val_label_num, y_pred_classes))
print()
y_pred_lstm = (lstm_model.predict(val_claim_f_pred_reshaped) > 0.5).astype("int32")
y_pred_classes = np.argmax(y_pred_lstm, axis=1)
print("Classification Report: evid_claim true")
print(classification_report(val_label_num, y_pred_classes))
print()

Classification Report: claim_evid true
              precision    recall  f1-score   support

           0       0.60      0.71      0.65        68
           1       0.45      0.33      0.38        27
           2       0.57      0.66      0.61        41
           3       0.29      0.11      0.16        18

    accuracy                           0.56       154
   macro avg       0.48      0.45      0.45       154
weighted avg       0.53      0.56      0.54       154


Classification Report: evid_claim true
              precision    recall  f1-score   support

           0       0.53      0.51      0.52        68
           1       0.38      0.22      0.28        27
           2       0.21      0.34      0.26        41
           3       0.17      0.06      0.08        18

    accuracy                           0.36       154
   macro avg       0.32      0.28      0.29       154
weighted avg       0.38      0.36      0.36       154




## Output

In [30]:
def output_dic(claim_id, evid_result, label_result, claim):
    final_result = {}
    for i, id_c in enumerate(claim_id):
        claim_value = {}
        evid_list = [f'evidence-{j}' for j in evid_result[i]]
        claim_value['claim_text'] = claim[i]
        claim_value['claim_label'] = label_result[i]
        claim_value['evidences'] = evid_list
        
        final_result[id_c] = claim_value
    return final_result

In [None]:
svm_model_r = SVC(kernel='rbf', class_weight='balanced', C=3)
svm_model_r.fit(train_claim_f, train_label_num)
y_pred_svm = svm_model_r.predict(val_claim_f_pred)
val_claim_f_pred_reshaped = np.array(val_claim_f_pred).reshape(-1, 1, maxlen)
y_pred_lstm = (lstm_model.predict(val_claim_f_pred_reshaped) > 0.5).astype("int32")
y_pred_lstm = np.argmax(y_pred_lstm, axis=1)
test_pred_label_svm_r = convert_label(y_pred_svm, False)
test_pred_label_lstm = convert_label(y_pred_lstm, False)


In [31]:
svm_r_test = output_dic(test_id, test_evid_pred, test_pred_label_svm_r, test_claim)
lstm_test = output_dic(test_id, test_evid_pred, test_pred_label_svm_r, test_claim)


In [33]:
import json
with open("svm_r_test.json", "w") as file:
    json.dump(svm_r_test, file)