In [None]:
from transformers import DistilBertTokenizer, TFDistilBertModel
import numpy as np
import pandas as pd
import joblib
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

<h2>Load data and DistilBERT word embeddings</h2>

In [None]:
# Folder holding the CSV data and the saved models. File paths in this notebook
# are built by plain string concatenation, so the trailing slash is required.
base_directory = "./drive/MyDrive/data/"

def get_distilbert_embeddings(data, tokenizer, model, batch_size=32, max_length=110):
    """Encode claim/evidence pairs and return their token-level embeddings.

    Each row's ``Claim`` and ``Evidence`` are joined as
    ``"<claim> [SEP] <evidence>"``, tokenized to exactly ``max_length`` tokens
    (padded or truncated) and passed through ``model`` in batches.

    Args:
        data: DataFrame with ``Claim`` and ``Evidence`` columns.
        tokenizer: Callable tokenizer (e.g. ``DistilBertTokenizer``) returning
            ``input_ids`` and ``attention_mask`` TensorFlow tensors.
        model: Encoder whose output exposes ``last_hidden_state``.
        batch_size: Number of rows encoded per forward pass.
        max_length: Token length every pair is padded/truncated to. The
            default of 110 is the value this notebook has always used.

    Returns:
        A NumPy array with all batches stacked along the first axis; with a
        DistilBERT encoder this is expected to be
        ``(len(data), max_length, hidden_size)``. The sequence axis is kept
        (``last_hidden_state``) so the result can feed an LSTM directly.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``data`` has no rows.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    if len(data) == 0:
        raise ValueError("data must contain at least one row")

    all_embeddings = []

    for start in range(0, len(data), batch_size):
        batch = data.iloc[start:start + batch_size]

        # A missing cell would turn the whole concatenated string into NaN and
        # crash the tokenizer, so missing claims/evidence become empty strings.
        claims = batch['Claim'].fillna('').astype(str)
        evidence = batch['Evidence'].fillna('').astype(str)
        texts = (claims + " [SEP] " + evidence).tolist()

        # Calling the tokenizer directly is the supported replacement for the
        # deprecated batch_encode_plus().
        inputs = tokenizer(
            texts,
            padding='max_length',
            truncation=True,
            return_tensors="tf",
            max_length=max_length,
        )

        outputs = model(inputs['input_ids'], attention_mask=inputs['attention_mask'])
        all_embeddings.append(outputs.last_hidden_state.numpy())

    # Every batch shares the trailing dimensions, so joining on axis 0 simply
    # restores the original row order.
    return np.concatenate(all_embeddings, axis=0)

def get_evaluation_metrics(y_true, y_pred):
    """Print accuracy, precision, recall and F1 for a set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        None. The four scores are printed, one per line, to four decimals.
    """
    # Compute every score before printing so a failing metric leaves no
    # partially printed report behind.
    scores = [
        ('Accuracy', accuracy_score(y_true, y_pred)),
        ('Precision', precision_score(y_true, y_pred)),
        ('Recall', recall_score(y_true, y_pred)),
        ('F1 Score', f1_score(y_true, y_pred)),
    ]

    for label, value in scores:
        print(f'{label}: {value:.4f}')

In [None]:
# Load the evaluation split and its gold labels
test_csv_path = base_directory + "dev.csv" # Change later
test_data = pd.read_csv(test_csv_path)
test_labels = test_data['label'].values

# Tokenizer and encoder are loaded from the same pretrained checkpoint
pretrained_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(pretrained_name)
model = TFDistilBertModel.from_pretrained(pretrained_name)


def count_tokens(text):
    """Return the number of tokens in `text`, special tokens included."""
    return len(tokenizer.encode(text, add_special_tokens=True))


# Token count of every claim + evidence pair (joined with a single space here)
combined_texts = test_data['Claim'] + ' ' + test_data['Evidence']
tokenized_lengths = combined_texts.apply(count_tokens)

# Token-level embeddings, one block per row of the split
test_embeddings = get_distilbert_embeddings(test_data, tokenizer, model)

# Collapse everything after the row axis into a single feature vector per row
test_embeddings_flat = test_embeddings.reshape(len(test_embeddings), -1)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertModel: ['vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias']
- This IS expected if you are initializing TFDistilBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDistilBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


<h2>Logistic Regression Model</h2>

In [None]:
# Restore the fitted logistic-regression model (joblib.load unpickles the file,
# so only point it at model files you trust)
lr_model_path = base_directory + "lr_model.pkl"
lr_model = joblib.load(lr_model_path)

# Classify the flattened embeddings
lr_predictions = lr_model.predict(test_embeddings_flat)

# Report the scores against the gold labels
get_evaluation_metrics(y_true=test_labels, y_pred=lr_predictions)

# Write the predictions to a one-column CSV file
predictions_df = pd.DataFrame(data=lr_predictions, columns=['prediction'])
predictions_df.to_csv('Group_1_A.csv', index=False)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Accuracy: 0.8193
Precision: 0.6789
Recall: 0.6266
F1 Score: 0.6517


<h2>LSTM Model</h2>

In [None]:
# Load the saved LSTM model from the data folder
lstm_model_path = base_directory + "distilbert_lstm2"
lstm_model = load_model(lstm_model_path)



Accuracy: 0.8508
Precision: 0.7308
Recall: 0.7079
F1 Score: 0.7192


In [None]:
# Predict: the model returns a score per row; scores above 0.5 become class 1
lstm_predictions = lstm_model.predict(test_embeddings)
predictions_binary = np.where(lstm_predictions > 0.5, 1, 0)

# Evaluation Scores
get_evaluation_metrics(test_labels, predictions_binary)

# Store the thresholded class labels as CSV. The raw scores were written here
# before, which did not match the labels being evaluated above or the label
# format of the logistic-regression prediction file.
predictions_df = pd.DataFrame(predictions_binary, columns=['prediction'])
predictions_df.to_csv('Group_1_B.csv', index=False)

Accuracy: 0.8508
Precision: 0.7308
Recall: 0.7079
F1 Score: 0.7192
