In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import pandas as pd
import torch
import tensorflow as tf
from transformers import TFAutoModelForSequenceClassification

# Load the tokenizer and the sequence-classification model for the checkpoint
# named by `model_name`. Both are resolved by `from_pretrained`, so the same
# identifier is reused for the tokenizer, the model and the later conversion.
model_name = "FriedGil/distillBERT-misinformation-classifier"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Load the LIAR dataset
def load_liar_data(file_path):
    columns = ['id', 'label', 'statement', 'subject', 'speaker', 'speaker_job_title',
               'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts',
               'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context']
    data = pd.read_csv(file_path, sep='\t', header=None, names=columns)
    return data

# Placeholder location of the LIAR TSV file; it must be replaced with a real
# path before this cell can run, because the file is read immediately below.
liar_dataset_path = "path_to_liar_dataset.tsv"  # Replace with your dataset path
liar_data = load_liar_data(liar_dataset_path)

# Preprocess the dataset
def preprocess_data(data, tokenizer, max_length=128):
    """Tokenize the ``statement`` column of *data*.

    Args:
        data: DataFrame (or mapping of column name to Series) containing a
            ``statement`` column.
        tokenizer: Callable invoked as ``tokenizer(list_of_str, truncation=True,
            padding=True, max_length=..., return_tensors="pt")``.
        max_length: Maximum sequence length forwarded to the tokenizer.

    Returns:
        Whatever the tokenizer returns for the full list of statements, in the
        same row order as *data*.
    """
    # A missing statement is read by pandas as NaN (a float), and tokenizers
    # expect text input. Map missing values to "" and coerce everything else
    # to str so each row still yields exactly one encoding (row alignment with
    # the DataFrame is preserved).
    statements = ["" if pd.isna(text) else str(text) for text in data['statement']]
    encodings = tokenizer(
        statements,
        truncation=True,
        padding=True,
        max_length=max_length,
        return_tensors="pt",
    )
    return encodings

# Tokenize every statement in the loaded dataset with the default max_length.
encodings = preprocess_data(liar_data, tokenizer)

# Model inference
def get_predictions(model, encodings, batch_size=32):
    """Return the predicted class index for every encoded row.

    Args:
        model: Callable invoked as ``model(**batch)`` whose result exposes a
            ``logits`` attribute with one row of class scores per input row.
        encodings: Mapping of keyword name to a tensor whose first dimension
            indexes the rows (e.g. the tokenizer output).
        batch_size: Number of rows sent through the model per forward pass.

    Returns:
        A 1-D tensor holding ``argmax(logits, dim=1)`` for each row, in input
        order. An empty input yields an empty ``torch.long`` tensor.

    Raises:
        ValueError: If *batch_size* is not a positive integer.
    """
    if batch_size is None or batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    fields = dict(encodings.items())
    n_rows = len(next(iter(fields.values()))) if fields else 0
    if n_rows == 0:
        return torch.empty(0, dtype=torch.long)

    # Run the model in mini-batches: a single forward pass over the whole
    # dataset makes peak memory grow with the dataset size. The inputs are
    # already padded to a common length, so slicing rows does not change the
    # per-row result.
    chunks = []
    with torch.no_grad():
        for start in range(0, n_rows, batch_size):
            stop = start + batch_size
            batch = {key: value[start:stop] for key, value in fields.items()}
            outputs = model(**batch)
            chunks.append(torch.argmax(outputs.logits, dim=1))
    predictions = torch.cat(chunks)
    return predictions

# Predicted class index (argmax over the logits) for each encoded statement.
predictions = get_predictions(model, encodings)

# Load the checkpoint as a TensorFlow model and save it to disk with save_pretrained
def convert_and_save_model(model_name, save_path):
    """Load *model_name* as a TensorFlow model and save it under *save_path*.

    Args:
        model_name: Checkpoint identifier or local path accepted by
            ``TFAutoModelForSequenceClassification.from_pretrained``.
        save_path: Destination passed to ``save_pretrained``.

    Raises:
        OSError: If the checkpoint cannot be loaded either from TensorFlow
            weights or by converting PyTorch weights.
    """
    try:
        tf_model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
    except OSError:
        # The checkpoint may ship only PyTorch weights; ask transformers to
        # convert them instead of failing. If this retry also fails, the
        # error propagates to the caller with the first failure as context.
        tf_model = TFAutoModelForSequenceClassification.from_pretrained(
            model_name, from_pt=True
        )
    tf_model.save_pretrained(save_path)
    print(f"Model saved in {save_path}")

# Destination handed to save_pretrained for the converted TensorFlow model
# (presumably a directory — confirm against the transformers docs).
save_path = "./misinformation_classifier"
convert_and_save_model(model_name, save_path)


ModuleNotFoundError: No module named 'transformers'