# In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import json
import pandas as pd

# Hugging Face Hub identifier of the sentiment checkpoint shared by the
# tokenizer and the classification model below.
model_name = "tabularisai/multilingual-sentiment-analysis"

# Load the tokenizer first, then the sequence-classification model, both from
# the same checkpoint so the vocabulary matches the model's embeddings.
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModelForSequenceClassification.from_pretrained(model_name),
)

def predict_sentiment(texts, batch_size=32):
    """Predict a sentiment class index for each input text.

    The texts are tokenized and passed through the module-level ``tokenizer``
    and ``model`` in mini-batches, so peak memory is bounded by ``batch_size``
    instead of growing with the size of the whole dataset.

    Args:
        texts: A single string or an iterable of strings to classify.
        batch_size: Maximum number of texts per forward pass. Must be a
            positive integer.

    Returns:
        A list of ints, one per input text in input order, each being the
        index of the highest-scoring logit. Empty input yields an empty list.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # A bare string is one text, not a sequence of characters to batch over.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    # Nothing to classify: skip the tokenizer and the model entirely.
    if not texts:
        return []

    labels = []
    # Inference only, so gradient tracking is disabled for every batch.
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            # Pad to the longest text in this batch and truncate anything
            # beyond 512 tokens.
            inputs = tokenizer(
                batch,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512,
            )
            outputs = model(**inputs)
            labels.extend(torch.argmax(outputs.logits, dim=-1).tolist())
    return labels

# Locations of the unlabelled input and the labelled output (JSON Lines).
input_path = "./merged_output.jsonl"
output_path = "./labelled_output.jsonl"

# Read the input file, one JSON record per line, into a DataFrame.
df = pd.read_json(input_path, lines=True)

# Classify every entry of the "text" column and store the predicted class
# index for each row in a new "label" column.
texts = df["text"].tolist()
df["label"] = predict_sentiment(texts)

# Write the rows back out as one JSON object per line; force_ascii=False
# keeps non-ASCII characters unescaped in the output.
df.to_json(output_path, orient="records", lines=True, force_ascii=False)

print(f"Labeled dataset saved to {output_path}")