In [17]:
# Enumerate GPUs in PCI bus order so the index below refers to a stable physical device.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
# Expose only GPU 7 to this kernel. Inside the process it is then addressed as
# device 0 (see `device=0` in predict_sentiment).
# NOTE(review): this has to run before CUDA is initialised to take effect — keep it as the first cell.
%env CUDA_VISIBLE_DEVICES=7

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=7


In [18]:
from transformers import AutoTokenizer
import sys
import json
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification
from simpletransformers.classification import ClassificationModel, ClassificationArgs
from transformers import AutoModelForSequenceClassification, TextClassificationPipeline, AutoTokenizer, AutoConfig
import torch
import argparse
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [19]:
# Read the two ParlaSent test splits (JSON Lines: one record per line).
# NOTE(review): the second directory is spelled "BSC" while its file is "BCS";
# the load succeeds, so this presumably mirrors the on-disk name — verify before renaming.
test_en = pd.read_json(
    "../../datasets/ParlaSent-EN-test/ParlaSent_EN_test.jsonl",
    lines=True,
)
test_bcs = pd.read_json(
    "../../datasets/ParlaSent-BSC-test/ParlaSent_BCS_test.jsonl",
    lines=True,
)

# Quick sanity check: (rows, columns) of each split.
print(test_en.shape, test_bcs.shape)

(2600, 14) (2600, 14)


In [20]:
def get_3_category_label(x: float) -> str:
    """Map a continuous sentiment score to one of three sentiment labels.

    The score is rounded to the nearest integer (``np.round``, i.e. halves go
    to the nearest even integer), clipped to the range 0..5, and then bucketed
    in pairs: 0-1 -> 'Negative', 2-3 -> 'Neutral', 4-5 -> 'Positive'.

    Args:
        x: Raw score to convert.

    Returns:
        'Negative', 'Neutral' or 'Positive'.
    """
    import numpy as np

    labels = ('Negative', 'Neutral', 'Positive')

    rounded = np.round(x)
    bounded = np.clip(rounded, 0, 5)
    # Floor-dividing the 0..5 value by 2 yields the bucket index 0..2.
    bucket = int(bounded // 2)
    return labels[bucket]

In [23]:
def predict_sentiment(df_test_name):
    """Classify one ParlaSent test set with XLM-R-ParlaSent and save the predictions.

    The texts of the selected test set are run through the
    ``classla/xlm-r-parlasent`` model. For every text the raw score of the
    first pipeline output is taken (``function_to_apply="none"`` leaves it
    untransformed) and converted to 'Negative' / 'Neutral' / 'Positive' with
    ``get_3_category_label``. The labels are written to
    ``submissions/submission-XLM-R-ParlaSent-<df_test_name>.json``.

    Args:
        df_test_name: Name of the test set to predict on, either
            "ParlaSent-EN-test" or "ParlaSent-BCS-test".

    Raises:
        KeyError: If ``df_test_name`` is not one of the known test set names.
    """
    import os  # local import: only needed to create the output directory

    system_name = "XLM-R-ParlaSent"

    dfs = {
        "ParlaSent-EN-test": test_en,
        "ParlaSent-BCS-test": test_bcs
    }

    df = dfs[df_test_name]
    texts = df["text"].to_list()

    # Create the output directory before the expensive inference step, so a
    # missing folder cannot make the final write fail after all the work is done.
    os.makedirs("submissions", exist_ok=True)

    MODEL = "classla/xlm-r-parlasent"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)

    # Device 0 is the first *visible* GPU; fall back to CPU (-1) when no CUDA
    # device is available instead of crashing.
    device = 0 if torch.cuda.is_available() else -1

    pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True, task='sentiment_analysis', device=device, function_to_apply="none")

    output_list = pipe(texts)

    # With return_all_scores=True every item is a list of {"label", "score"}
    # dicts; the first entry's score is the value used for the label mapping.
    label_list = [scores[0]["score"] for scores in output_list]

    # Transform the float values to the 3 concrete sentiment labels
    prediction_list = [get_3_category_label(x) for x in label_list]

    # Assemble the results in the submission format
    current_results = {
        "system": system_name,
        "predictions": [
            {
                "train": "ParlaSent",
                "test": "{}".format(df_test_name),
                "predictions": prediction_list,
            }
        ]
    }

    # Save the results as a new json
    with open("submissions/submission-{}-{}.json".format(system_name, df_test_name), "w") as file:
        json.dump(current_results, file)

    print("Classification with {} on {} finished.".format(system_name, df_test_name))

In [24]:
# Predict on the English test set; writes
# submissions/submission-XLM-R-ParlaSent-ParlaSent-EN-test.json
predict_sentiment("ParlaSent-EN-test")

Classification with XLM-R-ParlaSent on ParlaSent-EN-test finished.


In [25]:
# Predict on the BCS test set; writes
# submissions/submission-XLM-R-ParlaSent-ParlaSent-BCS-test.json
predict_sentiment("ParlaSent-BCS-test")

Classification with XLM-R-ParlaSent on ParlaSent-BCS-test finished.
