In [1]:
import pandas as pd
import json
import numpy as np
from time import sleep
import time
from tqdm import tqdm
import requests
from pydantic import BaseModel
# Endpoint of the local model server, kept in a text file next to the notebook.
# The context manager closes the file handle; strip() drops a trailing newline
# or stray spaces so they do not end up inside the request URL.
with open("local_models_path.txt", "r") as url_file:
	url = url_file.read().strip()

In [2]:
# Read the two ParlaSent test splits; each file is JSON Lines (one record per line).
test_en, test_bcs = (
	pd.read_json(jsonl_path, lines=True)
	for jsonl_path in (
		"../../datasets/ParlaSent-EN-test/ParlaSent_EN_test.jsonl",
		"../../datasets/ParlaSent-BSC-test/ParlaSent_BCS_test.jsonl",
	)
)

# Quick sanity check: (rows, columns) of each split.
print(test_en.shape, test_bcs.shape)

(2600, 14) (2600, 14)


In [3]:
def run_local_model(model, prompt, url=url, timeout=None):
	"""Send one non-streaming generation request to the local model server.

	The request asks the server to constrain its reply to a JSON object with a
	single integer field, ``sentiment``, and returns the ``"response"`` field of
	the server's JSON answer.

	Args:
		model: Model identifier, passed to the server unchanged.
		prompt: Full prompt text.
		url: Endpoint to POST to; defaults to the module-level ``url``.
		timeout: Optional timeout in seconds handed to ``requests.post``.
			``None`` (the default) waits indefinitely, as before.

	Returns:
		The value stored under ``"response"`` in the server's JSON reply.

	Raises:
		requests.HTTPError: If the server answers with a 4xx/5xx status.
		KeyError: If the JSON reply has no ``"response"`` key.
	"""

	class ResponseStructure(BaseModel):
		# Only the field type is constrained; the integer range is not.
		sentiment: int

	data = {
	    "model": model,
	    "prompt": prompt,
	    "stream": False,
	    # Top-level key kept for servers that read it here.
	    "temperature": 0,
	    # NOTE(review): the payload shape matches Ollama's /api/generate, which
	    # takes sampling parameters from "options" - confirm against the server.
	    "options": {"temperature": 0},
	    "format": ResponseStructure.model_json_schema()
	}

	headers = {"Content-Type": "application/json",}
	response = requests.post(url, json=data, headers=headers, timeout=timeout)

	# Fail with the real HTTP error instead of a KeyError on "response" below.
	response.raise_for_status()

	return response.json()["response"]

In [4]:
# Alternative model set (currently disabled):
#models = ["gemma3:27b", "deepseek-r1:14b", "llama3.3:latest"]

# Model identifiers that the evaluation loop iterates over.
models = [
	"llama4:scout",
	"qwen3:32b",
]

In [4]:
def predict_gpt(df_test_name, gpt_model):
	"""Zero-shot sentiment classification of one test set with one local model.

	For every row of the selected test frame a prompt is built from the row's
	``text`` and ``lang`` values and sent through ``run_local_model``. The
	integer in the reply's ``sentiment`` field is mapped to "Negative",
	"Neutral" or "Positive". Replies that cannot be parsed or that carry an
	unknown value are printed and recorded as "Mix", so one bad reply does not
	abort the run. The predictions are written to
	``submissions/submission-<gpt_model>-<df_test_name>.json``.

	Args:
		df_test_name: "ParlaSent-EN-test" or "ParlaSent-BCS-test".
		gpt_model: Model name. "GaMS-27B-quantized" and "GaMS-27B" are aliases
			for longer model paths; any other value is sent to the server as is.
			The name is also used verbatim in the output file name.

	Returns:
		None. Results are written to disk and progress is printed.

	Raises:
		KeyError: If ``df_test_name`` is not one of the two known test sets.
	"""
	# Local import: only needed to create the output directory.
	import os

	dfs = {
		"ParlaSent-EN-test": test_en,
		"ParlaSent-BCS-test": test_bcs
	}

	df = dfs[df_test_name]

	responses = []

	texts = df["text"].to_list()
	langs = df["lang"].to_list()

	labels_dict = {0: "Negative", 1: "Neutral", 2: "Positive"}

	# Interpolated into the prompt below via its dict repr.
	sentiment_description = {
		"Negative - text that is entirely or predominantly negative":  0, 
		"Neutral - text that only contains non-sentiment-related statements": 1,
		"Positive - text that is entirely or predominantly positive": 2
	}

	# Resolve the alias once; it does not depend on the individual text.
	model_aliases = {
		"GaMS-27B-quantized": "hf.co/mradermacher/GaMS-27B-Instruct-i1-GGUF:i1-Q4_K_M",
		"GaMS-27B": "hf.co/mradermacher/GaMS-27B-Instruct-i1-GGUF:latest",
	}
	gpt_model_path = model_aliases.get(gpt_model, gpt_model)

	start_time = time.time()

	for text, lang in zip(texts, langs):

		# The prompt's leading whitespace is part of the string sent to the model.
		current_prompt = f"""
			### Task
				Your task is to classify the provided parliamentary text into a sentiment label, meaning that you need to recognize whether the speaker's sentiment towards the topic is negative, neutral, positive or somewhere in between. You will be provided with an excerpt from a parliamentary speech in {lang} language, delimited by single quotation marks. Always provide a label, even if you are not sure.


			### Output format
				Return a valid JSON dictionary with the following key: 'sentiment' and a value should be an integer which represents one of the labels according to the following dictionary: {sentiment_description}.

				Text: '{text}'
		"""

		initial_response = run_local_model(gpt_model_path, current_prompt, url=url)

		try:
			# Remove raw newlines/tabs before parsing the reply as JSON.
			response = initial_response.replace("\n", "").replace("\t", "")

			# Convert the string into a dictionary and get out a label
			response = json.loads(response)
			predicted = labels_dict[response["sentiment"]]
		# Malformed JSON (ValueError), a missing key or unknown label value
		# (KeyError), or a reply of an unexpected type (TypeError/AttributeError)
		# all fall back to "Mix". Other exceptions, e.g. KeyboardInterrupt,
		# still propagate.
		except (AttributeError, ValueError, KeyError, TypeError):
			print("error with extracting a label:")
			print(initial_response)
			predicted = "Mix"

		responses.append(predicted)

	elapsed_time_s = time.time() - start_time
	n_instances = df.shape[0]
	# Guard against an empty test frame.
	per_instance_s = elapsed_time_s / n_instances if n_instances else 0.0

	print(f"Prediction finished. It took {elapsed_time_s/60} min for {n_instances} instances - {per_instance_s} s per instance.")

	# Create a json with results

	current_results = {
		"system": gpt_model,
		"predictions": [
			{
			"train": "NA (zero-shot)",
			"test": "{}".format(df_test_name),
			"predictions": responses,
			}
		]
		}

	# Make sure the output directory exists so a long run is not lost at the
	# very last step.
	os.makedirs("submissions", exist_ok=True)

	# Save the results as a new json
	with open("submissions/submission-{}-{}.json".format(gpt_model, df_test_name), "w") as file:
		json.dump(current_results, file)

	print("Classification with {} on {} finished.".format(gpt_model, df_test_name))

In [7]:
# Evaluate every model in `models` on the English test set, then on the BCS one.
for test, model in [
	(test_name, model_name)
	for test_name in ["ParlaSent-EN-test", "ParlaSent-BCS-test"]
	for model_name in models
]:
	print(model)
	predict_gpt(test, model)


llama4:scout
Prediction finished. It took 26.681410400072732 min for 2600 instances - 0.6157248553862938 s per instance.
Classification with llama4:scout on ParlaSent-EN-test finished.
qwen3:32b
Prediction finished. It took 16.51592076619466 min for 2600 instances - 0.38113663306603063 s per instance.
Classification with qwen3:32b on ParlaSent-EN-test finished.
llama4:scout
Prediction finished. It took 28.800346231460573 min for 2600 instances - 0.664623374572167 s per instance.
Classification with llama4:scout on ParlaSent-BCS-test finished.
qwen3:32b
Prediction finished. It took 17.44969645738602 min for 2600 instances - 0.4026853028627542 s per instance.
Classification with qwen3:32b on ParlaSent-BCS-test finished.


In [5]:
# Evaluate both GaMS variants on the English test set, then on the BCS one.
for test, model in [
	(test_name, model_name)
	for test_name in ["ParlaSent-EN-test", "ParlaSent-BCS-test"]
	for model_name in ["GaMS-27B-quantized", "GaMS-27B"]
]:
	print(model)
	predict_gpt(test, model)


GaMS-27B-quantized
error with extracting a label:
{"sentiment": 3}
error with extracting a label:
{ "sentiment": 3 }
error with extracting a label:
{"sentiment": 3}
error with extracting a label:
{"sentiment": 3}

error with extracting a label:
{"sentiment": 3}
error with extracting a label:
{"sentiment": 3}
error with extracting a label:
{"sentiment": 3}
Prediction finished. It took 15.887605230013529 min for 2600 instances - 0.366637043769543 s per instance.
Classification with GaMS-27B-quantized on ParlaSent-EN-test finished.
GaMS-27B
error with extracting a label:
{"sentiment": 3}
error with extracting a label:
{"sentiment": 3}

error with extracting a label:
{"sentiment": 3}
error with extracting a label:
{"sentiment": 3}
error with extracting a label:
{"sentiment": 3}
error with extracting a label:
{"sentiment": 3}
error with extracting a label:
{"sentiment": 3}
error with extracting a label:
{"sentiment": 3}
error with extracting a label:
{"sentiment": 3}

error with extracting 