In [None]:
# Install the `openai` client package (imported below as `from openai import OpenAI`)
# into the environment the notebook kernel runs in.
%pip install openai

In [1]:
import pandas as pd
import json
import numpy as np
from time import sleep
import time
from openai import OpenAI
from tqdm import tqdm

# Build the API client that talks to OpenRouter's OpenAI-compatible endpoint.
# The key is read from the local "API_key" file inside a context manager so the
# file handle is closed right away; strip() drops surrounding whitespace (for
# example a trailing newline) so it does not become part of the key.
with open("API_key", "r") as key_file:
  client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=key_file.read().strip(),
  )

In [2]:
# Read both test sets from local clones of their GitHub repositories.
# (Access to the repositories is granted on request by the owner of the AGILE repository.)
# Each file is in JSON Lines format: one JSON record per line.
en_ginco = pd.read_json(
	"../../datasets/EN-GINCO-test-dataset/EN-GINCO.jsonl",
	lines=True,
)
x_ginco = pd.read_json(
	"../../datasets/X-GINCO-test-set/X-GINCO.jsonl",
	lines=True,
)

# Sanity check: (rows, columns) of each test set.
print(en_ginco.shape, x_ginco.shape)

(272, 4) (790, 6)


In [3]:
def _label_from_response(raw_response, labels_dict):
	"""Turn one raw model reply into a genre label name.

	Args:
		raw_response: Message content returned by the model, expected to be a
			JSON object with a "genre" key. May be None.
		labels_dict: Mapping from integer genre id to label name.

	Returns:
		The label name for the predicted genre id, or the string "error" when
		the reply is missing, is not valid JSON, has no "genre" key, or names
		an id that is not in labels_dict. A message is printed in that case.
	"""
	try:
		# Drop raw newlines and tabs before parsing.
		cleaned = raw_response.replace("\n", "").replace("\t", "")
		genre = json.loads(cleaned)["genre"]
		# A reply may carry the id as a quoted number ("3" instead of 3);
		# labels_dict is keyed by int, so convert before the lookup.
		if isinstance(genre, str):
			genre = int(genre.strip())
		return labels_dict[genre]
	# AttributeError: the reply is None (or not a string).
	# ValueError: invalid JSON (JSONDecodeError) or a non-numeric string value.
	# KeyError: no "genre" key, or an id outside labels_dict.
	# TypeError: the reply is not a JSON object, or the value is unhashable.
	except (AttributeError, ValueError, KeyError, TypeError):
		print("error with extracting a label")
		return "error"


def predict_gpt(df_test_name, gpt_model):
	"""Classify every text of a test set with a chat model and save the predictions.

	Each text is sent in its own zero-shot request (temperature 0, JSON response
	format) through the module-level `client`. The predicted label names, or
	"error" for replies that cannot be parsed, are written to
	"submissions/submission-<model>-<test set>.json"; the "submissions"
	directory is created if it does not exist. Timing and progress messages
	are printed.

	Args:
		df_test_name: Which test set to use: "en-ginco" or "x-ginco".
		gpt_model: Model identifier passed to the API, e.g.
			"google/gemini-2.5-flash". The segment after the first "/" is used
			in the output file name; an identifier without "/" is used whole.

	Returns:
		None.

	Raises:
		KeyError: If df_test_name is not one of the known test sets.
	"""
	# Local import: the notebook's import cell does not import os.
	import os

	dfs = {
		"en-ginco": en_ginco,
		"x-ginco": x_ginco
	}

	df = dfs[df_test_name]
	texts = df["text"].to_list()

	labels_dict = {
		0: "Other",
		1: "Information/Explanation",
		2: "News",
		3: "Instruction",
		4: "Opinion/Argumentation",
		5: "Forum",
		6: "Prose/Lyrical",
		7: "Legal",
		8: "Promotion"
	}

	responses = []
	start_time = time.time()

	for text in texts:
		# The whitespace inside this literal is part of the prompt sent to the
		# model, so its lines are deliberately not re-indented.
		prompt = f"""
			### Task
			Your task is to classify the following text according to genre. Genres are text types, defined by the function of the text, author’s purpose and form of the text. Always provide a label, even if you are not sure.

			### Output format
				Return a valid JSON dictionary with the following key: 'genre' and a value should be an integer which represents one of the labels according to the following dictionary: {labels_dict}.

				
				Text: '{text}'
		"""

		completion = client.chat.completions.create(
			model=gpt_model,
			response_format={"type": "json_object"},
			messages=[{"role": "user", "content": prompt}],
			temperature=0,
		)

		# Parsing problems are recorded as "error" so that one bad reply does
		# not abort the run and the predictions stay aligned with the texts.
		responses.append(_label_from_response(completion.choices[0].message.content, labels_dict))

	elapsed_seconds = time.time() - start_time
	n_instances = df.shape[0]
	# Guard the per-instance average against an empty test set.
	seconds_per_instance = elapsed_seconds / n_instances if n_instances else 0.0

	print(f"Prediction finished. It took {elapsed_seconds/60} min for {n_instances} instances - {seconds_per_instance} s per instance.")

	# Create a json with results
	current_results = {
		"system": gpt_model,
		"predictions": [
			{
			"train": "NA (zero-shot)",
			"test": "{}".format(df_test_name),
			"predictions": responses,
			}
		]
		}

	# Model ids used here look like "<provider>/<model>"; only the segment after
	# the first "/" goes into the file name. An id without "/" is used whole
	# instead of raising IndexError after all predictions were already made.
	id_parts = gpt_model.split("/")
	gpt_model_name = id_parts[1] if len(id_parts) > 1 else id_parts[0]

	# Save the results as a new json, creating the output directory on first use.
	os.makedirs("submissions", exist_ok=True)
	with open("submissions/submission-{}-{}.json".format(gpt_model_name, df_test_name), "w") as file:
		json.dump(current_results, file)

	print("Classification with {} on {} finished.".format(gpt_model_name, df_test_name))

In [4]:
# Model identifiers ("<provider>/<model>") to evaluate, in the order they are run.
models = [
	"google/gemini-2.5-flash",
	"mistralai/mistral-medium-3.1",
]

In [5]:
# Evaluate every model on every test set. The test set is the outer dimension,
# so all models finish "en-ginco" before any of them starts on "x-ginco".
for test, model in [
	(test_name, model_name)
	for test_name in ("en-ginco", "x-ginco")
	for model_name in models
]:
	print(model)
	predict_gpt(test, model)

google/gemini-2.5-flash
Prediction finished. It took 2.7622382561365764 min for 272 instances - 0.6093172623830683 s per instance.
Classification with gemini-2.5-flash on en-ginco finished.
mistralai/mistral-medium-3.1
Prediction finished. It took 3.6654406627019247 min for 272 instances - 0.8085530873607186 s per instance.
Classification with mistral-medium-3.1 on en-ginco finished.
google/gemini-2.5-flash
Prediction finished. It took 6.8974383989969885 min for 790 instances - 0.5238560809364802 s per instance.
Classification with gemini-2.5-flash on x-ginco finished.
mistralai/mistral-medium-3.1
Prediction finished. It took 12.43912763595581 min for 790 instances - 0.9447438710852515 s per instance.
Classification with mistral-medium-3.1 on x-ginco finished.
