In [None]:
# Install the OpenAI Python client that this notebook imports below
# (`%pip` installs into the environment of the running kernel).
%pip install openai

In [1]:
import json
import os
import time
from time import sleep

import numpy as np
import pandas as pd
from openai import OpenAI
from tqdm import tqdm
# Read the API key from the local "API_key" file. The context manager closes the
# file handle again, and strip() removes surrounding whitespace (e.g. a trailing
# newline added by an editor) so that it does not become part of the key.
with open('API_key') as key_file:
	client = OpenAI(api_key=key_file.read().strip())

In [2]:
# Read the two test sets (JSON Lines files, one record per line) from the local
# checkouts of the dataset repositories. Access to these repositories has to be
# requested from the AGILE repository owner.
en_ginco = pd.read_json(
	"../../datasets/EN-GINCO-test-dataset/EN-GINCO.jsonl",
	lines=True,
)
x_ginco = pd.read_json(
	"../../datasets/X-GINCO-test-set/X-GINCO.jsonl",
	lines=True,
)

# Sanity check: (rows, columns) of each data frame.
print(en_ginco.shape, x_ginco.shape)

(272, 4) (790, 6)


In [3]:
# Preview the first two rows of the English test set to check its columns.
en_ginco.head(2)

Unnamed: 0,text,labels,dataset,language
0,Welcome to KBismarck.org! This is a community ...,Information/Explanation,EN-GINCO,English
1,Why graft thrives in postconflict zones <p> A ...,News,EN-GINCO,English


In [4]:
def _parse_genre_response(raw_response, labels_dict):
	"""Turn one raw model reply into a genre label.

	Args:
		raw_response: The message content returned by the model (a string, or
			``None`` when the reply carries no text content).
		labels_dict: Mapping from integer label id to label name.

	Returns:
		The label name when the reply is a JSON object whose 'genre' value maps
		to a known label id. Otherwise the reply itself is returned (the parsed
		JSON if decoding succeeded, else the cleaned string) so that the failure
		stays visible in the saved predictions.
	"""
	# A reply without text content is treated like an empty string instead of
	# crashing the whole run on ``None.replace``.
	response = (raw_response or "").replace("\n", "").replace("\t", "")

	try:
		# Convert the string into a dictionary
		response = json.loads(response)
		genre = response["genre"]
		# Accept a quoted id ("2") as well as a bare integer (2).
		if isinstance(genre, str):
			genre = int(genre.strip())
		return labels_dict[genre]
	# ValueError: invalid JSON (json.JSONDecodeError) or a non-numeric genre string.
	# KeyError: no 'genre' key, or an id that is not in labels_dict.
	# TypeError: the JSON is not an object, or the genre value is unhashable.
	except (ValueError, KeyError, TypeError):
		print("error with extracting a label")
		return response


def predict_gpt(df_test_name, gpt_model):
	"""Classify every text of a test set into a genre with an OpenAI chat model (zero-shot).

	One chat-completion request is sent per text. The model is asked to return
	a JSON object with a 'genre' key holding the integer id of the label; the
	id is mapped back to the label name. Replies from which no label can be
	extracted are stored as-is in the list of predictions.

	Args:
		df_test_name: Which test set to use: "en-ginco" or "x-ginco" (these map
			to the module-level data frames ``en_ginco`` and ``x_ginco``).
		gpt_model: Name of the model passed to the chat-completions endpoint.
			Models whose name contains "gpt-5" are called without the
			``temperature`` argument; all others are called with temperature 0.

	Returns:
		None. The predictions are written to
		``submissions/submission-<gpt_model>-<df_test_name>.json`` and timing
		information is printed.

	Raises:
		KeyError: If ``df_test_name`` is not one of the supported test sets.
	"""
	dfs = {
		"en-ginco": en_ginco,
		"x-ginco": x_ginco
	}

	df = dfs[df_test_name]
	texts = df["text"].to_list()

	# Integer id returned by the model -> label name stored in the submission.
	labels_dict = {
		0: "Other",
		1: "Information/Explanation",
		2: "News",
		3: "Instruction",
		4: "Opinion/Argumentation",
		5: "Forum",
		6: "Prose/Lyrical",
		7: "Legal",
		8: "Promotion"
	}

	# Label descriptions -> integer id. The repr of this dictionary is inserted
	# verbatim into the prompt, so its keys and their order are part of the prompt.
	label_dict_with_description_ext = {
		"Information/Explanation - An objective text that describes or presents an event, a person, a thing, a concept etc. Its main purpose is to inform the reader about something. Common features: objective/factual, explanation/definition of a concept (x is …), enumeration. E.g., research article, encyclopedia article, informational blog, product specification, course materials, general information, job description, manual, horoscope, travel guide, glossaries, historical article, biographical story/history.": 1,
		"News - An objective or subjective text which reports on an event recent at the time of writing or coming in the near future. Common features: adverbs/adverbial clauses of time and/or place (dates, places), many proper nouns, direct or reported speech, past tense. E.g., news report, sports report, travel blog, reportage, police report, announcement.": 2,
		"Instruction - An objective text which instructs the readers on how to do something. Common features: multiple steps/actions, chronological order, 1st person plural or 2nd person, modality (must, have to, need to, can, etc.), adverbial clauses of manner (in a way that), of condition (if), of time (after …). E.g., how-to texts, recipes, technical support.": 3,
		"Opinion/Argumentation - A subjective text in which the authors convey their opinion or narrate their experience. It includes promotion of an ideology and other non-commercial causes. This genre includes a subjective narration of a personal experience as well. Common features: adjectives/adverbs that convey opinion, words that convey (un)certainty (certainly, surely), 1st person, exclamation marks. E.g., review, blog (personal blog, travel blog), editorial, advice, letter to editor, persuasive article or essay, formal speech, pamphlet, political propaganda, columns, political manifesto.": 4,
		"Forum - A text in which people discuss a certain topic in form of comments. Common features: multiple authors, informal language, subjective (the writers express their opinions), written in 1st person. E.g., discussion forum, reader/viewer responses, QA forum.": 5,
		"Prose/Lyrical - A literary text that consists of paragraphs or verses. A literary text is deemed to have no other practical purpose than to give pleasure to the reader. Often the author pays attention to the aesthetic appearance of the text. It can be considered as art. E.g., lyrics, poem, prayer, joke, novel, short story. ": 6,
		"Legal - An objective formal text that contains legal terms and is clearly structured. The name of the text type is often included in the headline (contract, rules, amendment, general terms and conditions, etc.). Common features: objective/factual, legal terms, 3rd person. E.g., small print, software license, proclamation, terms and conditions, contracts, law, copyright notices, university regulation.": 7,
		"Promotion - A subjective text intended to sell or promote an event, product, or service. It addresses the readers, often trying to convince them to participate in something or buy something. Common features: contains adjectives/adverbs that promote something (high-quality, perfect, amazing), comparative and superlative forms of adjectives and adverbs (the best, the greatest, the cheapest), addressing the reader (usage of 2nd person), exclamation marks. E.g., advertisement, promotion of a product (e-shops), promotion of an accommodation, promotion of company's services, invitation to an event.": 8,
		"Other - A text that which does not fall under any of other genre categories.": 0,
	}

	# Request arguments shared by all calls for this model.
	request_options = {
		"model": gpt_model,
		"response_format": {"type": "json_object"},
	}
	# the "v5" models do not have the "temperature" parameter
	if "gpt-5" not in gpt_model:
		request_options["temperature"] = 0

	responses = []
	start_time = time.time()

	for text in texts:
		# The whitespace inside this literal is part of the prompt that is sent
		# to the model; it is kept exactly as in the original runs.
		prompt = f"""
				### Task
				Your task is to classify the following text according to genre. Genres are text types, defined by the function of the text, author’s purpose and form of the text. Always provide a label, even if you are not sure.

				### Output format
					Return a valid JSON dictionary with the following key: 'genre' and a value should be an integer which represents one of the labels according to the following dictionary: {label_dict_with_description_ext}.

					
					Text: '{text}'
			"""

		completion = client.chat.completions.create(
			messages=[{"role": "user", "content": prompt}],
			**request_options,
		)

		# Get out a label (or keep the raw reply if no label can be extracted)
		responses.append(
			_parse_genre_response(completion.choices[0].message.content, labels_dict)
		)

	elapsed_time_s = time.time() - start_time
	n_instances = df.shape[0]
	# Guard against an empty test set, which would otherwise divide by zero.
	per_instance_s = elapsed_time_s / n_instances if n_instances else 0.0

	print(f"Prediction finished. It took {elapsed_time_s/60} min for {n_instances} instances - {per_instance_s} s per instance.")

	# Create a json with results
	current_results = {
		"system": gpt_model,
		"predictions": [
			{
			"train": "NA (zero-shot)",
			"test": "{}".format(df_test_name),
			"predictions": responses,
			}
		]
		}

	# Save the results as a new json; create the output directory on first use
	# so that a finished run is not lost because the directory is missing.
	os.makedirs("submissions", exist_ok=True)
	with open("submissions/submission-{}-{}.json".format(gpt_model, df_test_name), "w") as file:
		json.dump(current_results, file)

	print("Classification with {} on {} finished.".format(gpt_model, df_test_name))

In [5]:
# Model snapshots to evaluate, in the order in which they are run.
models = [
	"gpt-4o-2024-08-06",
	"gpt-3.5-turbo-0125",
	"gpt-4o-mini-2024-07-18",
	"gpt-5-nano-2025-08-07",
	"gpt-5-mini-2025-08-07",
	"gpt-5-2025-08-07",
]

In [7]:
# Continue the EN-GINCO predictions with the models listed below (a subset of
# `models`). NOTE(review): presumably the remaining models were already run in
# an earlier session - confirm.
test = "en-ginco"
for model in (
	"gpt-4o-mini-2024-07-18",
	"gpt-5-nano-2025-08-07",
	"gpt-5-mini-2025-08-07",
	"gpt-5-2025-08-07",
):
	print(model)
	predict_gpt(test, model)

gpt-4o-mini-2024-07-18
Prediction finished. It took 3.453443451722463 min for 272 instances - 0.7617889967034844 s per instance.
Classification with gpt-4o-mini-2024-07-18 on en-ginco finished.
gpt-5-nano-2025-08-07
Prediction finished. It took 17.573689687252045 min for 272 instances - 3.876549195717363 s per instance.
Classification with gpt-5-nano-2025-08-07 on en-ginco finished.
gpt-5-mini-2025-08-07
Prediction finished. It took 15.44372048775355 min for 272 instances - 3.4067030487691654 s per instance.
Classification with gpt-5-mini-2025-08-07 on en-ginco finished.
gpt-5-2025-08-07
Prediction finished. It took 31.519857140382133 min for 272 instances - 6.952909663319588 s per instance.
Classification with gpt-5-2025-08-07 on en-ginco finished.


In [8]:
# Run every model in `models` on the X-GINCO test set.
test = "x-ginco"
for model in models:
	print(model)
	predict_gpt(test, model)

gpt-4o-2024-08-06
Prediction finished. It took 11.030546375115712 min for 790 instances - 0.8377630158315731 s per instance.
Classification with gpt-4o-2024-08-06 on x-ginco finished.
gpt-3.5-turbo-0125
Prediction finished. It took 8.338885962963104 min for 790 instances - 0.6333331111111219 s per instance.
Classification with gpt-3.5-turbo-0125 on x-ginco finished.
gpt-4o-mini-2024-07-18
Prediction finished. It took 10.449515946706136 min for 790 instances - 0.7936341225346433 s per instance.
Classification with gpt-4o-mini-2024-07-18 on x-ginco finished.
gpt-5-nano-2025-08-07
Prediction finished. It took 58.62004897991816 min for 790 instances - 4.452155618727962 s per instance.
Classification with gpt-5-nano-2025-08-07 on x-ginco finished.
gpt-5-mini-2025-08-07
Prediction finished. It took 52.45770330031713 min for 790 instances - 3.984129364581048 s per instance.
Classification with gpt-5-mini-2025-08-07 on x-ginco finished.
gpt-5-2025-08-07
Prediction finished. It took 139.1693073