In [1]:
import pandas as pd
import json
import numpy as np
from time import sleep
import time
from tqdm import tqdm
import requests
from pydantic import BaseModel
# Endpoint of the local model server, kept in an untracked text file.
# The context manager closes the file handle, and strip() removes stray
# surrounding whitespace (e.g. a trailing newline) that would otherwise
# become part of the URL.
with open("local_models_path.txt", "r") as url_file:
	url = url_file.read().strip()

In [2]:
# The test sets live in separate GitHub repositories; access is granted on
# request by the AGILE repository owner.
en_ginco = pd.read_json(
	"../../datasets/EN-GINCO-test-dataset/EN-GINCO.jsonl",
	lines=True,
)
x_ginco = pd.read_json(
	"../../datasets/X-GINCO-test-set/X-GINCO.jsonl",
	lines=True,
)

# Sanity check: (rows, columns) of both frames on one line
print(f"{en_ginco.shape} {x_ginco.shape}")

# Preview the first two rows of the English set
en_ginco.head(2)

(272, 4) (790, 6)


Unnamed: 0,text,labels,dataset,language
0,Welcome to KBismarck.org! This is a community ...,Information/Explanation,EN-GINCO,English
1,Why graft thrives in postconflict zones <p> A ...,News,EN-GINCO,English


In [3]:
def run_local_model(model, prompt, url=url, timeout=None):
	"""Send one prompt to the local model server and return its raw text reply.

	The request asks for a non-streamed completion whose output is constrained
	to a JSON object with a single integer field ``genre``.

	NOTE(review): the payload follows the Ollama ``/api/generate`` request
	shape, where sampling parameters such as ``temperature`` go under
	``options`` - confirm the server behind ``url`` is Ollama-compatible.

	Args:
		model: Name of the model to query (sent as the "model" field).
		prompt: Full prompt text.
		url: Generation endpoint. Defaults to the value the module-level ``url``
			had when this function was defined.
		timeout: Optional timeout in seconds handed to ``requests.post``.
			``None`` (the default) waits indefinitely.

	Returns:
		The value of the "response" field of the server's JSON reply.

	Raises:
		requests.HTTPError: If the server answers with an error status code.
	"""

	# Schema used to constrain the model output to {"genre": <int>}
	class ResponseStructure(BaseModel):
		genre: int

	data = {
		"model": model,
		"prompt": prompt,
		"stream": False,
		# Sampling parameters are nested under "options"; a top-level
		# "temperature" key is not part of the request schema.
		"options": {"temperature": 0},
		"format": ResponseStructure.model_json_schema(),
	}

	headers = {"Content-Type": "application/json"}
	response = requests.post(url, json=data, headers=headers, timeout=timeout)
	# Surface HTTP failures directly instead of as a KeyError on "response"
	response.raise_for_status()

	return response.json()["response"]

In [4]:
# Model names passed to the local server, one evaluation run per entry
models = [
	"gemma3:27b",
	"gemma2:27b",
	"deepseek-r1:14b",
	"llama3.3:latest",
]

In [7]:
def predict_gpt(df_test_name, gpt_model):
	"""Classify every text of one test set with one model and save the predictions.

	Each text is sent to ``run_local_model`` with a genre-classification prompt.
	The integer returned under the key 'genre' is mapped to a label name. When
	the reply cannot be parsed or the integer is not a known label, the raw
	reply is stored as the prediction instead, so the output list always has
	one entry per input text.

	Args:
		df_test_name: Which test set to use: "en-ginco" or "x-ginco".
		gpt_model: Model name handed to ``run_local_model``.

	Returns:
		None. The results are written to
		``submissions/submission-<model>-<test set>.json``.

	Raises:
		KeyError: If ``df_test_name`` is not one of the known test set names.
	"""
	import os  # local import: only needed to create the output directory

	dfs = {
		"en-ginco": en_ginco,
		"x-ginco": x_ginco
	}

	df = dfs[df_test_name]
	texts = df["text"].to_list()

	labels_dict = {
		0: "Other",
		1: "Information/Explanation",
		2: "News",
		3: "Instruction",
		4: "Opinion/Argumentation",
		5: "Forum",
		6: "Prose/Lyrical",
		7: "Legal",
		8: "Promotion"
	}

	responses = []
	start_time = time.time()

	for text in texts:
		current_prompt = f"""
				### Task
				Your task is to classify the following text according to genre. Genres are text types, defined by the function of the text, author’s purpose and form of the text. Always provide a label, even if you are not sure.

				### Output format
					Return a valid JSON dictionary with the following key: 'genre' and a value should be an integer which represents one of the labels according to the following dictionary: {labels_dict}.

					
					Text: '{text}'
			"""

		initial_response = run_local_model(gpt_model, current_prompt, url=url)

		try:
			# Parsing lives inside the try block so that one malformed reply
			# does not abort the run and discard the predictions made so far.
			response = initial_response.replace("\n", "")
			response = response.replace("\t", "")
			response = json.loads(response)
			predicted = labels_dict[response["genre"]]
		except (ValueError, KeyError, TypeError):
			# ValueError: reply is not valid JSON (JSONDecodeError subclasses it)
			# KeyError: no 'genre' key, or the integer is not a known label
			# TypeError: reply is valid JSON but not a usable dictionary
			print("error with extracting a label:")
			print(initial_response)
			predicted = initial_response

		responses.append(predicted)

	elapsed_s = time.time() - start_time
	n_instances = df.shape[0]
	# Guard against an empty test set when computing the per-instance time
	per_instance_s = elapsed_s / n_instances if n_instances else 0.0

	print(f"Prediction finished. It took {elapsed_s/60} min for {n_instances} instances - {per_instance_s} s per instance.")

	# The full GaMS model identifier contains '/' and cannot be used in a file name
	if gpt_model == "hf.co/tknez/GaMS-9B-Instruct-GGUF:latest":
		gpt_model = "GaMS-9B-Instruct"

	current_results = {
		"system": gpt_model,
		"predictions": [
			{
			"train": "X-GENRE (train split)",
			"test": "{}".format(df_test_name),
			"predictions": responses,
			}
		]
		}

	# Create the output directory if needed so a long run is not lost at the
	# very last step because the folder is missing.
	os.makedirs("submissions", exist_ok=True)

	# Save the results as a new json
	with open("submissions/submission-{}-{}.json".format(gpt_model, df_test_name), "w") as file:
		json.dump(current_results, file)

	print("Classification with {} on {} finished.".format(gpt_model, df_test_name))

In [None]:
# Run every model on every test set. The test set varies slowest, so all
# models finish one set before the next one starts.
for test, model in [(t, m) for t in ["en-ginco", "x-ginco"] for m in models]:
	print(model)
	predict_gpt(test, model)


In [12]:
# Display the list of model names that the evaluation loop iterates over
models

['gemma3:27b', 'gemma2:27b', 'deepseek-r1:14b', 'llama3.3:latest']

In [5]:
# Smoke test: send one free-form prompt to the GaMS model and print its reply.
# The endpoint is the `url` loaded in the first cell. Do not hard-code or
# rebind it here: `predict_gpt` reads the global `url` at call time, so a
# reassignment in this cell would silently redirect every later run.
data = {
    "model": "hf.co/tknez/GaMS-9B-Instruct-GGUF:latest",
    "prompt": "Explain what machine learning is.",
    "stream": False
}

headers = {"Content-Type": "application/json"}
response = requests.post(url, json=data, headers=headers)
# Fail on an HTTP error here rather than with a KeyError on 'response' below
response.raise_for_status()

result = response.json()

print(result['response'])

Machine Learning (ML) is a subset of artificial intelligence that focuses on the use of data and algorithms to enable computers to improve at tasks with experience. The key aspects of machine learning are:

1. Data: Machine learning models rely on large amounts of data to learn patterns, make predictions, or decisions. This data can come from various sources such as text documents, images, audio files, sensor readings, etc.
2. Algorithms: These algorithms are designed to process data and improve at a task through experience. They include supervised learning (where the data is labeled with desired outcomes), unsupervised learning (where the data is unlabeled and patterns are discovered), reinforcement learning (where an agent learns by taking actions in an environment).
3. Experience: Through exposure to data, machine learning models gradually improve their performance on a task over time. For example, they might become better at classifying images of cats as more examples of cat pictur

In [10]:
# Evaluate GaMS as well: the same procedure as for the other models, applied
# to the single GaMS model on both test sets.
for test, model in [
	(t, m)
	for t in ["en-ginco", "x-ginco"]
	for m in ["hf.co/tknez/GaMS-9B-Instruct-GGUF:latest"]
]:
	print(model)
	predict_gpt(test, model)

hf.co/tknez/GaMS-9B-Instruct-GGUF:latest
error with extracting a label:
{"genre" : 2018}

error with extracting a label:
{
	"genre": 2567899071273546
}

Prediction finished. It took 1.8320292750994365 min for 272 instances - 0.40412410480134625 s per instance.
Classification with GaMS-9B-Instruct on en-ginco finished.
hf.co/tknez/GaMS-9B-Instruct-GGUF:latest
Prediction finished. It took 5.4743201772371926 min for 790 instances - 0.41577115270155895 s per instance.
Classification with GaMS-9B-Instruct on x-ginco finished.
