In [35]:
def soft_max(arr):
    """Return the softmax of ``arr``, normalised over *all* of its elements.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps ``exp`` from overflowing: logits around
    110 (as in this notebook's recorded scores) give exp(110) ~ 6e47, which is
    above the float32 maximum (~3.4e38) and would turn the naive
    ``exp(x) / sum(exp(x))`` into ``inf / inf = nan``.

    Args:
        arr: tensor-like object exposing ``numel``, ``max``, ``exp`` and
            ``sum`` (e.g. a ``torch.Tensor``). Any shape is accepted; the
            normalisation runs over every element, not along one axis.

    Returns:
        A tensor with the same shape as ``arr`` whose entries sum to 1.
        An empty input yields an empty output.
    """
    if arr.numel() == 0:
        # `max()` is undefined on an empty tensor; keep the old behaviour of
        # returning an empty result instead of raising.
        return arr.exp()

    # Shift so the largest exponent is exactly 0 (exp(0) == 1, never overflows).
    exps = (arr - arr.max()).exp()
    return exps / exps.sum()

# 1. Loading the model

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# GlorIA-1.3B
# Single source of truth for the checkpoint so model and tokenizer cannot drift apart.
MODEL_ID = 'NOVA-vision-language/GlorIA-1.3B'
# Use the GPU when one is present; fall back to CPU so the cell still runs elsewhere.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    use_safetensors=True
).to(DEVICE)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Encode the prompt as a tensor of token ids on the same device as the model.
input_tokens = tokenizer.encode("O meu nome é", return_tensors="pt").to(DEVICE)

# Greedy decoding (do_sample=False); `max_length` counts the prompt tokens too.
# `temperature=None` is passed explicitly alongside `do_sample=False`.
# The per-step scores are requested and a dict-style result object is returned.
output = model.generate(
    input_tokens,
    max_length=100,
    do_sample=False,
    temperature=None,
    pad_token_id=tokenizer.eos_token_id,
    output_scores=True,
    return_dict_in_generate=True,
    return_legacy_cache=True
)

From v4.47 onwards, when a model cache is to be returned, `generate` will return a `Cache` instance instead by default (as opposed to the legacy tuple of tuples format). If you want to keep returning the legacy format, please set `return_legacy_cache=True`.


In [None]:
# Greedy-decode up to 50 tokens beyond the prompt and keep the per-step scores.
output = model.generate(
    input_tokens,
    max_length=input_tokens.shape[1]+50,
    do_sample=False,
    temperature=None,
    pad_token_id=tokenizer.eos_token_id,
    output_scores=True,
    return_dict_in_generate=True,
    return_legacy_cache=False
)

# `output.scores` is a tuple with one entry per generated step; each entry is a
# 2-D tensor whose first axis is the batch (size 1 here, a single prompt).
# Take batch row 0 of *every* step and turn it into a probability distribution.
# The previous `map(soft_max, scores[0])` walked the rows of the first step only,
# so it produced a single distribution instead of one per generated token.
scores = [soft_max(step_scores[0]) for step_scores in output.scores]

In [104]:
import numpy as np
# For every entry of `scores`, take the index of its largest value as a plain Python number.
pred_ids = list(map(lambda step_probs: step_probs.argmax().item(), scores))


In [105]:
# Show the argmax index computed for each entry of `scores`.
print(pred_ids)

[3313, 11, 3606, 436, 565, 258, 3515, 9854, 261, 17400, 261, 11798, 13, 198, 198, 198, 198, 46, 2651, 3131, 261, 1008, 261, 3565, 364, 302, 2411, 261, 6085, 11, 287, 1290, 4436, 327, 2193, 754, 2618, 261, 4680, 11, 348, 20154, 3714, 298, 1257, 13, 198, 198, 198, 198]


In [103]:
# First element of the `generate` result. The recorded output below is a single
# row of 54 integer ids on cuda:0 (presumably prompt + generated token ids).
output[0]

tensor([[   46,  2651,  1007,   364,  3313,    11,  3606,   436,   565,   258,
          3515,  9854,   261, 17400,   261, 11798,    13,   198,   198,   198,
           198,    46,  2651,  3131,   261,  1008,   261,  3565,   364,   302,
          2411,   261,  6085,    11,   287,  1290,  4436,   327,  2193,   754,
          2618,   261,  4680,    11,   348, 20154,  3714,   298,  1257,    13,
           198,   198,   198,   198]], device='cuda:0')

In [75]:
# NOTE(review): `arr` is not defined in any cell visible here, so this appears to
# rely on leftover kernel state and would raise NameError on a fresh
# Restart & Run All unless it is defined elsewhere. Define `arr` explicitly
# (or drop this cell) before sharing the notebook.
np.argmax(arr)

3313

In [76]:
# NOTE(review): same expression as the earlier `output[0]` cell, with an identical
# recorded output. Looks like a leftover duplicate; consider removing it.
output[0]

tensor([[   46,  2651,  1007,   364,  3313,    11,  3606,   436,   565,   258,
          3515,  9854,   261, 17400,   261, 11798,    13,   198,   198,   198,
           198,    46,  2651,  3131,   261,  1008,   261,  3565,   364,   302,
          2411,   261,  6085,    11,   287,  1290,  4436,   327,  2193,   754,
          2618,   261,  4680,    11,   348, 20154,  3714,   298,  1257,    13,
           198,   198,   198,   198]], device='cuda:0')

In [None]:
# Second element of the `generate` result (the value the cell above reads as `scores`).
# NOTE(review): the recorded output below is a 1-tuple holding one 2-D float tensor,
# which does not match a 50-step run. It is presumably stale output from an earlier
# execution; re-run to confirm.
output[1]

(tensor([[109.9425, 109.4950, 105.2128,  ..., 106.0118, 101.8323,  31.0431]],
        device='cuda:0'),)

# 2. Loading CALAME-PT

In [None]:
# CALAME-PT
import pandas as pd

# Read each CALAME-PT split (JSON Lines) and tag every row with the split it came from.
df_handwritten = pd.read_json(
    "hf://datasets/NOVA-vision-language/calame-pt/calamept_handwritten_only.jsonl",
    lines=True,
).assign(Source='Handwritten')

df_generated = pd.read_json(
    "hf://datasets/NOVA-vision-language/calame-pt/calamept_gen_only.jsonl",
    lines=True,
).assign(Source='Generated')

# Stack both splits into one frame; each split keeps its own row labels.
df = pd.concat([df_handwritten, df_generated])

In [None]:
# Display the concatenated CALAME-PT frame (handwritten + generated rows).
df