In [42]:
import pandas as pd
import numpy as np

user_prompts = pd.read_parquet('../../Dataset/ml/ml-latest-small/tuning/user_prompts_PLOTS.parquet')
prompt=user_prompts['prompt'].values[1]
len(prompt)
import re


# 1) cattura (non greedy) i blocchi Title / Description / Genre
pattern = r"""
    Title:\s*(.*?)\s*          # titolo (lazy)
    Description:\s*(.*?)\s*    # descrizione (lazy)
    Genre:\s*(.*?)\s*          # generi    (lazy)
    (?:---|$)                  # fino al separatore o fine stringa
"""
movies = [
    f"Title: {title.strip()}\n"
    f"Description: {desc.rstrip()}\n"
    f"Genre: {genre.strip()}"
    for title, desc, genre in re.findall(pattern, prompt, flags=re.S | re.X)
]
print(len(movies))

26


In [36]:
movies = movies[:5]  # Prendi solo i primi 10 per testare

In [43]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import warnings
warnings.filterwarnings('ignore')

# Carica modello e tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

# Input di esempio
input_text = "what is the stronger football player?"
print("=== METODO 1: Generazione normale dal testo ===")
# Tokenizza l'input
inputs = tokenizer(input_text, return_tensors="pt")
input_ids = inputs["input_ids"].to(model.device)
input_length = input_ids.shape[1]

# Genera testo normalmente
with torch.no_grad():
    output_normal = model.generate(
        input_ids,
        max_new_tokens=50,
        temperature=0.1,  # Bassa temperatura per risultati deterministici
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )
# Decodifica solo i nuovi token generati (escludendo l'input)
generated_tokens = output_normal[0][input_length:]
generated_text_normal = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(f"Input: {input_text}")
print(f"Output: {generated_text_normal}\n")

print("=== METODO 2: Generazione dagli hidden states ===")

# Ottieni gli hidden states dell'input
with torch.no_grad():
    # Forward pass per ottenere gli hidden states
    outputs = model(
        input_ids=input_ids,
        output_hidden_states=True,
        return_dict=True
    )
    
    # Prendi l'ultimo hidden state (output dell'ultimo layer)
    last_hidden_state = outputs.hidden_states[-1]
    
    print(f"Forma hidden states: {last_hidden_state.shape}")
    print(f"Numero di layer: {len(outputs.hidden_states)}")

# Metodo alternativo: usa direttamente inputs_embeds
print("\n=== METODO 2a: Usando inputs_embeds direttamente ===")

# Metodo 1: Media semplice degli embeddings (ogni prompt contribuisce ugualmente)
print("\n--- Metodo 1: Media semplice degli embeddings ---")
input_embeds_list = []
max_length = 0

# Calcola embeddings per ogni movie
for i, movie in enumerate(movies):
    inputs = tokenizer(movie, return_tensors="pt")
    input_ids = inputs["input_ids"].to(model.device)
    
    # Ottieni embeddings
    embeds = model.get_input_embeddings()(input_ids)
    input_embeds_list.append(embeds)
    
    max_length = max(max_length, embeds.shape[1])
    print(f"Movie {i}: '{movie[:30]}...' - Forma embeddings: {embeds.shape}")

# Pad tutti gli embeddings alla stessa lunghezza per poter fare la media
padded_embeds_list = []
mask_list = []
for embeds in input_embeds_list:
    if embeds.shape[1] < max_length:
        # Pad con zeros (o potresti usare il pad_token_id embedding)
        padding = torch.zeros(1, max_length - embeds.shape[1], embeds.shape[2], dtype=embeds.dtype).to(embeds.device)
        padded_embeds = torch.cat([embeds, padding], dim=1)
        # crea una maschera per i token non zero
        mask = torch.ones(1, embeds.shape[1],embeds.shape[2], dtype=torch.bool).to(embeds.device)
        mask = torch.cat([mask, torch.zeros(1, max_length - embeds.shape[1],embeds.shape[2], dtype=torch.bool).to(embeds.device)], dim=1)
    else:
        padded_embeds = embeds
        mask = torch.ones(1, embeds.shape[1],embeds.shape[2], dtype=torch.bool).to(embeds.device)
    padded_embeds_list.append(padded_embeds)
    mask_list.append(mask)

# Calcola la media
stacked_embeds = torch.stack([e.squeeze(0) for e in padded_embeds_list])  # [num_movies, seq_len, hidden_dim]
input_embeds = stacked_embeds.mean(dim=0, keepdim=True)  # [1, seq_len, hidden_dim]

print(f"\nForma embeddings dopo padding: {stacked_embeds.shape}")
print(f"Forma media embeddings: {input_embeds.shape}")


print (f"Forma input embeddings: {input_embeds.shape}")
print(input_embeds)  # Mostra i primi 5 embeddings
with torch.no_grad():
    # Genera usando inputs_embeds invece di input_ids
    output_from_embeds = model.generate(
        inputs_embeds=input_embeds,
        max_new_tokens=50,
        temperature=0.1,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_embeds = tokenizer.decode(output_from_embeds[0], skip_special_tokens=True)
print(f"Output da embeddings: {generated_text_embeds}")

# Verifica che siano uguali
print("\n=== CONFRONTO RISULTATI ===")
print(f"Testo normale: {generated_text_normal}")
print(f"Testo da embeddings: {generated_text_embeds}")
print(f"I risultati sono identici: {generated_text_normal == generated_text_embeds}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


=== METODO 1: Generazione normale dal testo ===
Input: what is the stronger football player?
Output: 

The term "stronger football player" can be interpreted in different ways, depending on the context. Here are a few possible interpretations and some factors that could influence the answer:

1. Physical strength: In terms of raw muscle

=== METODO 2: Generazione dagli hidden states ===
Forma hidden states: torch.Size([1, 8, 4096])
Numero di layer: 33

=== METODO 2a: Usando inputs_embeds direttamente ===

--- Metodo 1: Media semplice degli embeddings ---
Movie 0: 'Title: Shutter Island
Descript...' - Forma embeddings: torch.Size([1, 1094, 4096])
Movie 1: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 2: 'Title: Kill Bill: Vol. 1
Descr...' - Forma embeddings: torch.Size([1, 58, 4096])
Movie 3: 'Title: Step Brothers
Descripti...' - Forma embeddings: torch.Size([1, 831, 4096])
Movie 4: 'Title: Wolf of Wall Street, Th...' - Forma embeddings: torch.

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


tensor([[[-4.3640e-03, -1.0633e-04, -5.6152e-03,  ..., -5.0545e-05,
          -1.1520e-03,  1.5926e-04],
         [-6.4850e-04,  6.1340e-03,  1.0757e-03,  ..., -8.8882e-04,
          -3.4027e-03, -1.6403e-03],
         [-9.8419e-04, -9.7275e-04,  4.5300e-05,  ..., -8.3923e-04,
          -2.1100e-05, -1.4114e-03],
         ...,
         [-9.3579e-06, -6.0439e-05,  3.2544e-05,  ...,  3.5048e-05,
           4.3750e-05,  3.1710e-05],
         [ 4.0174e-05, -3.1412e-05, -5.7518e-05,  ...,  3.9041e-05,
          -1.8060e-05, -5.0783e-05],
         [-7.5698e-05,  4.8101e-05,  3.6538e-05,  ..., -1.1677e-04,
           7.1585e-05, -1.9372e-04]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<MeanBackward1>)
Output da embeddings: {}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s{}s

=== CONFRONTO RISULTATI ===
Testo normale: 

The term "stronger football player" can be interpreted in different ways, depending on the context. Here are a few possible interpretations 

In [44]:
# Metodo 2: Media ponderata basata sulla lunghezza
print("\n--- Metodo 2: Media ponderata (peso basato su lunghezza) ---")
weighted_sum = torch.zeros_like(padded_embeds_list[0])
total_weight = 0

for embeds, original_embeds in zip(padded_embeds_list, input_embeds_list):
    weight = original_embeds.shape[1]  # Peso = lunghezza originale
    weighted_sum += embeds * weight
    total_weight += weight

weighted_mean_embeds = weighted_sum / total_weight

with torch.no_grad():
    output_weighted = model.generate(
        inputs_embeds=weighted_mean_embeds,
        max_new_tokens=50,
        temperature=0.1,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_weighted = tokenizer.decode(output_weighted[0], skip_special_tokens=True)
print(f"Output da media ponderata: {generated_text_weighted}")

# Metodo 3: Media solo dei primi N token di ogni prompt
print("\n--- Metodo 3: Media dei primi N token ---")
n_tokens = 100  # Prendi solo i primi 5 token di ogni prompt

truncated_embeds_list = []
for i, movie in enumerate(movies):
    inputs = tokenizer(movie, return_tensors="pt", max_length=n_tokens, truncation=True)
    input_ids = inputs["input_ids"].to(model.device)
    embeds = model.get_input_embeddings()(input_ids)
    
    # Assicurati che abbiano tutti esattamente n_tokens
    if embeds.shape[1] < n_tokens:
        padding = torch.zeros(1, n_tokens - embeds.shape[1], embeds.shape[2], dtype=embeds.dtype).to(embeds.device)
        embeds = torch.cat([embeds, padding], dim=1)
    
    truncated_embeds_list.append(embeds)

# Stack e media
stacked_truncated = torch.stack([e.squeeze(0) for e in truncated_embeds_list])
mean_truncated = stacked_truncated.mean(dim=0, keepdim=True)

with torch.no_grad():
    output_truncated = model.generate(
        inputs_embeds=mean_truncated,
        max_new_tokens=50,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_truncated = tokenizer.decode(output_truncated[0], skip_special_tokens=True)
print(f"Output da media dei primi {n_tokens} token: {generated_text_truncated}")


--- Metodo 2: Media ponderata (peso basato su lunghezza) ---
Output da media ponderata: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

--- Metodo 3: Media dei primi N token ---
Output da media dei primi 100 token: 

This is an extremely long and nonsensical title, which I have created using a combination of random words and letters from various languages. It is meant to be an example of how meaningless and confusing a title can be when it is constructed


In [45]:
print("\n--- Metodo 4: Media escludendo componenti zero (padding-aware) ---")

# Crea maschere per identificare le posizioni con padding (tutti zeri)
masks_list = []
for embeds, original_embeds in zip(padded_embeds_list, input_embeds_list):
    # Crea una maschera: 1 per posizioni valide, 0 per padding
    mask = torch.ones_like(embeds)
    if embeds.shape[1] > original_embeds.shape[1]:
        # Le posizioni dopo la lunghezza originale sono padding
        mask[:, original_embeds.shape[1]:, :] = 0
    masks_list.append(mask)

# Stack embeddings e maschere
stacked_embeds = torch.stack([e.squeeze(0) for e in padded_embeds_list])  # [num_movies, seq_len, hidden_dim]
stacked_masks = torch.stack([m.squeeze(0) for m in masks_list])  # [num_movies, seq_len, hidden_dim]

# Calcola la somma pesata e il conteggio per ogni componente
weighted_sum_nonzero = (stacked_embeds * stacked_masks).sum(dim=0)  # [seq_len, hidden_dim]
count_nonzero = stacked_masks.sum(dim=0)  # [seq_len, hidden_dim]

# Evita divisione per zero
count_nonzero = torch.clamp(count_nonzero, min=1e-9)

# Media considerando solo componenti non-zero
mean_nonzero = weighted_sum_nonzero / count_nonzero
mean_nonzero_embeds = mean_nonzero.unsqueeze(0)  # [1, seq_len, hidden_dim]

# Per debug: mostra quante componenti sono state mediate per alcune posizioni
print(f"Esempio conteggi non-zero per posizione 0: min={count_nonzero[0].min():.0f}, max={count_nonzero[0].max():.0f}")
print(f"Esempio conteggi non-zero per ultima posizione: min={count_nonzero[-1].min():.0f}, max={count_nonzero[-1].max():.0f}")

with torch.no_grad():
    output_nonzero = model.generate(
        inputs_embeds=mean_nonzero_embeds,
        max_new_tokens=50,
        temperature=0.1,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_nonzero = tokenizer.decode(output_nonzero[0], skip_special_tokens=True)
print(f"Output da media non-zero: {generated_text_nonzero}")


--- Metodo 4: Media escludendo componenti zero (padding-aware) ---
Esempio conteggi non-zero per posizione 0: min=26, max=26
Esempio conteggi non-zero per ultima posizione: min=1, max=1
Output da media non-zero: 
Title: Inception
Description: Dom Cobb is a skilled thief, who enters the subconscious of his targets while they dream. His ability to extract information from people's subconscious thoughts is known as "extraction." However,


In [47]:
for movie in movies:
    print(f"Movie: {movie}")  # Mostra solo i primi 30 caratteri per brevità

Movie: Title: Shutter Island
Description: In 1954, U.S. Marshals Edward "Teddy" Daniels and his new partner Chuck Aule travel to the Ashecliffe Hospital for the criminally insane on Shutter Island in Boston Harbor. They are investigating the disappearance of patient Rachel Solando, who was incarcerated for drowning her three children. Their only clue is a cryptic note found hidden in Solando's room: "The law of 4; who is 67?" They arrive just before a storm hits, preventing their return to the mainland for a few days. Daniels and Aule find the staff confrontational. Dr. John Cawley, the lead psychiatrist, refuses to turn over records, and they learn that Solando's doctor Lester Sheehan left the island on vacation immediately after Solando disappeared. They are given access to the hospital, but they are told that Ward C is off limits and that the lighthouse has already been searched. While being interviewed, one patient secretly writes the word "RUN" in Daniels' notepad. Daniels starts 

In [15]:
# Carica modello e tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

# Input di esempio
input_text = movies[1]
print("=== METODO 1: Generazione normale dal testo ===")
# Tokenizza l'input
inputs = tokenizer(input_text, return_tensors="pt")
input_ids = inputs["input_ids"].to(model.device)
input_length = input_ids.shape[1]

# Genera testo normalmente
with torch.no_grad():
    output_normal = model.generate(
        input_ids,
        max_new_tokens=50,
        temperature=0.1,  # Bassa temperatura per risultati deterministici
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )
# Decodifica solo i nuovi token generati (escludendo l'input)
generated_tokens = output_normal[0][input_length:]
generated_text_normal = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(f"Input: {input_text}")
print(f"Output: {generated_text_normal}\n")

print("=== METODO 2: Generazione dagli hidden states ===")

# Ottieni gli hidden states dell'input
with torch.no_grad():
    # Forward pass per ottenere gli hidden states
    outputs = model(
        input_ids=input_ids,
        output_hidden_states=True,
        return_dict=True
    )
    
    # Prendi l'ultimo hidden state (output dell'ultimo layer)
    last_hidden_state = outputs.hidden_states[-1]
    
    print(f"Forma hidden states: {last_hidden_state.shape}")
    print(f"Numero di layer: {len(outputs.hidden_states)}")

# Metodo alternativo: usa direttamente inputs_embeds
print("\n=== METODO 2a: Usando inputs_embeds direttamente ===")

# Ottieni gli embeddings dell'input
input_embeds = model.get_input_embeddings()(input_ids)
print (f"Forma input embeddings: {input_embeds.shape}")
print(input_embeds)  # Mostra i primi 5 embeddings
with torch.no_grad():
    # Genera usando inputs_embeds invece di input_ids
    output_from_embeds = model.generate(
        inputs_embeds=input_embeds,
        max_new_tokens=50,
        temperature=0.1,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_embeds = tokenizer.decode(output_from_embeds[0], skip_special_tokens=True)
print(f"Output da embeddings: {generated_text_embeds}")

# Verifica che siano uguali
print("\n=== CONFRONTO RISULTATI ===")
print(f"Testo normale: {generated_text_normal}")
print(f"Testo da embeddings: {generated_text_embeds}")
print(f"I risultati sono identici: {generated_text_normal == generated_text_embeds}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


=== METODO 1: Generazione normale dal testo ===
Input: Title: Dark Knight Rises, The
Description: Following the death of District Attorney Harvey Dent, Batman assumes responsibility for Dent's crimes to protect the late attorney's reputation and is subsequently hunted by the Gotham City Police Department. Eight years later, Batman encounters the mysterious Selina Kyle and the villainous Bane, a new terrorist leader who overwhelms Gotham's finest. The Dark Knight resurfaces to protect a city that has branded him an enemy.
Genre: Action|Adventure|Crime|IMAX
Output: |Thriller
Release Date: 2012-07-20
Runtime: 165 minutes
Director: Christopher Nolan
Writer: Jonathan Nolan, Christopher Nolan, David S. Goy

=== METODO 2: Generazione dagli hidden states ===


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Forma hidden states: torch.Size([1, 121, 4096])
Numero di layer: 33

=== METODO 2a: Usando inputs_embeds direttamente ===
Forma input embeddings: torch.Size([1, 121, 4096])
tensor([[[-4.3640e-03, -1.0633e-04, -5.6152e-03,  ..., -5.0545e-05,
          -1.1520e-03,  1.5926e-04],
         [-6.4850e-04,  6.1340e-03,  1.0757e-03,  ..., -8.8882e-04,
          -3.4027e-03, -1.6403e-03],
         [-9.8419e-04, -9.7275e-04,  4.5300e-05,  ..., -8.3923e-04,
          -2.1100e-05, -1.4114e-03],
         ...,
         [-2.4319e-04, -1.5717e-03,  8.4686e-04,  ...,  9.1171e-04,
           1.1368e-03,  8.2397e-04],
         [ 1.0452e-03, -8.1635e-04, -1.4954e-03,  ...,  1.0147e-03,
          -4.6921e-04, -1.3199e-03],
         [-1.9684e-03,  1.2512e-03,  9.4986e-04,  ..., -3.0365e-03,
           1.8616e-03, -5.0354e-03]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<EmbeddingBackward0>)


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Output da embeddings: |Thriller
Release Date: 2012-07-20
Runtime: 165 minutes
Director: Christopher Nolan
Writer: Jonathan Nolan, Christopher Nolan, David S. Goy

=== CONFRONTO RISULTATI ===
Testo normale: |Thriller
Release Date: 2012-07-20
Runtime: 165 minutes
Director: Christopher Nolan
Writer: Jonathan Nolan, Christopher Nolan, David S. Goy
Testo da embeddings: |Thriller
Release Date: 2012-07-20
Runtime: 165 minutes
Director: Christopher Nolan
Writer: Jonathan Nolan, Christopher Nolan, David S. Goy
I risultati sono identici: True

=== BONUS: Uso di hidden states da layer intermedio ===

Forma hidden state layer 16: torch.Size([1, 121, 4096])
Gli hidden states intermedi richiederebbero un wrapper personalizzato per la generazione.

=== SALVATAGGIO HIDDEN STATES ===
Hidden states pronti per il salvataggio (decommentare la riga sopra per salvare)

=== RIUTILIZZO HIDDEN STATES ===
Output da embeddings ricaricati: |Thriller
Release Date: 2012-07-20
Runtime: 165 minutes
Director: Christop

In [16]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import warnings
warnings.filterwarnings('ignore')

# Carica modello e tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

# Input di esempio
input_text = "what is the stronger football player?"
print("=== METODO 1: Generazione normale dal testo ===")
# Tokenizza l'input
inputs = tokenizer(input_text, return_tensors="pt")
input_ids = inputs["input_ids"].to(model.device)
input_length = input_ids.shape[1]

# Genera testo normalmente
with torch.no_grad():
    output_normal = model.generate(
        input_ids,
        max_new_tokens=50,
        temperature=0.1,  # Bassa temperatura per risultati deterministici
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )
# Decodifica solo i nuovi token generati (escludendo l'input)
generated_tokens = output_normal[0][input_length:]
generated_text_normal = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(f"Input: {input_text}")
print(f"Output: {generated_text_normal}\n")

print("=== METODO 2: Generazione dagli hidden states ===")

# Ottieni gli hidden states dell'input
with torch.no_grad():
    # Forward pass per ottenere gli hidden states
    outputs = model(
        input_ids=input_ids,
        output_hidden_states=True,
        return_dict=True
    )
    
    # Prendi l'ultimo hidden state (output dell'ultimo layer)
    last_hidden_state = outputs.hidden_states[-1]
    
    print(f"Forma hidden states: {last_hidden_state.shape}")
    print(f"Numero di layer: {len(outputs.hidden_states)}")

# Metodo alternativo: usa direttamente inputs_embeds
print("\n=== METODO 2a: Usando inputs_embeds direttamente ===")

# Metodo 1: Media semplice degli embeddings (ogni prompt contribuisce ugualmente)
print("\n--- Metodo 1: Media semplice degli embeddings ---")
input_embeds_list = []
max_length = 0

# Calcola embeddings per ogni movie
eletto = movies[1]  # Scegli un movie specifico per il test

for i, movie in enumerate(movies):
    inputs = tokenizer(eletto, return_tensors="pt")
    input_ids = inputs["input_ids"].to(model.device)
    
    # Ottieni embeddings
    embeds = model.get_input_embeddings()(input_ids)
    input_embeds_list.append(embeds)
    
    max_length = max(max_length, embeds.shape[1])
    print(f"Movie {i}: '{eletto[:30]}...' - Forma embeddings: {embeds.shape}")

# Pad tutti gli embeddings alla stessa lunghezza per poter fare la media
padded_embeds_list = []
for embeds in input_embeds_list:
    if embeds.shape[1] < max_length:
        # Pad con zeros (o potresti usare il pad_token_id embedding)
        padding = torch.zeros(1, max_length - embeds.shape[1], embeds.shape[2], dtype=embeds.dtype).to(embeds.device)
        padded_embeds = torch.cat([embeds, padding], dim=1)
    else:
        padded_embeds = embeds
    padded_embeds_list.append(padded_embeds)

# Calcola la media
stacked_embeds = torch.stack([e.squeeze(0) for e in padded_embeds_list])  # [num_movies, seq_len, hidden_dim]
input_embeds = stacked_embeds.mean(dim=0, keepdim=True)  # [1, seq_len, hidden_dim]

print(f"\nForma embeddings dopo padding: {stacked_embeds.shape}")
print(f"Forma media embeddings: {input_embeds.shape}")


print (f"Forma input embeddings: {input_embeds.shape}")
print(input_embeds)  # Mostra i primi 5 embeddings
with torch.no_grad():
    # Genera usando inputs_embeds invece di input_ids
    output_from_embeds = model.generate(
        inputs_embeds=input_embeds,
        max_new_tokens=50,
        temperature=0.1,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_embeds = tokenizer.decode(output_from_embeds[0], skip_special_tokens=True)
print(f"Output da embeddings: {generated_text_embeds}")

# Verifica che siano uguali
print("\n=== CONFRONTO RISULTATI ===")
print(f"Testo normale: {generated_text_normal}")
print(f"Testo da embeddings: {generated_text_embeds}")
print(f"I risultati sono identici: {generated_text_normal == generated_text_embeds}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


=== METODO 1: Generazione normale dal testo ===
Input: what is the stronger football player?
Output: 

The term "stronger football player" can be interpreted in different ways, depending on the context. Here are a few possible interpretations and some factors that could influence the answer:

1. Physical strength: In terms of raw muscle

=== METODO 2: Generazione dagli hidden states ===


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Forma hidden states: torch.Size([1, 8, 4096])
Numero di layer: 33

=== METODO 2a: Usando inputs_embeds direttamente ===

--- Metodo 1: Media semplice degli embeddings ---
Movie 0: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 1: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 2: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 3: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 4: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 5: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 6: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 7: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 8: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
M