In [1]:
import pandas as pd
import numpy as np

user_prompts = pd.read_parquet('../../Dataset/ml/ml-latest-small/tuning/user_prompts_PLOTS.parquet')
prompt=user_prompts['prompt'].values[10]
len(prompt)
import re


# 1) cattura (non greedy) i blocchi Title / Description / Genre
pattern = r"""
    Title:\s*(.*?)\s*          # titolo (lazy)
    Description:\s*(.*?)\s*    # descrizione (lazy)
    Genre:\s*(.*?)\s*          # generi    (lazy)
    (?:---|$)                  # fino al separatore o fine stringa
"""
movies = [
    f"Title: {title.strip()}\n"
    f"Description: {desc.rstrip()}\n"
    f"Genre: {genre.strip()}"
    for title, desc, genre in re.findall(pattern, prompt, flags=re.S | re.X)
]
print(len(movies))

54


In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import warnings
warnings.filterwarnings('ignore')

# Carica modello e tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
print("\n=== METODO 2a: Usando inputs_embeds direttamente ===")

# Metodo 1: Media semplice degli embeddings (ogni prompt contribuisce ugualmente)
print("\n--- Metodo 1: Media semplice degli embeddings ---")
input_embeds_list = []
max_length = 0

# Calcola embeddings per ogni movie
for i, movie in enumerate(movies):
    inputs = tokenizer(movie, return_tensors="pt")
    input_ids = inputs["input_ids"].to(model.device)
    
    # Ottieni embeddings
    embeds = model.get_input_embeddings()(input_ids)
    input_embeds_list.append(embeds)
    
    max_length = max(max_length, embeds.shape[1])

# Pad tutti gli embeddings alla stessa lunghezza per poter fare la media
padded_embeds_list = []
mask_list = []
for embeds in input_embeds_list:
    if embeds.shape[1] < max_length:
        # Pad con zeros (o potresti usare il pad_token_id embedding)
        padding = torch.zeros(1, max_length - embeds.shape[1], embeds.shape[2], dtype=embeds.dtype).to(embeds.device)
        padded_embeds = torch.cat([embeds, padding], dim=1)
        # crea una maschera per i token non zero
        mask = torch.ones(1, embeds.shape[1],embeds.shape[2], dtype=torch.bool).to(embeds.device)
        mask = torch.cat([mask, torch.zeros(1, max_length - embeds.shape[1],embeds.shape[2], dtype=torch.bool).to(embeds.device)], dim=1)
    else:
        padded_embeds = embeds
        mask = torch.ones(1, embeds.shape[1],embeds.shape[2], dtype=torch.bool).to(embeds.device)
    padded_embeds_list.append(padded_embeds)
    mask_list.append(mask)

# Calcola la media
stacked_embeds = torch.stack([e.squeeze(0) for e in padded_embeds_list])  # [num_movies, seq_len, hidden_dim]
input_embeds = stacked_embeds.mean(dim=0, keepdim=True)  # [1, seq_len, hidden_dim]

print(input_embeds)  # Mostra i primi 5 embeddings
with torch.no_grad():
    # Genera usando inputs_embeds invece di input_ids
    output_from_embeds = model.generate(
        inputs_embeds=input_embeds,
        max_new_tokens=10,
        temperature=0.1,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_embeds = tokenizer.decode(output_from_embeds[0], skip_special_tokens=True)
print(f"Output da embeddings: {generated_text_embeds}")

# Metodo 2: Media ponderata basata sulla lunghezza
print("\n--- Metodo 2: Media ponderata (peso basato su lunghezza) ---")
weighted_sum = torch.zeros_like(padded_embeds_list[0])
total_weight = 0

for embeds, original_embeds in zip(padded_embeds_list, input_embeds_list):
    weight = original_embeds.shape[1]  # Peso = lunghezza originale
    weighted_sum += embeds * weight
    total_weight += weight

weighted_mean_embeds = weighted_sum / total_weight

with torch.no_grad():
    output_weighted = model.generate(
        inputs_embeds=weighted_mean_embeds,
        max_new_tokens=10,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_weighted = tokenizer.decode(output_weighted[0], skip_special_tokens=True)
print(f"Output da media ponderata: {generated_text_weighted}")

# Metodo 3: Media solo dei primi N token di ogni prompt
print("\n--- Metodo 3: Media dei primi N token ---")
n_tokens = 100  # Prendi solo i primi 5 token di ogni prompt

truncated_embeds_list = []
for i, movie in enumerate(movies):
    inputs = tokenizer(movie, return_tensors="pt", max_length=n_tokens, truncation=True)
    input_ids = inputs["input_ids"].to(model.device)
    embeds = model.get_input_embeddings()(input_ids)
    
    # Assicurati che abbiano tutti esattamente n_tokens
    if embeds.shape[1] < n_tokens:
        padding = torch.zeros(1, n_tokens - embeds.shape[1], embeds.shape[2], dtype=embeds.dtype).to(embeds.device)
        embeds = torch.cat([embeds, padding], dim=1)
    
    truncated_embeds_list.append(embeds)

# Stack e media
stacked_truncated = torch.stack([e.squeeze(0) for e in truncated_embeds_list])
mean_truncated = stacked_truncated.mean(dim=0, keepdim=True)

with torch.no_grad():
    output_truncated = model.generate(
        inputs_embeds=mean_truncated,
        max_new_tokens=100,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_truncated = tokenizer.decode(output_truncated[0], skip_special_tokens=True)
print(f"Output da media dei primi {n_tokens} token: {generated_text_truncated}")

print("\n--- Metodo 4: Media escludendo componenti zero (padding-aware) ---")

# Crea maschere per identificare le posizioni con padding (tutti zeri)
masks_list = []
for embeds, original_embeds in zip(padded_embeds_list, input_embeds_list):
    # Crea una maschera: 1 per posizioni valide, 0 per padding
    mask = torch.ones_like(embeds)
    if embeds.shape[1] > original_embeds.shape[1]:
        # Le posizioni dopo la lunghezza originale sono padding
        mask[:, original_embeds.shape[1]:, :] = 0
    masks_list.append(mask)

# Stack embeddings e maschere
stacked_embeds = torch.stack([e.squeeze(0) for e in padded_embeds_list])  # [num_movies, seq_len, hidden_dim]
stacked_masks = torch.stack([m.squeeze(0) for m in masks_list])  # [num_movies, seq_len, hidden_dim]

# Calcola la somma pesata e il conteggio per ogni componente
weighted_sum_nonzero = (stacked_embeds * stacked_masks).sum(dim=0)  # [seq_len, hidden_dim]
count_nonzero = stacked_masks.sum(dim=0)  # [seq_len, hidden_dim]

# Evita divisione per zero
count_nonzero = torch.clamp(count_nonzero, min=1e-9)

# Media considerando solo componenti non-zero
mean_nonzero = weighted_sum_nonzero / count_nonzero
mean_nonzero_embeds = mean_nonzero.unsqueeze(0)  # [1, seq_len, hidden_dim]

# Per debug: mostra quante componenti sono state mediate per alcune posizioni

with torch.no_grad():
    output_nonzero = model.generate(
        inputs_embeds=mean_nonzero_embeds,
        max_new_tokens=300,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_nonzero = tokenizer.decode(output_nonzero[0], skip_special_tokens=True)
print(f"Output da media non-zero: {generated_text_nonzero}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.



=== METODO 2a: Usando inputs_embeds direttamente ===

--- Metodo 1: Media semplice degli embeddings ---


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


tensor([[[-4.3640e-03, -1.0633e-04, -5.6152e-03,  ..., -5.0545e-05,
          -1.1520e-03,  1.5926e-04],
         [-6.4850e-04,  6.1340e-03,  1.0757e-03,  ..., -8.8882e-04,
          -3.4027e-03, -1.6403e-03],
         [-9.8419e-04, -9.7275e-04,  4.5300e-05,  ..., -8.3923e-04,
          -2.1100e-05, -1.4114e-03],
         ...,
         [-2.7955e-05,  7.5758e-05,  3.3319e-05,  ..., -5.4240e-06,
           1.1700e-04,  1.2994e-05],
         [-1.2934e-05, -2.8253e-05,  1.3351e-05,  ...,  9.4175e-06,
          -2.4855e-05,  6.2704e-05],
         [ 5.9366e-05,  4.0412e-05, -3.8981e-05,  ...,  1.1802e-05,
          -1.2636e-05, -3.4332e-05]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<MeanBackward1>)
Output da embeddings: {}����€€€€€

--- Metodo 2: Media ponderata (peso basato su lunghezza) ---
Output da media ponderata: {}���������

--- Metodo 3: Media dei primi N token ---
Output da media dei primi 100 token: rack rack rack rack rack rack rack rack rack rack rack rack rack rack

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import warnings
warnings.filterwarnings('ignore')

# Carica modello e tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
print("\n=== METODO 2a: Usando inputs_embeds direttamente ===")

# Metodo 1: Media semplice degli embeddings (ogni prompt contribuisce ugualmente)
print("\n--- Metodo 1: Media semplice degli embeddings ---")
input_embeds_list = []
max_length = 0

# Calcola embeddings per ogni movie
for i, movie in enumerate(movies):
    instuction = "You are a expert on movie. Based on the movie description, generate a short summary of the user taste."
    movie = f"{instuction}\n{movie}"
    inputs = tokenizer(movie, return_tensors="pt")
    input_ids = inputs["input_ids"].to(model.device)
    
    # Ottieni embeddings
    embeds = model.get_input_embeddings()(input_ids)
    input_embeds_list.append(embeds)
    
    max_length = max(max_length, embeds.shape[1])

# Pad tutti gli embeddings alla stessa lunghezza per poter fare la media
padded_embeds_list = []
mask_list = []
for embeds in input_embeds_list:
    if embeds.shape[1] < max_length:
        # Pad con zeros (o potresti usare il pad_token_id embedding)
        padding = torch.zeros(1, max_length - embeds.shape[1], embeds.shape[2], dtype=embeds.dtype).to(embeds.device)
        padded_embeds = torch.cat([embeds, padding], dim=1)
        # crea una maschera per i token non zero
        mask = torch.ones(1, embeds.shape[1],embeds.shape[2], dtype=torch.bool).to(embeds.device)
        mask = torch.cat([mask, torch.zeros(1, max_length - embeds.shape[1],embeds.shape[2], dtype=torch.bool).to(embeds.device)], dim=1)
    else:
        padded_embeds = embeds
        mask = torch.ones(1, embeds.shape[1],embeds.shape[2], dtype=torch.bool).to(embeds.device)
    padded_embeds_list.append(padded_embeds)
    mask_list.append(mask)

# Calcola la media
stacked_embeds = torch.stack([e.squeeze(0) for e in padded_embeds_list])  # [num_movies, seq_len, hidden_dim]
input_embeds = stacked_embeds.mean(dim=0, keepdim=True)  # [1, seq_len, hidden_dim]

print(input_embeds)  # Mostra i primi 5 embeddings
with torch.no_grad():
    # Genera usando inputs_embeds invece di input_ids
    output_from_embeds = model.generate(
        inputs_embeds=input_embeds,
        max_new_tokens=10,
        temperature=0.1,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_embeds = tokenizer.decode(output_from_embeds[0], skip_special_tokens=True)
print(f"Output da embeddings: {generated_text_embeds}")

# Metodo 2: Media ponderata basata sulla lunghezza
print("\n--- Metodo 2: Media ponderata (peso basato su lunghezza) ---")
weighted_sum = torch.zeros_like(padded_embeds_list[0])
total_weight = 0

for embeds, original_embeds in zip(padded_embeds_list, input_embeds_list):
    weight = original_embeds.shape[1]  # Peso = lunghezza originale
    weighted_sum += embeds * weight
    total_weight += weight

weighted_mean_embeds = weighted_sum / total_weight

with torch.no_grad():
    output_weighted = model.generate(
        inputs_embeds=weighted_mean_embeds,
        max_new_tokens=10,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_weighted = tokenizer.decode(output_weighted[0], skip_special_tokens=True)
print(f"Output da media ponderata: {generated_text_weighted}")

# Metodo 3: Media solo dei primi N token di ogni prompt
print("\n--- Metodo 3: Media dei primi N token ---")
n_tokens = 100  # Prendi solo i primi 5 token di ogni prompt

truncated_embeds_list = []
for i, movie in enumerate(movies):
    instuction = "You are a expert on movie. Based on the movie description, generate a short summary of the user taste."
    movie = f"{instuction}\n{movie}"
    inputs = tokenizer(movie, return_tensors="pt", max_length=n_tokens, truncation=True)
    input_ids = inputs["input_ids"].to(model.device)
    embeds = model.get_input_embeddings()(input_ids)
    
    # Assicurati che abbiano tutti esattamente n_tokens
    if embeds.shape[1] < n_tokens:
        padding = torch.zeros(1, n_tokens - embeds.shape[1], embeds.shape[2], dtype=embeds.dtype).to(embeds.device)
        embeds = torch.cat([embeds, padding], dim=1)
    
    truncated_embeds_list.append(embeds)

# Stack e media
stacked_truncated = torch.stack([e.squeeze(0) for e in truncated_embeds_list])
mean_truncated = stacked_truncated.mean(dim=0, keepdim=True)

with torch.no_grad():
    output_truncated = model.generate(
        inputs_embeds=mean_truncated,
        max_new_tokens=100,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_truncated = tokenizer.decode(output_truncated[0], skip_special_tokens=True)
print(f"Output da media dei primi {n_tokens} token: {generated_text_truncated}")

print("\n--- Metodo 4: Media escludendo componenti zero (padding-aware) ---")

# Crea maschere per identificare le posizioni con padding (tutti zeri)
masks_list = []
for embeds, original_embeds in zip(padded_embeds_list, input_embeds_list):
    # Crea una maschera: 1 per posizioni valide, 0 per padding
    mask = torch.ones_like(embeds)
    if embeds.shape[1] > original_embeds.shape[1]:
        # Le posizioni dopo la lunghezza originale sono padding
        mask[:, original_embeds.shape[1]:, :] = 0
    masks_list.append(mask)

# Stack embeddings e maschere
stacked_embeds = torch.stack([e.squeeze(0) for e in padded_embeds_list])  # [num_movies, seq_len, hidden_dim]
stacked_masks = torch.stack([m.squeeze(0) for m in masks_list])  # [num_movies, seq_len, hidden_dim]

# Calcola la somma pesata e il conteggio per ogni componente
weighted_sum_nonzero = (stacked_embeds * stacked_masks).sum(dim=0)  # [seq_len, hidden_dim]
count_nonzero = stacked_masks.sum(dim=0)  # [seq_len, hidden_dim]

# Evita divisione per zero
count_nonzero = torch.clamp(count_nonzero, min=1e-9)

# Media considerando solo componenti non-zero
mean_nonzero = weighted_sum_nonzero / count_nonzero
mean_nonzero_embeds = mean_nonzero.unsqueeze(0)  # [1, seq_len, hidden_dim]

# Per debug: mostra quante componenti sono state mediate per alcune posizioni

with torch.no_grad():
    output_nonzero = model.generate(
        inputs_embeds=mean_nonzero_embeds,
        max_new_tokens=300,
        temperature=0.1,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_nonzero = tokenizer.decode(output_nonzero[0], skip_special_tokens=True)
print(f"Output da media non-zero: {generated_text_nonzero}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.



=== METODO 2a: Usando inputs_embeds direttamente ===

--- Metodo 1: Media semplice degli embeddings ---


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


tensor([[[-4.3640e-03, -1.0633e-04, -5.6152e-03,  ..., -5.0545e-05,
          -1.1520e-03,  1.5926e-04],
         [ 1.0529e-03, -8.8882e-04,  2.1515e-03,  ..., -4.1504e-03,
           1.5640e-03, -1.4420e-03],
         [ 1.9932e-04,  3.2806e-04, -2.8687e-03,  ..., -9.0790e-04,
          -8.3160e-04, -1.4038e-03],
         ...,
         [-2.7955e-05,  7.5758e-05,  3.3319e-05,  ..., -5.4240e-06,
           1.1700e-04,  1.2994e-05],
         [-1.2934e-05, -2.8253e-05,  1.3351e-05,  ...,  9.4175e-06,
          -2.4855e-05,  6.2704e-05],
         [ 5.9366e-05,  4.0412e-05, -3.8981e-05,  ...,  1.1802e-05,
          -1.2636e-05, -3.4332e-05]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<MeanBackward1>)
Output da embeddings: ;;���€€€€€€

--- Metodo 2: Media ponderata (peso basato su lunghezza) ---
Output da media ponderata: {}


						

--- Metodo 3: Media dei primi N token ---
Output da media dei primi 100 token: rack rack rack rack rack rack rack rack rack rack rack rack rack rack

In [4]:
from collections import Counter
all_genres = []
for movie_string in movies:
    lines = movie_string.strip().split('\n')
    genre_line = lines[-1]
    genres_text = genre_line.replace('Genre: ', '')
    all_genres.extend(genres_text.split('|'))

genre_counts = Counter(all_genres)
top_10_genres = genre_counts.most_common(10)

print("I 10 generi più frequenti sono:")
for i, (genre, count) in enumerate(top_10_genres):
    print(f"{i+1}. {genre}: {count} occorrenze")

I 10 generi più frequenti sono:
1. Action: 38 occorrenze
2. Thriller: 25 occorrenze
3. Drama: 20 occorrenze
4. Adventure: 16 occorrenze
5. Romance: 12 occorrenze
6. Crime: 11 occorrenze
7. Comedy: 10 occorrenze
8. Sci-Fi: 7 occorrenze
9. War: 3 occorrenze
10. Western: 2 occorrenze


In [5]:
for movie in movies:
    print(f"Movie: {movie}")

Movie: Title: Program, The
Description: Several players from different backgrounds try to cope with the pressures of playing football at a major university. Each deals with the pressure differently, some turn to drinking, others to drugs, and some to studying.
Genre: Action|Drama
Movie: Title: Jane Austen's Mafia!
Description: Like The Godfather Part II, the narrative of Mafia! consists of a series of flashbacks interwoven with the main plot. Tony is the son of a prominent Mafia don, Vincenzo Armani Windbreaker Cortino. As the film opens, Tony introduces the main thread when he exits a Vegas casino and walks to his car, accompanied by a voiceover explaining his philosophy of life. When he starts the car, it explodes. The story then regresses more than half a century to describe the boyhood of Tony's father, Vincenzo, who was born in Italy, the clumsy son of a Sicilian postman. One day, while making a delivery for his father, Vincenzo trips and the parcel bursts open, revealing a strang

In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import warnings
warnings.filterwarnings('ignore')

# Carica modello e tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

# Input di esempio
input_text = "what is the stronger football player?"
print("=== METODO 1: Generazione normale dal testo ===")
# Tokenizza l'input
inputs = tokenizer(input_text, return_tensors="pt")
input_ids = inputs["input_ids"].to(model.device)
input_length = input_ids.shape[1]

# Genera testo normalmente
with torch.no_grad():
    output_normal = model.generate(
        input_ids,
        max_new_tokens=50,
        temperature=0.1,  # Bassa temperatura per risultati deterministici
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )
# Decodifica solo i nuovi token generati (escludendo l'input)
generated_tokens = output_normal[0][input_length:]
generated_text_normal = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(f"Input: {input_text}")
print(f"Output: {generated_text_normal}\n")

print("=== METODO 2: Generazione dagli hidden states ===")

# Ottieni gli hidden states dell'input
with torch.no_grad():
    # Forward pass per ottenere gli hidden states
    outputs = model(
        input_ids=input_ids,
        output_hidden_states=True,
        return_dict=True
    )
    
    # Prendi l'ultimo hidden state (output dell'ultimo layer)
    last_hidden_state = outputs.hidden_states[-1]
    
    print(f"Forma hidden states: {last_hidden_state.shape}")
    print(f"Numero di layer: {len(outputs.hidden_states)}")

# Metodo alternativo: usa direttamente inputs_embeds
print("\n=== METODO 2a: Usando inputs_embeds direttamente ===")

# Metodo 1: Media semplice degli embeddings (ogni prompt contribuisce ugualmente)
print("\n--- Metodo 1: Media semplice degli embeddings ---")
input_embeds_list = []
max_length = 0

# Calcola embeddings per ogni movie
for i, movie in enumerate(movies):
    inputs = tokenizer(movie, return_tensors="pt")
    input_ids = inputs["input_ids"].to(model.device)
    
    # Ottieni embeddings
    embeds = model.get_input_embeddings()(input_ids)
    input_embeds_list.append(embeds)
    
    max_length = max(max_length, embeds.shape[1])
    print(f"Movie {i}: '{movie[:30]}...' - Forma embeddings: {embeds.shape}")

# Pad tutti gli embeddings alla stessa lunghezza per poter fare la media
padded_embeds_list = []
mask_list = []
for embeds in input_embeds_list:
    if embeds.shape[1] < max_length:
        # Pad con zeros (o potresti usare il pad_token_id embedding)
        padding = torch.zeros(1, max_length - embeds.shape[1], embeds.shape[2], dtype=embeds.dtype).to(embeds.device)
        padded_embeds = torch.cat([embeds, padding], dim=1)
        # crea una maschera per i token non zero
        mask = torch.ones(1, embeds.shape[1],embeds.shape[2], dtype=torch.bool).to(embeds.device)
        mask = torch.cat([mask, torch.zeros(1, max_length - embeds.shape[1],embeds.shape[2], dtype=torch.bool).to(embeds.device)], dim=1)
    else:
        padded_embeds = embeds
        mask = torch.ones(1, embeds.shape[1],embeds.shape[2], dtype=torch.bool).to(embeds.device)
    padded_embeds_list.append(padded_embeds)
    mask_list.append(mask)

# Calcola la media
stacked_embeds = torch.stack([e.squeeze(0) for e in padded_embeds_list])  # [num_movies, seq_len, hidden_dim]
input_embeds = stacked_embeds.mean(dim=0, keepdim=True)  # [1, seq_len, hidden_dim]

print(f"\nForma embeddings dopo padding: {stacked_embeds.shape}")
print(f"Forma media embeddings: {input_embeds.shape}")


print (f"Forma input embeddings: {input_embeds.shape}")
print(input_embeds)  # Mostra i primi 5 embeddings
with torch.no_grad():
    # Genera usando inputs_embeds invece di input_ids
    output_from_embeds = model.generate(
        inputs_embeds=input_embeds,
        max_new_tokens=50,
        temperature=0.1,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_embeds = tokenizer.decode(output_from_embeds[0], skip_special_tokens=True)
print(f"Output da embeddings: {generated_text_embeds}")

# Verifica che siano uguali
print("\n=== CONFRONTO RISULTATI ===")
print(f"Testo normale: {generated_text_normal}")
print(f"Testo da embeddings: {generated_text_embeds}")
print(f"I risultati sono identici: {generated_text_normal == generated_text_embeds}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


=== METODO 1: Generazione normale dal testo ===


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Input: what is the stronger football player?
Output: 

The term "stronger football player" can be interpreted in different ways, depending on the context. Here are a few possible interpretations and some factors that could influence the answer:

1. Physical strength: In terms of raw muscle

=== METODO 2: Generazione dagli hidden states ===
Forma hidden states: torch.Size([1, 8, 4096])
Numero di layer: 33

=== METODO 2a: Usando inputs_embeds direttamente ===

--- Metodo 1: Media semplice degli embeddings ---
Movie 0: 'Title: Program, The
Descriptio...' - Forma embeddings: torch.Size([1, 59, 4096])
Movie 1: 'Title: Jane Austen's Mafia!
De...' - Forma embeddings: torch.Size([1, 854, 4096])
Movie 2: 'Title: Under Siege
Description...' - Forma embeddings: torch.Size([1, 1112, 4096])
Movie 3: 'Title: He Got Game
Description...' - Forma embeddings: torch.Size([1, 632, 4096])
Movie 4: 'Title: Good Will Hunting
Descr...' - Forma embeddings: torch.Size([1, 777, 4096])
Movie 5: 'Title: Last of th

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



Forma embeddings dopo padding: torch.Size([54, 1865, 4096])
Forma media embeddings: torch.Size([1, 1865, 4096])
Forma input embeddings: torch.Size([1, 1865, 4096])
tensor([[[-4.3640e-03, -1.0633e-04, -5.6152e-03,  ..., -5.0545e-05,
          -1.1520e-03,  1.5926e-04],
         [-6.4850e-04,  6.1340e-03,  1.0757e-03,  ..., -8.8882e-04,
          -3.4027e-03, -1.6403e-03],
         [-9.8419e-04, -9.7275e-04,  4.5300e-05,  ..., -8.3923e-04,
          -2.1100e-05, -1.4114e-03],
         ...,
         [-2.7955e-05,  7.5758e-05,  3.3319e-05,  ..., -5.4240e-06,
           1.1700e-04,  1.2994e-05],
         [-1.2934e-05, -2.8253e-05,  1.3351e-05,  ...,  9.4175e-06,
          -2.4855e-05,  6.2704e-05],
         [ 5.9366e-05,  4.0412e-05, -3.8981e-05,  ...,  1.1802e-05,
          -1.2636e-05, -3.4332e-05]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<MeanBackward1>)
Output da embeddings: {}����€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€

=== CONFRONTO RISULTATI ===
Testo normale: 


In [5]:
# Metodo 2: Media ponderata basata sulla lunghezza
print("\n--- Metodo 2: Media ponderata (peso basato su lunghezza) ---")
weighted_sum = torch.zeros_like(padded_embeds_list[0])
total_weight = 0

for embeds, original_embeds in zip(padded_embeds_list, input_embeds_list):
    weight = original_embeds.shape[1]  # Peso = lunghezza originale
    weighted_sum += embeds * weight
    total_weight += weight

weighted_mean_embeds = weighted_sum / total_weight

with torch.no_grad():
    output_weighted = model.generate(
        inputs_embeds=weighted_mean_embeds,
        max_new_tokens=50,
        temperature=0.1,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_weighted = tokenizer.decode(output_weighted[0], skip_special_tokens=True)
print(f"Output da media ponderata: {generated_text_weighted}")

# Metodo 3: Media solo dei primi N token di ogni prompt
print("\n--- Metodo 3: Media dei primi N token ---")
n_tokens = 100  # Prendi solo i primi 5 token di ogni prompt

truncated_embeds_list = []
for i, movie in enumerate(movies):
    inputs = tokenizer(movie, return_tensors="pt", max_length=n_tokens, truncation=True)
    input_ids = inputs["input_ids"].to(model.device)
    embeds = model.get_input_embeddings()(input_ids)
    
    # Assicurati che abbiano tutti esattamente n_tokens
    if embeds.shape[1] < n_tokens:
        padding = torch.zeros(1, n_tokens - embeds.shape[1], embeds.shape[2], dtype=embeds.dtype).to(embeds.device)
        embeds = torch.cat([embeds, padding], dim=1)
    
    truncated_embeds_list.append(embeds)

# Stack e media
stacked_truncated = torch.stack([e.squeeze(0) for e in truncated_embeds_list])
mean_truncated = stacked_truncated.mean(dim=0, keepdim=True)

with torch.no_grad():
    output_truncated = model.generate(
        inputs_embeds=mean_truncated,
        max_new_tokens=50,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_truncated = tokenizer.decode(output_truncated[0], skip_special_tokens=True)
print(f"Output da media dei primi {n_tokens} token: {generated_text_truncated}")


--- Metodo 2: Media ponderata (peso basato su lunghezza) ---
Output da media ponderata: {}�������������������������������������������������

--- Metodo 3: Media dei primi N token ---
Output da media dei primi 100 token: rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack rack


In [6]:
print("\n--- Metodo 4: Media escludendo componenti zero (padding-aware) ---")

# Crea maschere per identificare le posizioni con padding (tutti zeri)
masks_list = []
for embeds, original_embeds in zip(padded_embeds_list, input_embeds_list):
    # Crea una maschera: 1 per posizioni valide, 0 per padding
    mask = torch.ones_like(embeds)
    if embeds.shape[1] > original_embeds.shape[1]:
        # Le posizioni dopo la lunghezza originale sono padding
        mask[:, original_embeds.shape[1]:, :] = 0
    masks_list.append(mask)

# Stack embeddings e maschere
stacked_embeds = torch.stack([e.squeeze(0) for e in padded_embeds_list])  # [num_movies, seq_len, hidden_dim]
stacked_masks = torch.stack([m.squeeze(0) for m in masks_list])  # [num_movies, seq_len, hidden_dim]

# Calcola la somma pesata e il conteggio per ogni componente
weighted_sum_nonzero = (stacked_embeds * stacked_masks).sum(dim=0)  # [seq_len, hidden_dim]
count_nonzero = stacked_masks.sum(dim=0)  # [seq_len, hidden_dim]

# Evita divisione per zero
count_nonzero = torch.clamp(count_nonzero, min=1e-9)

# Media considerando solo componenti non-zero
mean_nonzero = weighted_sum_nonzero / count_nonzero
mean_nonzero_embeds = mean_nonzero.unsqueeze(0)  # [1, seq_len, hidden_dim]

# Per debug: mostra quante componenti sono state mediate per alcune posizioni
print(f"Esempio conteggi non-zero per posizione 0: min={count_nonzero[0].min():.0f}, max={count_nonzero[0].max():.0f}")
print(f"Esempio conteggi non-zero per ultima posizione: min={count_nonzero[-1].min():.0f}, max={count_nonzero[-1].max():.0f}")

with torch.no_grad():
    output_nonzero = model.generate(
        inputs_embeds=mean_nonzero_embeds,
        max_new_tokens=50,
        temperature=0.1,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_nonzero = tokenizer.decode(output_nonzero[0], skip_special_tokens=True)
print(f"Output da media non-zero: {generated_text_nonzero}")


--- Metodo 4: Media escludendo componenti zero (padding-aware) ---
Esempio conteggi non-zero per posizione 0: min=54, max=54
Esempio conteggi non-zero per ultima posizione: min=1, max=1
Output da media non-zero: 

Title: The Secret of the Lost City

Description: In the heart of the Amazon rainforest lies a lost city, guarded by ancient guardians and filled with untold riches. When a team of archaeologists discovers


In [7]:
for movie in movies:
    print(f"Movie: {movie}")  # Mostra solo i primi 30 caratteri per brevità

Movie: Title: Program, The
Description: Several players from different backgrounds try to cope with the pressures of playing football at a major university. Each deals with the pressure differently, some turn to drinking, others to drugs, and some to studying.
Genre: Action|Drama
Movie: Title: Jane Austen's Mafia!
Description: Like The Godfather Part II, the narrative of Mafia! consists of a series of flashbacks interwoven with the main plot. Tony is the son of a prominent Mafia don, Vincenzo Armani Windbreaker Cortino. As the film opens, Tony introduces the main thread when he exits a Vegas casino and walks to his car, accompanied by a voiceover explaining his philosophy of life. When he starts the car, it explodes. The story then regresses more than half a century to describe the boyhood of Tony's father, Vincenzo, who was born in Italy, the clumsy son of a Sicilian postman. One day, while making a delivery for his father, Vincenzo trips and the parcel bursts open, revealing a strang

In [None]:

print("\n--- Metodo 4: Media escludendo componenti zero (padding-aware) ---")

# Crea maschere per identificare le posizioni con padding (tutti zeri)
masks_list = []
for embeds, original_embeds in zip(padded_embeds_list, input_embeds_list):
    # Crea una maschera: 1 per posizioni valide, 0 per padding
    mask = torch.ones_like(embeds)
    if embeds.shape[1] > original_embeds.shape[1]:
        # Le posizioni dopo la lunghezza originale sono padding
        mask[:, original_embeds.shape[1]:, :] = 0
    masks_list.append(mask)

# Stack embeddings e maschere
stacked_embeds = torch.stack([e.squeeze(0) for e in padded_embeds_list])  # [num_movies, seq_len, hidden_dim]
stacked_masks = torch.stack([m.squeeze(0) for m in masks_list])  # [num_movies, seq_len, hidden_dim]

# Calcola la somma pesata e il conteggio per ogni componente
weighted_sum_nonzero = (stacked_embeds * stacked_masks).sum(dim=0)  # [seq_len, hidden_dim]
count_nonzero = stacked_masks.sum(dim=0)  # [seq_len, hidden_dim]

# Evita divisione per zero
count_nonzero = torch.clamp(count_nonzero, min=1e-9)

# Media considerando solo componenti non-zero
mean_nonzero = weighted_sum_nonzero / count_nonzero
mean_nonzero_embeds = mean_nonzero.unsqueeze(0)  # [1, seq_len, hidden_dim]

# Per debug: mostra quante componenti sono state mediate per alcune posizioni

with torch.no_grad():
    output_nonzero = model.generate(
        inputs_embeds=mean_nonzero_embeds,
        max_new_tokens=300,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_nonzero = tokenizer.decode(output_nonzero[0], skip_special_tokens=True)
print(f"Output da media non-zero: {generated_text_nonzero}")

In [15]:
# Carica modello e tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

# Input di esempio
input_text = movies[1]
print("=== METODO 1: Generazione normale dal testo ===")
# Tokenizza l'input
inputs = tokenizer(input_text, return_tensors="pt")
input_ids = inputs["input_ids"].to(model.device)
input_length = input_ids.shape[1]

# Genera testo normalmente
with torch.no_grad():
    output_normal = model.generate(
        input_ids,
        max_new_tokens=50,
        temperature=0.1,  # Bassa temperatura per risultati deterministici
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )
# Decodifica solo i nuovi token generati (escludendo l'input)
generated_tokens = output_normal[0][input_length:]
generated_text_normal = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(f"Input: {input_text}")
print(f"Output: {generated_text_normal}\n")

print("=== METODO 2: Generazione dagli hidden states ===")

# Ottieni gli hidden states dell'input
with torch.no_grad():
    # Forward pass per ottenere gli hidden states
    outputs = model(
        input_ids=input_ids,
        output_hidden_states=True,
        return_dict=True
    )
    
    # Prendi l'ultimo hidden state (output dell'ultimo layer)
    last_hidden_state = outputs.hidden_states[-1]
    
    print(f"Forma hidden states: {last_hidden_state.shape}")
    print(f"Numero di layer: {len(outputs.hidden_states)}")

# Metodo alternativo: usa direttamente inputs_embeds
print("\n=== METODO 2a: Usando inputs_embeds direttamente ===")

# Ottieni gli embeddings dell'input
input_embeds = model.get_input_embeddings()(input_ids)
print (f"Forma input embeddings: {input_embeds.shape}")
print(input_embeds)  # Mostra i primi 5 embeddings
with torch.no_grad():
    # Genera usando inputs_embeds invece di input_ids
    output_from_embeds = model.generate(
        inputs_embeds=input_embeds,
        max_new_tokens=50,
        temperature=0.1,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_embeds = tokenizer.decode(output_from_embeds[0], skip_special_tokens=True)
print(f"Output da embeddings: {generated_text_embeds}")

# Verifica che siano uguali
print("\n=== CONFRONTO RISULTATI ===")
print(f"Testo normale: {generated_text_normal}")
print(f"Testo da embeddings: {generated_text_embeds}")
print(f"I risultati sono identici: {generated_text_normal == generated_text_embeds}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


=== METODO 1: Generazione normale dal testo ===
Input: Title: Dark Knight Rises, The
Description: Following the death of District Attorney Harvey Dent, Batman assumes responsibility for Dent's crimes to protect the late attorney's reputation and is subsequently hunted by the Gotham City Police Department. Eight years later, Batman encounters the mysterious Selina Kyle and the villainous Bane, a new terrorist leader who overwhelms Gotham's finest. The Dark Knight resurfaces to protect a city that has branded him an enemy.
Genre: Action|Adventure|Crime|IMAX
Output: |Thriller
Release Date: 2012-07-20
Runtime: 165 minutes
Director: Christopher Nolan
Writer: Jonathan Nolan, Christopher Nolan, David S. Goy

=== METODO 2: Generazione dagli hidden states ===


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Forma hidden states: torch.Size([1, 121, 4096])
Numero di layer: 33

=== METODO 2a: Usando inputs_embeds direttamente ===
Forma input embeddings: torch.Size([1, 121, 4096])
tensor([[[-4.3640e-03, -1.0633e-04, -5.6152e-03,  ..., -5.0545e-05,
          -1.1520e-03,  1.5926e-04],
         [-6.4850e-04,  6.1340e-03,  1.0757e-03,  ..., -8.8882e-04,
          -3.4027e-03, -1.6403e-03],
         [-9.8419e-04, -9.7275e-04,  4.5300e-05,  ..., -8.3923e-04,
          -2.1100e-05, -1.4114e-03],
         ...,
         [-2.4319e-04, -1.5717e-03,  8.4686e-04,  ...,  9.1171e-04,
           1.1368e-03,  8.2397e-04],
         [ 1.0452e-03, -8.1635e-04, -1.4954e-03,  ...,  1.0147e-03,
          -4.6921e-04, -1.3199e-03],
         [-1.9684e-03,  1.2512e-03,  9.4986e-04,  ..., -3.0365e-03,
           1.8616e-03, -5.0354e-03]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<EmbeddingBackward0>)


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Output da embeddings: |Thriller
Release Date: 2012-07-20
Runtime: 165 minutes
Director: Christopher Nolan
Writer: Jonathan Nolan, Christopher Nolan, David S. Goy

=== CONFRONTO RISULTATI ===
Testo normale: |Thriller
Release Date: 2012-07-20
Runtime: 165 minutes
Director: Christopher Nolan
Writer: Jonathan Nolan, Christopher Nolan, David S. Goy
Testo da embeddings: |Thriller
Release Date: 2012-07-20
Runtime: 165 minutes
Director: Christopher Nolan
Writer: Jonathan Nolan, Christopher Nolan, David S. Goy
I risultati sono identici: True

=== BONUS: Uso di hidden states da layer intermedio ===

Forma hidden state layer 16: torch.Size([1, 121, 4096])
Gli hidden states intermedi richiederebbero un wrapper personalizzato per la generazione.

=== SALVATAGGIO HIDDEN STATES ===
Hidden states pronti per il salvataggio (decommentare la riga sopra per salvare)

=== RIUTILIZZO HIDDEN STATES ===
Output da embeddings ricaricati: |Thriller
Release Date: 2012-07-20
Runtime: 165 minutes
Director: Christop

In [16]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import warnings
warnings.filterwarnings('ignore')

# Carica modello e tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

# Input di esempio
input_text = "what is the stronger football player?"
print("=== METODO 1: Generazione normale dal testo ===")
# Tokenizza l'input
inputs = tokenizer(input_text, return_tensors="pt")
input_ids = inputs["input_ids"].to(model.device)
input_length = input_ids.shape[1]

# Genera testo normalmente
with torch.no_grad():
    output_normal = model.generate(
        input_ids,
        max_new_tokens=50,
        temperature=0.1,  # Bassa temperatura per risultati deterministici
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )
# Decodifica solo i nuovi token generati (escludendo l'input)
generated_tokens = output_normal[0][input_length:]
generated_text_normal = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(f"Input: {input_text}")
print(f"Output: {generated_text_normal}\n")

print("=== METODO 2: Generazione dagli hidden states ===")

# Ottieni gli hidden states dell'input
with torch.no_grad():
    # Forward pass per ottenere gli hidden states
    outputs = model(
        input_ids=input_ids,
        output_hidden_states=True,
        return_dict=True
    )
    
    # Prendi l'ultimo hidden state (output dell'ultimo layer)
    last_hidden_state = outputs.hidden_states[-1]
    
    print(f"Forma hidden states: {last_hidden_state.shape}")
    print(f"Numero di layer: {len(outputs.hidden_states)}")

# Metodo alternativo: usa direttamente inputs_embeds
print("\n=== METODO 2a: Usando inputs_embeds direttamente ===")

# Metodo 1: Media semplice degli embeddings (ogni prompt contribuisce ugualmente)
print("\n--- Metodo 1: Media semplice degli embeddings ---")
input_embeds_list = []
max_length = 0

# Calcola embeddings per ogni movie
eletto = movies[1]  # Scegli un movie specifico per il test

for i, movie in enumerate(movies):
    inputs = tokenizer(eletto, return_tensors="pt")
    input_ids = inputs["input_ids"].to(model.device)
    
    # Ottieni embeddings
    embeds = model.get_input_embeddings()(input_ids)
    input_embeds_list.append(embeds)
    
    max_length = max(max_length, embeds.shape[1])
    print(f"Movie {i}: '{eletto[:30]}...' - Forma embeddings: {embeds.shape}")

# Pad tutti gli embeddings alla stessa lunghezza per poter fare la media
padded_embeds_list = []
for embeds in input_embeds_list:
    if embeds.shape[1] < max_length:
        # Pad con zeros (o potresti usare il pad_token_id embedding)
        padding = torch.zeros(1, max_length - embeds.shape[1], embeds.shape[2], dtype=embeds.dtype).to(embeds.device)
        padded_embeds = torch.cat([embeds, padding], dim=1)
    else:
        padded_embeds = embeds
    padded_embeds_list.append(padded_embeds)

# Calcola la media
stacked_embeds = torch.stack([e.squeeze(0) for e in padded_embeds_list])  # [num_movies, seq_len, hidden_dim]
input_embeds = stacked_embeds.mean(dim=0, keepdim=True)  # [1, seq_len, hidden_dim]

print(f"\nForma embeddings dopo padding: {stacked_embeds.shape}")
print(f"Forma media embeddings: {input_embeds.shape}")


print (f"Forma input embeddings: {input_embeds.shape}")
print(input_embeds)  # Mostra i primi 5 embeddings
with torch.no_grad():
    # Genera usando inputs_embeds invece di input_ids
    output_from_embeds = model.generate(
        inputs_embeds=input_embeds,
        max_new_tokens=50,
        temperature=0.1,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )

generated_text_embeds = tokenizer.decode(output_from_embeds[0], skip_special_tokens=True)
print(f"Output da embeddings: {generated_text_embeds}")

# Verifica che siano uguali
print("\n=== CONFRONTO RISULTATI ===")
print(f"Testo normale: {generated_text_normal}")
print(f"Testo da embeddings: {generated_text_embeds}")
print(f"I risultati sono identici: {generated_text_normal == generated_text_embeds}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


=== METODO 1: Generazione normale dal testo ===
Input: what is the stronger football player?
Output: 

The term "stronger football player" can be interpreted in different ways, depending on the context. Here are a few possible interpretations and some factors that could influence the answer:

1. Physical strength: In terms of raw muscle

=== METODO 2: Generazione dagli hidden states ===


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Forma hidden states: torch.Size([1, 8, 4096])
Numero di layer: 33

=== METODO 2a: Usando inputs_embeds direttamente ===

--- Metodo 1: Media semplice degli embeddings ---
Movie 0: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 1: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 2: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 3: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 4: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 5: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 6: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 7: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
Movie 8: 'Title: Dark Knight Rises, The
...' - Forma embeddings: torch.Size([1, 121, 4096])
M