In [2]:
import pandas as pd

# Define the path to the parquet file
# Ensure 'artgraph_metadata.parquet' is in the same directory as your notebook
METADATA_FILE_PATH = 'artgraph_metadata.parquet'

print(f"Attempting to load metadata file: {METADATA_FILE_PATH}")

try:
    # Load the parquet file into a Pandas DataFrame
    metadata_df = pd.read_parquet(METADATA_FILE_PATH)
    
    print("\nMetadata loaded successfully!")
    
    # Print the first 3 rows of the DataFrame
    print("\n--- First 3 rows of metadata ---")
    print(metadata_df.head(3))
    
    # Print the names of all available columns
    print("\n--- Available columns in metadata ---")
    print(metadata_df.columns.tolist())
    
    # Print the total number of rows and columns
    print(f"\nDataFrame dimensions: {metadata_df.shape[0]} rows, {metadata_df.shape[1]} columns.")
    
except FileNotFoundError:
    print(f"\nERROR: The file '{METADATA_FILE_PATH}' was not found.")
    print("Please ensure the file is in the same directory as your notebook.")
except Exception as e:
    print(f"\nAn unexpected error occurred while loading metadata: {e}")

print("\nMetadata exploration complete.")

Attempting to load metadata file: artgraph_metadata.parquet

Metadata loaded successfully!

--- First 3 rows of metadata ---
                        ArtworkTitle            ArtistName ArtworkYear Period  \
0  Menton.&#160;Beach with umbrellas  zinaida-serebriakova        1931   None   
1                        Autumn Song                  erte        None   None   
2                           Feathers                  erte        None   None   

      Style                                           FileName  \
0  art deco  zinaida-serebriakova_menton-beach-with-umbrell...   
1  art deco                               erte_autumn-song.jpg   
2  art deco                                  erte_feathers.jpg   

               Genre                                           Movement  
0     genre painting  Mir iskusstva, Neoclassical architecture, Repr...  
1  symbolic painting                                               None  
2             design                                           

In [3]:
import os
import base64
from pathlib import Path
from PIL import Image
import pandas as pd
from ollama import chat
from io import BytesIO
import re
import numpy as np
import torch
import csv

# --- GLOBAL CONFIGURATION ---
# Path to the image directory
# Ensure 'images100' is the main folder containing your images.
IMAGE_SOURCE_DIR = './images100' 

# Ollama model to use
OLLAMA_MODEL = 'qwen2.5vl'

# Limit processing to the first N images (set to None to process all images)
IMAGES_TO_PROCESS = None # Set to None to process all images

# Number of fragments for the 2x2 grid
GRID_ROWS = 2
GRID_COLS = 2
TOTAL_GRID_SEGMENTS = GRID_ROWS * GRID_COLS # Will be 4

# Maximum number of tokens to generate for descriptions (adjust manually for your tests)
MAX_TOKENS = 512 

# Path to the metadata file
METADATA_FILE_PATH = 'artgraph_metadata.parquet'

# Output CSV filename for Cell 2
MAIN_OUTPUT_CSV_FILENAME = f"image_analysis_qwen_2x2_with_metadata_max_tokens_{MAX_TOKENS}.csv"

# Temperature for model generation (higher values = more creativity, lower values = more deterministic)
TEMPERATURE = 0.7 

# --- PROMPTS ADAPTED FOR IMAGES (Requesting English output and full detail) ---
# This prompt is designed to give absolute priority to the fragment description,
# using metadata as secondary informative context, if available.
PROMPT_FRAGMENT_BASE = (
    "As an art critic, concisely but comprehensively describe **only what is visible in this specific portion of the image.** "
    "Focus critically on the most impactful elements, subjects, colors, textures, shapes, lighting, and composition within the fragment. "
    "Avoid inferences or details not directly observable. Provide a sharp, insightful description in English."
)

METADATA_CONTEXT_SUFFIX = (
    " (Context: this fragment belongs to {metadata_info})."
)

PROMPT_TOTAL_IMAGE_BASE = (
    "As an art critic, provide a concise yet holistic analysis of the entire image. "
    "Critically examine the general themes, overall composition, the intricate interplay between elements, "
    "and the overarching message, mood, or emotion the artwork conveys. "
    "Deliver a sharp and exhaustive description in English."
)


# --- UTILITY FUNCTIONS ---

def encode_image_to_base64(image: Image.Image, size=(256, 256)) -> str:
    """
    Resizes a PIL image and encodes it to base64.
    """
    try:
        img_resized = image.resize(size) 
        
        buffer = BytesIO()
        img_resized.save(buffer, format="JPEG") 
        img_bytes = buffer.getvalue()
        
        return base64.b64encode(img_bytes).decode('utf-8')
    except Exception as e:
        print(f"Error encoding image: {e}")
        return None

def split_image_into_grid_segments(image: Image.Image, rows: int, cols: int) -> list[Image.Image]:
    """
    Splits a PIL image into a grid of segments (e.g., 2x2).
    Returns segments in reading order (left to right, top to bottom).
    """
    width, height = image.size
    segment_width = width // cols
    segment_height = height // rows
    segments = []
    
    for r in range(rows):
        for c in range(cols):
            left = c * segment_width
            upper = r * segment_height
            right = (c + 1) * segment_width if c < cols - 1 else width
            lower = (r + 1) * segment_height if r < rows - 1 else height
            
            segment = image.crop((left, upper, right, lower))
            segments.append(segment)
            
    return segments

def generate_description_ollama(model_name: str, image_base64: str, prompt: str) -> str:
    """
    Generates a description using a specific Ollama model with a base64 image.
    """
    try:
        response = chat(
            model=model_name,
            messages=[
                {
                    'role': 'user',
                    'content': prompt,
                    'images': [image_base64],
                }
            ],
            options={
                'num_predict': MAX_TOKENS,
                'temperature': TEMPERATURE # TEMPERATURE is now globally defined
            }
        )
        return response.message.content.strip()
    except Exception as e:
        return f"Error during inference with {model_name}: {e}"

# --- IMAGE DIRECTORY CHECK ---
if not os.path.isdir(IMAGE_SOURCE_DIR):
    print(f"ERROR: The image directory '{IMAGE_SOURCE_DIR}' was not found.")
    print("Please ensure the 'images100' folder is in the same directory as your notebook and contains your images.")

print("Setup complete and utility functions loaded.")

# Cell 3: Image Description Generation with Ollama and Metadata

print(f"\n\n--- STARTING IMAGE ANALYSIS with {OLLAMA_MODEL} (via Ollama) ---")
print(f" MAX_TOKENS set to: {MAX_TOKENS}")

# This list will collect dictionaries for each row of the final CSV
all_image_descriptions_data = []
processed_images_count = 0

# --- Metadata loading and preparation ---
metadata_df = None
metadata_dict = {} # Dictionary for fast lookup
try:
    # Use METADATA_FILE_PATH as defined in Cell 2
    metadata_df = pd.read_parquet(METADATA_FILE_PATH, engine='pyarrow') 
    print(f"\nMetadata '{METADATA_FILE_PATH}' loaded successfully.")
    
    # INITIAL AND AGGRESSIVE CLEANING OF NEWLY LOADED METADATA HERE
    # Apply cleaning to all 'object' type (string) columns
    for col in metadata_df.select_dtypes(include=['object']).columns:
        # Replace newlines, carriage returns, and &#160; with spaces, then strip extra whitespace
        metadata_df[col] = metadata_df[col].astype(str).str.replace('\n', ' ').str.replace('\r', ' ').str.replace('&#160;', ' ').str.strip()
        # Remove any double spaces for further cleaning
        metadata_df[col] = metadata_df[col].apply(lambda x: re.sub(r'\s+', ' ', x).strip())

    # Prepare a dictionary for efficient lookup using 'FileName' as the key
    metadata_dict = metadata_df.set_index('FileName').to_dict('index')
    print("Metadata prepared for quick lookup by 'FileName' and cleaned of special characters.")

except FileNotFoundError:
    print(f"\nWARNING: File '{METADATA_FILE_PATH}' not found. Descriptions will not be enriched with metadata.")
    metadata_df = None
except Exception as e:
    print(f"\nERROR loading/preparing metadata: {e}. Descriptions will not be enriched with metadata.")
    metadata_df = None
# --- END Metadata Loading ---

# Get the list of image files from the image directory (IMAGE_SOURCE_DIR defined in Cell 2)
all_image_files_in_dir = [f for f in os.listdir(IMAGE_SOURCE_DIR) if f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp", ".tiff"))]

# Filter images based on available metadata and apply the IMAGES_TO_PROCESS limit
image_files_to_process = sorted([f for f in all_image_files_in_dir if f in metadata_dict]) 

if IMAGES_TO_PROCESS is not None and IMAGES_TO_PROCESS > 0:
    image_files_to_process = image_files_to_process[:IMAGES_TO_PROCESS]
    print(f"\nLimited to processing the first {IMAGES_TO_PROCESS} valid images.")
else:
    print(f"\nProcessing all {len(image_files_to_process)} valid images in folder '{IMAGE_SOURCE_DIR}'.")


if not image_files_to_process:
    print("\nATTENTION: No images found in the specified directory that have a corresponding entry in the metadata. Ensure file names match between the folder and the 'FileName' column of the parquet file.")
    print("Process terminated with no images to process.")
else:
    for filename in image_files_to_process: 
        image_file_path = os.path.join(IMAGE_SOURCE_DIR, filename)
        print(f"\n--- PROCESSING IMAGE: {filename} ({processed_images_count + 1}{f'/{len(image_files_to_process)}' if IMAGES_TO_PROCESS is not None else ''}) ---")

        # Retrieve metadata for the current image
        image_metadata = metadata_dict.get(filename, {})

        # Extract metadata values (already cleaned upon parquet loading)
        artwork_title = image_metadata.get('ArtworkTitle', '')
        artist_name = image_metadata.get('ArtistName', '')
        artwork_year = image_metadata.get('ArtworkYear', '')
        period = image_metadata.get('Period', '')
        style = image_metadata.get('Style', '')
        genre = image_metadata.get('Genre', '')
        movement = image_metadata.get('Movement', '')

        try:
            original_image = Image.open(image_file_path).convert("RGB")
            print(f"Original image loaded: {filename} (dim: {original_image.size})")

            # --- Building metadata context for prompts ---
            metadata_info_str = ""
            metadata_parts_list = [] 
            
            # Add parts only if the value is not an empty string or 'None' textually (after cleaning)
            if artwork_title and artwork_title.lower() not in ['none', 'unknown']:
                metadata_parts_list.append(f"the artwork '{artwork_title}'")
            if artwork_year and artwork_year.lower() not in ['none', 'unknown']:
                metadata_parts_list.append(f"created in {artwork_year}")
            if artist_name and artist_name.lower() not in ['none', 'unknown']:
                metadata_parts_list.append(f"by artist {artist_name}")
            if style and style.lower() not in ['none', 'unknown']:
                metadata_parts_list.append(f"in the style of {style}")
            if genre and genre.lower() not in ['none', 'unknown']:
                metadata_parts_list.append(f"and belonging to the genre {genre}")
            if movement and movement.lower() not in ['none', 'unknown']:
                metadata_parts_list.append(f"with the movement {movement}")

            if metadata_parts_list:
                metadata_info_str = ", ".join(metadata_parts_list)
                print(f" Metadata context generated for '{filename}': '{metadata_info_str}'")
            
            # Combine the base prompt with the metadata context (PROMPT_FRAGMENT_BASE, PROMPT_TOTAL_IMAGE_BASE, METADATA_CONTEXT_SUFFIX defined in Cell 2)
            final_fragment_prompt = PROMPT_FRAGMENT_BASE
            if metadata_info_str:
                final_fragment_prompt += METADATA_CONTEXT_SUFFIX.format(metadata_info=metadata_info_str)
            
            final_total_prompt = PROMPT_TOTAL_IMAGE_BASE
            if metadata_info_str:
                final_total_prompt += METADATA_CONTEXT_SUFFIX.format(metadata_info=metadata_info_str)


            print(f" Final prompt for fragments: {final_fragment_prompt[:100]}...")
            print(f" Final prompt for total analysis: {final_total_prompt[:100]}...")
            # --- END Prompt Construction ---

            # Fragment into 2x2 and describe each segment (GRID_ROWS, GRID_COLS defined in Cell 2)
            segments = split_image_into_grid_segments(original_image, rows=GRID_ROWS, cols=GRID_COLS)
            print(f" Image divided into {len(segments)} segments ({GRID_ROWS}x{GRID_COLS} grid).")

            # Add fragment descriptions
            for i, segment in enumerate(segments):
                print(f"   Generating description for segment {i+1}...")
                base64_segment = encode_image_to_base64(segment, size=(256, 256)) # encode_image_to_base64 defined in Cell 2
                
                fragment_description = ""
                if base64_segment:
                    fragment_description = generate_description_ollama(OLLAMA_MODEL, base64_segment, final_fragment_prompt)
                    # Apply cleaning to the generated model description
                    # Remove newlines, carriage returns, and then clean double spaces
                    fragment_description = fragment_description.replace('\n', ' ').replace('\r', ' ').strip()
                    fragment_description = re.sub(r'\s+', ' ', fragment_description).strip() # Remove double spaces

                    print(f"   Segment {i+1} Desc: {fragment_description[:100]}...")
                else:
                    fragment_description = f"Error: Segment {i+1} encoding failed."

                # Add the fragment row to the data list for the CSV
                row = {
                    "Image Name": filename if i == 0 else '', # Translated "Nome Immagine" to "Image Name"
                    "ArtworkTitle": artwork_title if i == 0 else '',
                    "ArtistName": artist_name if i == 0 else '',
                    "ArtworkYear": artwork_year if i == 0 else '',
                    "Period": period if i == 0 else '',
                    "Style": style if i == 0 else '',
                    "Genre": genre if i == 0 else '',
                    "Movement": movement if i == 0 else '',
                    "Description Type": f"Fragment {i+1}", # Translated "Tipo Descrizione" to "Description Type"
                    "Description": fragment_description # Translated "Descrizione" to "Description"
                }
                all_image_descriptions_data.append(row)
            
            # Generate description for the entire image
            print("  Generating description for the entire image...")
            base64_full_image = encode_image_to_base64(original_image, size=(512, 512))
            
            full_image_description = ""
            if base64_full_image:
                full_image_description = generate_description_ollama(OLLAMA_MODEL, base64_full_image, final_total_prompt)
                # Apply cleaning to the generated model description
                # Remove newlines, carriage returns, and then clean double spaces
                full_image_description = full_image_description.replace('\n', ' ').replace('\r', ' ').strip()
                full_image_description = re.sub(r'\s+', ' ', full_image_description).strip() # Remove double spaces

                print(f"  Total Description: {full_image_description[:100]}...")
            else:
                full_image_description = "Error: Full image encoding failed."

            # Add the total description row to the data list for the CSV
            row_total = {
                "Image Name": '', # Translated "Nome Immagine" to "Image Name"
                "ArtworkTitle": '', "ArtistName": '', "ArtworkYear": '', "Period": '', "Style": '', "Genre": '', "Movement": '',
                "Description Type": "Total", # Translated "Tipo Descrizione" to "Description Type"
                "Description": full_image_description # Translated "Descrizione" to "Description"
            }
            all_image_descriptions_data.append(row_total)
            
        except Exception as e:
            print(f"CRITICAL ERROR during image processing {filename}: {e}")
            # Add error rows for fragments and total if an error occurs
            for i in range(TOTAL_GRID_SEGMENTS): # TOTAL_GRID_SEGMENTS defined in Cell 2
                error_row_fragment = {
                    "Image Name": filename if i == 0 else '',
                    "ArtworkTitle": artwork_title if i == 0 else '',
                    "ArtistName": artist_name if i == 0 else '',
                    "ArtworkYear": artwork_year if i == 0 else '',
                    "Period": period if i == 0 else '',
                    "Style": style if i == 0 else '',
                    "Genre": genre if i == 0 else '',
                    "Movement": movement if i == 0 else '',
                    "Description Type": f"Fragment {i+1}",
                    "Description": f"ERROR DURING PROCESSING: {e}"
                }
                all_image_descriptions_data.append(error_row_fragment)
            
            error_row_total = {
                "Image Name": '',
                "ArtworkTitle": '', "ArtistName": '', "ArtworkYear": '', "Period": '', "Style": '', "Genre": '', "Movement": '',
                "Description Type": "Total",
                "Description": f"ERROR DURING PROCESSING: {e}"
            }
            all_image_descriptions_data.append(error_row_total)

        processed_images_count += 1
        
        # --- Optimization: Clear CUDA cache after each image ---
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            print(f" CUDA cache cleared after processing {filename}.")

    # --- AFTER THE MAIN LOOP, WHEN ALL IMAGES HAVE BEEN PROCESSED ---

    # Define the column order for the final DataFrame (crucial for format)
    column_order = [
        "Image Name", # Translated "Nome Immagine" to "Image Name"
        "ArtworkTitle",
        "ArtistName",
        "ArtworkYear",
        "Period",
        "Style",
        "Genre",
        "Movement",
        "Description Type", # Translated "Tipo Descrizione" to "Description Type"
        "Description" # Translated "Descrizione" to "Description"
    ]

    # Create the final DataFrame from the collected data
    df_output_images = pd.DataFrame(all_image_descriptions_data, columns=column_order)

    print(f"\nImage analysis process completed for the first {processed_images_count} images with {OLLAMA_MODEL}.")
    print("Pandas DataFrame 'df_output_images' successfully created.")

    print("\n--- Preview of DataFrame 'df_output_images' ---")
    # Print the first 10 rows to show an example of fragments and total analysis for one or more images
    print(df_output_images.head(10).to_markdown(index=False)) 

    # Save the DataFrame to a CSV file WITHOUT the 'quoting=' clause
    # This will use Pandas' default quoting behavior (QUOTE_MINIMAL or equivalent)
    df_output_images.to_csv(MAIN_OUTPUT_CSV_FILENAME, index=False, encoding='utf-8')
    print(f"\nDataFrame saved as '{MAIN_OUTPUT_CSV_FILENAME}' in the same directory as the notebook with default (minimal) quoting.")

print("\nFull process terminated.")

Setup complete and utility functions loaded.


--- STARTING IMAGE ANALYSIS with qwen2.5vl (via Ollama) ---
 MAX_TOKENS set to: 512

Metadata 'artgraph_metadata.parquet' loaded successfully.
Metadata prepared for quick lookup by 'FileName' and cleaned of special characters.

Processing all 100 valid images in folder './images100'.

--- PROCESSING IMAGE: aleksey-savrasov_courtyard-spring-1853.jpg (1) ---
Original image loaded: aleksey-savrasov_courtyard-spring-1853.jpg (dim: (220, 275))
 Metadata context generated for 'aleksey-savrasov_courtyard-spring-1853.jpg': 'the artwork 'Courtyard. Spring.', created in 1853, by artist aleksey-savrasov, in the style of realism, and belonging to the genre cityscape, with the movement Realism (arts)'
 Final prompt for fragments: As an art critic, concisely but comprehensively describe **only what is visible in this specific por...
 Final prompt for total analysis: As an art critic, provide a concise yet holistic analysis of the entire image. Criticall

In [1]:
import pandas as pd
from transformers import AutoTokenizer
import os

# --- Configuration (Adjust as needed based on your notebook's output) ---
# Questo dovrebbe corrispondere alla variabile MAIN_OUTPUT_CSV_FILENAME nel tuo notebook originale.
# Un nome comune potrebbe essere 'image_analysis_full_folder.csv'.
MAIN_OUTPUT_CSV_FILENAME = 'image_analysis_qwen_2x2_with_metadata_max_tokens_512.csv'
# Questo dovrebbe essere il nome esatto della colonna nel tuo CSV che contiene le descrizioni generate.
DESCRIPTION_COLUMN_NAME = 'Description' # Nomi comuni: 'Description', 'Generated_Text', 'text'
# Il nome del modello Qwen utilizzato per la generazione delle descrizioni delle immagini.
QWEN_MODEL_NAME_OR_PATH = 'Qwen/Qwen2.5-VL'

# --- Carica il tokenizer ---
# Assicurati che la libreria 'transformers' sia installata.
# Potrebbe richiedere del tempo la prima volta per scaricare il tokenizer.
try:
    print(f"Caricamento del tokenizer per il modello: {QWEN_MODEL_NAME_OR_PATH}...")
    tokenizer = AutoTokenizer.from_pretrained(QWEN_MODEL_NAME_OR_PATH)
    print("Tokenizer caricato con successo.")
except Exception as e:
    print(f"Errore durante il caricamento del tokenizer: {e}")
    print("Assicurati che la libreria 'transformers' sia installata e che il nome del modello sia corretto.")
    tokenizer = None
    print("Continuo con il conteggio delle parole come fallback se il caricamento del tokenizer fallisce.")

# --- Funzione per calcolare la lunghezza in token (o parole se il tokenizer fallisce) ---
def get_length(text):
    if pd.isna(text): # Gestisce i valori NaN nelle descrizioni
        return 0
    # Assicurati che il testo sia una stringa prima di codificare/dividere
    text_str = str(text)
    if tokenizer:
        return len(tokenizer.encode(text_str))
    else: # Fallback al conteggio delle parole se il tokenizer non è disponibile
        return len(text_str.split())

# --- Carica il DataFrame dal CSV di output principale ---
try:
    df_output_images = pd.read_csv(MAIN_OUTPUT_CSV_FILENAME)
    print(f"DataFrame caricato con successo da '{MAIN_OUTPUT_CSV_FILENAME}'.")
except FileNotFoundError:
    print(f"Errore: Il file '{MAIN_OUTPUT_CSV_FILENAME}' non è stato trovato.")
    print("Assicurati che le celle precedenti siano state eseguite correttamente e abbiano salvato il CSV di output con questo nome.")
    exit() # Esce dall'esecuzione della cella se il file non è trovato
except Exception as e:
    print(f"Si è verificato un errore durante il caricamento del DataFrame: {e}")
    exit() # Esce dall'esecuzione della cella in caso di altri errori di caricamento

# --- Calcola le lunghezze in token ---
if DESCRIPTION_COLUMN_NAME not in df_output_images.columns:
    print(f"Errore: La colonna '{DESCRIPTION_COLUMN_NAME}' non è stata trovata nel DataFrame.")
    print(f"Le colonne disponibili sono: {df_output_images.columns.tolist()}")
    print("Per favore, controlla e aggiusta 'DESCRIPTION_COLUMN_NAME' nella sezione di configurazione.")
    exit() # Esce dall'esecuzione della cella se la colonna non è trovata

print(f"Calcolo del conteggio dei token per la colonna '{DESCRIPTION_COLUMN_NAME}'...")
df_output_images['token_count'] = df_output_images[DESCRIPTION_COLUMN_NAME].apply(get_length)

# --- Calcola le statistiche ---
total_responses = len(df_output_images)
if total_responses == 0:
    print("Nessuna risposta trovata nel DataFrame per analizzare le statistiche dei token.")
else:
    average_tokens = df_output_images['token_count'].mean()
    max_tokens = df_output_images['token_count'].max()
    min_tokens = df_output_images['token_count'].min()

    responses_above_avg = (df_output_images['token_count'] > average_tokens).sum()
    responses_below_avg = (df_output_images['token_count'] < average_tokens).sum()
    responses_at_avg = (df_output_images['token_count'] == average_tokens).sum()

    # --- Stampa le statistiche ---
    print("\n--- STATISTICHE TOKEN DI OUTPUT (Descrizioni Immagini Qwen-VL) ---")
    print(f"Numero totale di risposte generate: {total_responses}")
    print(f"Token di output medi per risposta: {average_tokens:.2f}")
    print(f"Token massimi in una risposta: {max_tokens}")
    print(f"Token minimi in una risposta: {min_tokens}")
    print(f"Risposte con token sopra la media: {responses_above_avg}")
    print(f"Risposte con token sotto la media: {responses_below_avg}")
    print(f"Risposte con token esattamente alla media: {responses_at_avg}")

    # --- Salva le statistiche in un CSV ---
    output_stats_filename = '/qwen_image_token_statistics_full_folder.csv'
    stats_data = {
        'Metric': [
            'Total Responses Generated',
            'Average Output Tokens',
            'Maximum Tokens',
            'Minimum Tokens',
            'Responses > Average',
            'Responses < Average',
            'Responses = Average'
        ],
        'Value': [
            total_responses,
            round(average_tokens, 2),
            max_tokens,
            min_tokens,
            responses_above_avg,
            responses_below_avg,
            responses_at_avg
        ]
    }
    stats_df = pd.DataFrame(stats_data)
    stats_df.to_csv(output_stats_filename, index=False)
    print(f"Statistiche token salvate in '{output_stats_filename}'.")

print("\nProcesso di calcolo delle statistiche dei token per le descrizioni delle immagini completato.")

Caricamento del tokenizer per il modello: Qwen/Qwen2.5-VL...
Errore durante il caricamento del tokenizer: Qwen/Qwen2.5-VL is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
Assicurati che la libreria 'transformers' sia installata e che il nome del modello sia corretto.
Continuo con il conteggio delle parole come fallback se il caricamento del tokenizer fallisce.
DataFrame caricato con successo da 'image_analysis_qwen_2x2_with_metadata_max_tokens_512.csv'.
Calcolo del conteggio dei token per la colonna 'Description'...

--- STATISTICHE TOKEN DI OUTPUT (Descrizioni Immagini Qwen-VL) ---
Numero totale di risposte generate: 500
Token di output medi per risposta: 209.19
Token massimi in una risposta: 430
Token minimi in una risposta: 109
Risposte con token sopra la media

PermissionError: [Errno 13] Permission denied: '/qwen_image_token_statistics_full_folder.csv'