## Generación de mensajes a través de API Nvidia

In [None]:
from openai import OpenAI 

client = OpenAI(
  base_url = "https://integrate.api.nvidia.com/v1",
  api_key = ""
)

instruccion = """"
Realiza un pitch deck para la marca Nvidia con únicamente un solo texto plano resumido, sin introducción ni nada, solo el resumen 
"""

completion = client.chat.completions.create(
  model="marin/marin-8b-instruct",
  messages=[{"role":"user","content":instruccion}],
  temperature=0.7,
  top_p=0.9,
  max_tokens=512,
  stream=True
)

for chunk in completion:
  if chunk.choices[0].delta.content is not None:
    print(chunk.choices[0].delta.content, end="")

print()

NVIDIA: Poder en tu mano

NVIDIA revoluciona la tecnología con soluciones avanzadas en inteligencia artificial, aprendizaje profundo, gaming, y gráficos, llevándola al futuro.


Leer los archivos limpios: 

In [3]:
import pandas as pd 
df = pd.read_json("./data_cleaned.json")
print("Tamaño de muestra: ",df.shape[0])
df.head()

Tamaño de muestra:  230


Unnamed: 0,category_name,blurb,name,backers_count,staff_pick
8,3D Printing,Organize anything with our innovative 3D print...,Thread Boards 2.0 | 3D printable peg boards wi...,2389,True
53,Animation,Alyson confronts her perfectionism as she stru...,Alyssum - A 2D Animated Short Film,75,True
55,Animation,A boy wonders if there might be other people l...,Is There Anyone Out There?,140,True
56,Animation,This will be the world's first feature-length ...,Loving Vincent Film - bring Van Gogh paintings...,796,True
57,Animation,A student animated musical short in which a ha...,Hamstercide: An Animated Musical Short,26,True


In [4]:
categorias_unicas = df['category_name'].value_counts().index.tolist()
print(categorias_unicas)
len(categorias_unicas)

['Anthologies', 'Product Design', 'Art Books', 'Robots', 'Animation', 'Cookbooks', 'Calendars', 'Literary Spaces', 'Music', 'Periodicals', 'Pottery', 'Indie Rock', 'Illustration', 'Comics', 'Drinks', 'Performances', 'Journalism', 'Publishing', 'Public Art', 'Social Practice', 'Stationery', 'Radio & Podcasts', 'Nonfiction', 'Comic Books', 'Audio', 'Fiction', 'Embroidery', 'Classical Music', 'Theater', 'Typography', 'Video Games', 'Electronic Music', 'Games', 'Glass', 'Graphic Novels', 'Gadgets', 'Dance', 'Design', 'Documentary', 'Flight', 'Residencies', 'Sound', 'Shorts', 'Puzzles', "Children's Books", 'Comedy', 'Chiptune', 'Zines', 'Young Adult', 'Webcomics', 'Weaving', 'World Music', 'Print', 'Letterpress', 'Taxidermy', 'Photography', 'Movie Theaters', 'Pop', 'Photobooks', 'Mobile Games', 'Metal', 'Installations', 'Food', 'DIY', 'Ceramics', '3D Printing', 'Architecture', 'Apps', 'Civic Design', 'Childrenswear', 'Footwear', "Farmer's Markets", 'Fashion', 'Country & Folk', 'Conceptual A

111

In [5]:
df.columns

Index(['category_name', 'blurb', 'name', 'backers_count', 'staff_pick'], dtype='object')

## Creación de la base de datos con prompt genérico: 

In [7]:
from openai import OpenAI
import pandas as pd

models_config = {
    "marin/marin-8b-instruct": {
        "params": {
            "temperature": 0.7,
            "top_p": 0.9,
            "max_tokens": 512
        },
        "extract_func": lambda chunk: chunk.choices[0].delta.content
    },
    "deepseek-ai/deepseek-r1": {
        "params": {
            "temperature": 0.6,
            "top_p": 0.7,
            "max_tokens": 4096
        },
        "extract_func": lambda chunk: chunk.choices[0].delta.content
    },
    "qwen/qwen3-235b-a22b": {
        "params": {
            "temperature": 0.2,
            "top_p": 0.7,
            "max_tokens": 8192,
            "extra_body": {"chat_template_kwargs": {"thinking": True}}
        },
        "extract_func": lambda chunk: (
            getattr(chunk.choices[0].delta, "reasoning_content", "") or ""
        ) + (chunk.choices[0].delta.content or "")
    }
}

num_of_samples = len(df)
rows = []

# Iterar sobre modelos con prompt genérico
for model_name, model_config in models_config.items():
    print(f"\nUsando modelo: {model_name}")

    for i in range(num_of_samples):
        try:
            category = df.iloc[i]['category_name']
            name = df.iloc[i]['name']
            original_text = df.iloc[i]['blurb']

            instruction = f"""
Summarize the essence of the brand {category} and the project {name} in a single, compact paragraph, conveying its core idea clearly and directly.
            """

            completion = client.chat.completions.create(
                model=model_name,
                messages=[{"role": "user", "content": instruction}],
                stream=True,
                **model_config["params"]
            )

            full_text = ""
            for chunk in completion:
                fragment = model_config["extract_func"](chunk)
                if fragment:
                    full_text += fragment

            rows.append({
                'category_name': category,
                'original_text': original_text,
                'created_text': full_text,
                'name': name,
                'model_name': model_name,
                'prompt_type': 'generic'  
            })

            if (i % 20) == 0: 
                print(f"Pitch {i+1}/{num_of_samples} generado.")

        except Exception as e:
            print(f"Error con modelo {model_name}, muestra {i+1}: {e}")

# Crear DataFrame final
df_generated = pd.DataFrame(rows)


Usando modelo: marin/marin-8b-instruct
Pitch 1/230 generado.
Pitch 21/230 generado.
Pitch 41/230 generado.
Pitch 61/230 generado.
Pitch 81/230 generado.
Pitch 101/230 generado.
Pitch 121/230 generado.
Pitch 141/230 generado.
Pitch 161/230 generado.
Pitch 181/230 generado.
Pitch 201/230 generado.
Pitch 221/230 generado.

Usando modelo: deepseek-ai/deepseek-r1
Pitch 1/230 generado.
Pitch 21/230 generado.
Pitch 41/230 generado.
Pitch 61/230 generado.
Pitch 81/230 generado.
Pitch 101/230 generado.
Pitch 121/230 generado.
Pitch 141/230 generado.
Pitch 161/230 generado.
Pitch 181/230 generado.
Pitch 201/230 generado.
Pitch 221/230 generado.

Usando modelo: qwen/qwen3-235b-a22b
Pitch 1/230 generado.
Pitch 21/230 generado.
Pitch 41/230 generado.
Pitch 61/230 generado.
Pitch 81/230 generado.
Pitch 101/230 generado.
Pitch 121/230 generado.
Pitch 141/230 generado.
Pitch 161/230 generado.
Pitch 181/230 generado.
Pitch 201/230 generado.
Pitch 221/230 generado.


In [10]:
df_generated.head()
print("Longitud del dataset generado: ", df_generated.shape[0])

Longitud del dataset generado:  690


In [13]:
# Ejemplo de texto creado
df_generated.iloc[0]['created_text']

#Guardar el DataFrame generado en csv
df_generated.to_csv("generic_pitches.csv", index=False)

# Creación de la base de datos con prompt estructurado: 

In [None]:
structured_rows = []  

for model_name, model_config in models_config.items():
    print(f"\nUsando modelo: {model_name}")

    for i in range(num_of_samples):
        try:
            category_name = df.iloc[i]['category_name']
            name = df.iloc[i]['name']
            blurb = df.iloc[i]['blurb']
            backers_count = df.iloc[i]['backers_count']
            staff_pick = df.iloc[i]['staff_pick']
            # Para cambiar la instrucción necesitas dejar todo entre comillas
            # Las variable están dadas entre llaves y las puedes mover de lugar 
            instruction = f"""
            Generate a professional and structured pitch deck summary for a project titled **'{name}'**, which falls under the **'{category_name}'** category. This pitch should be a fluent and engaging extension of the original project description (blurb) provided below.

            Original Project Blurb:
            "{blurb}"

            **Instructions for Generation (Max: 150 words):**

            1. **Structure** (use headings and bullet points if needed):
            - **Problem (1–2 sentences):** Clearly describe the central problem addressed by the project. Include at least 2 keywords or phrases (2–4 words) directly from the blurb.
            - **Solution (2–3 sentences):** Explain how the project '{name}' provides a unique or effective solution. Reuse at least 3 phrases from the blurb and incorporate relevant synonyms to enhance lexical richness.
            - **Value Proposition (1 sentence):** Provide a compelling summary combining key nouns and verbs from the blurb, rephrased with synonyms or grammatical variations (e.g., "connect" → "connection", "automates" → "automation").

            2. **Lexical and Semantic Alignment Guidelines (to optimize similarity metrics):**
            - Aim for **at least 70% lexical overlap** with the blurb.
            - Preserve key **verbs, nouns, and adjectives**, while enriching the language with inflections or morphological variants.
            - Incorporate **multi-word expressions** (n-grams) from the blurb without repetition or redundancy.
            - Ensure the **semantic intent and core meaning remain unchanged**, even with paraphrasing or reordering.

            3. **Style**:
            - Use **natural, persuasive, and concise** language.
            - Avoid overly generic or vague phrases.
            - Ensure coherence and logical flow across sections.
            """

            completion = client.chat.completions.create(
                model=model_name,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a professional pitch deck generator. Your task is to create compelling, structured summaries based on project blurbs, highlighting key challenges, solutions, and unique value propositions. Always aim for clarity, conciseness, and strong lexical alignment with the input."
                    },
                    {
                        "role": "user",
                        "content": instruction
                    }
                ],
                stream=True,
                **model_config["params"]
            )

            full_text = ""
            for chunk in completion:
                fragment = model_config["extract_func"](chunk)
                if fragment:
                    full_text += fragment

            structured_rows.append({
                'category_name': category_name,
                'original_text': blurb,
                'created_text': full_text,
                'name': name,
                'model_name': model_name,
                'prompt_type': 'structured' # Cambiarle a 'structured_n' 
            })

            if (i % 20) == 0: 
                print(f"Pitch {i+1}/{num_of_samples} generado.")

        except Exception as e:
            print(f"Error con modelo {model_name}, muestra {i+1}: {e}")


df_structured = pd.DataFrame(structured_rows) #Le vas a cambiar el nombre a la variable 


Usando modelo: marin/marin-8b-instruct
Pitch 1/230 generado.
Pitch 21/230 generado.
Pitch 41/230 generado.
Pitch 61/230 generado.
Pitch 81/230 generado.
Pitch 101/230 generado.
Pitch 121/230 generado.
Pitch 141/230 generado.
Pitch 161/230 generado.
Pitch 181/230 generado.
Pitch 201/230 generado.
Pitch 221/230 generado.

Usando modelo: deepseek-ai/deepseek-r1
Pitch 1/230 generado.
Pitch 21/230 generado.
Pitch 41/230 generado.
Pitch 61/230 generado.
Pitch 81/230 generado.
Pitch 101/230 generado.
Pitch 121/230 generado.
Pitch 141/230 generado.
Pitch 161/230 generado.
Pitch 181/230 generado.
Pitch 201/230 generado.
Pitch 221/230 generado.

Usando modelo: qwen/qwen3-235b-a22b
Pitch 1/230 generado.
Pitch 21/230 generado.
Pitch 41/230 generado.
Pitch 61/230 generado.
Pitch 81/230 generado.


## Combinando en un solo df el prompt genérico y estructurado: 

In [None]:
dataframes = [df_generated, df_structured]  # Agrega aquí todos los DataFrames que quieras combinar

df_combined = pd.concat(dataframes, axis=0, ignore_index=True) #Agregas todos los archivos de promt. por ejemplo df_structured_1


df_combined = df_combined.sort_values(by=['model_name', 'prompt_type']).reset_index(drop=True)


print(f"DataFrame combinado ({len(df_combined)} filas):")
print(df_combined[['model_name', 'prompt_type', 'name']].head(6))
# Guardar el DataFrame combinado en un archivo CSV
df_combined.to_csv('pitch_decks_combined.csv', index=False)

DataFrame combinado (36 filas):
                model_name prompt_type  \
0  deepseek-ai/deepseek-r1     generic   
1  deepseek-ai/deepseek-r1     generic   
2  deepseek-ai/deepseek-r1     generic   
3  deepseek-ai/deepseek-r1  structured   
4  deepseek-ai/deepseek-r1  structured   
5  deepseek-ai/deepseek-r1  structured   

                                                name  
0  Thread Boards 2.0 | 3D printable peg boards wi...  
1                 Alyssum - A 2D Animated Short Film  
2                         Is There Anyone Out There?  
3  Thread Boards 2.0 | 3D printable peg boards wi...  
4                 Alyssum - A 2D Animated Short Film  
5                         Is There Anyone Out There?  


## Uso y comparación semántica de los textos: 


### Rouge-L: 

In [12]:
from rouge_score import rouge_scorer


rouge_scores = []

for i in range(len(df_combined)): 
    original = df_combined.iloc[i]['original_text']
    created = df_combined.iloc[i]['created_text']

    # Crear el evaluador
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

    # Calcular ROUGE-L
    scores = scorer.score(original, created)

    rouge_scores.append({
        'Precision': round(scores['rougeL'].precision, 3),
        'Recall': round(scores['rougeL'].recall, 3),
        'F1': round(scores['rougeL'].fmeasure, 3)
    })

df_combined['Rouge-L-Score'] = rouge_scores


In [13]:
df_combined.head()

Unnamed: 0,category_name,original_text,created_text,name,model_name,prompt_type,Rouge-L-Score
0,3D Printing,Organize anything with our innovative 3D print...,"<think>\nOkay, I need to summarize the essence...",Thread Boards 2.0 | 3D printable peg boards wi...,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.025, 'Recall': 0.474, 'F1': 0...."
1,Animation,Alyson confronts her perfectionism as she stru...,"<think>\nOkay, the user wants a summary of the...",Alyssum - A 2D Animated Short Film,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.012, 'Recall': 0.227, 'F1': 0...."
2,Animation,A boy wonders if there might be other people l...,"<think>\nOkay, the user wants a summary of the...",Is There Anyone Out There?,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.03, 'Recall': 0.346, 'F1': 0.055}"
3,3D Printing,Organize anything with our innovative 3D print...,"<think>\nOkay, let's tackle this query. The us...",Thread Boards 2.0 | 3D printable peg boards wi...,deepseek-ai/deepseek-r1,structured,"{'Precision': 0.022, 'Recall': 1.0, 'F1': 0.043}"
4,Animation,Alyson confronts her perfectionism as she stru...,"<think>\nOkay, let's tackle this. The user wan...",Alyssum - A 2D Animated Short Film,deepseek-ai/deepseek-r1,structured,"{'Precision': 0.026, 'Recall': 0.818, 'F1': 0...."


### BLEU: 

In [14]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction


bleu_scores = []


smooth = SmoothingFunction().method4

for i in range(len(df_combined)):
    original = df_combined.iloc[i]['original_text']
    creado = df_combined.iloc[i]['created_text']

    # Tokenizar
    ref = [original.lower().split()]  # Referencia (lista de listas)
    hyp = creado.lower().split()      # Hipótesis (generado)


    score = sentence_bleu(ref, hyp, smoothing_function=smooth)


    bleu_scores.append(round(score, 3))


df_combined['BLEU'] = bleu_scores

In [15]:
df_combined.head()

Unnamed: 0,category_name,original_text,created_text,name,model_name,prompt_type,Rouge-L-Score,BLEU
0,3D Printing,Organize anything with our innovative 3D print...,"<think>\nOkay, I need to summarize the essence...",Thread Boards 2.0 | 3D printable peg boards wi...,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.025, 'Recall': 0.474, 'F1': 0....",0.004
1,Animation,Alyson confronts her perfectionism as she stru...,"<think>\nOkay, the user wants a summary of the...",Alyssum - A 2D Animated Short Film,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.012, 'Recall': 0.227, 'F1': 0....",0.001
2,Animation,A boy wonders if there might be other people l...,"<think>\nOkay, the user wants a summary of the...",Is There Anyone Out There?,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.03, 'Recall': 0.346, 'F1': 0.055}",0.002
3,3D Printing,Organize anything with our innovative 3D print...,"<think>\nOkay, let's tackle this query. The us...",Thread Boards 2.0 | 3D printable peg boards wi...,deepseek-ai/deepseek-r1,structured,"{'Precision': 0.022, 'Recall': 1.0, 'F1': 0.043}",0.02
4,Animation,Alyson confronts her perfectionism as she stru...,"<think>\nOkay, let's tackle this. The user wan...",Alyssum - A 2D Animated Short Film,deepseek-ai/deepseek-r1,structured,"{'Precision': 0.026, 'Recall': 0.818, 'F1': 0....",0.007


### Embeddings + Similitud coseno: 

In [16]:
from sentence_transformers import SentenceTransformer, util


model = SentenceTransformer('all-MiniLM-L6-v2')

semantic_similarities = []


for i in range(len(df_combined)):
    texto1 = df_combined.iloc[i]['original_text']
    texto2 = df_combined.iloc[i]['created_text']

    # Obtener los embeddings
    embeddings = model.encode([texto1, texto2], convert_to_tensor=True)

    # Calcular similitud de coseno
    similarity = util.pytorch_cos_sim(embeddings[0], embeddings[1]).item()

    semantic_similarities.append(round(similarity, 3))

df_combined['Semantic_Similarity'] = semantic_similarities

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
df_combined.head()

Unnamed: 0,category_name,original_text,created_text,name,model_name,prompt_type,Rouge-L-Score,BLEU,Semantic_Similarity
0,3D Printing,Organize anything with our innovative 3D print...,"<think>\nOkay, I need to summarize the essence...",Thread Boards 2.0 | 3D printable peg boards wi...,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.025, 'Recall': 0.474, 'F1': 0....",0.004,0.678
1,Animation,Alyson confronts her perfectionism as she stru...,"<think>\nOkay, the user wants a summary of the...",Alyssum - A 2D Animated Short Film,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.012, 'Recall': 0.227, 'F1': 0....",0.001,0.13
2,Animation,A boy wonders if there might be other people l...,"<think>\nOkay, the user wants a summary of the...",Is There Anyone Out There?,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.03, 'Recall': 0.346, 'F1': 0.055}",0.002,0.166
3,3D Printing,Organize anything with our innovative 3D print...,"<think>\nOkay, let's tackle this query. The us...",Thread Boards 2.0 | 3D printable peg boards wi...,deepseek-ai/deepseek-r1,structured,"{'Precision': 0.022, 'Recall': 1.0, 'F1': 0.043}",0.02,0.603
4,Animation,Alyson confronts her perfectionism as she stru...,"<think>\nOkay, let's tackle this. The user wan...",Alyssum - A 2D Animated Short Film,deepseek-ai/deepseek-r1,structured,"{'Precision': 0.026, 'Recall': 0.818, 'F1': 0....",0.007,0.434


### Meteor: 


In [18]:
import nltk
from nltk.translate.meteor_score import meteor_score
from nltk.tokenize import word_tokenize

# Descargar recursos necesarios de NLTK 
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')  # Para tokenización
nltk.download('punkt_tab')

meteor_scores = []

for i in range(len(df_combined)):
    try:
        texto_referencia = df_combined.iloc[i]['original_text']
        texto_hipotesis = df_combined.iloc[i]['created_text']
        
        tokens_ref = word_tokenize(str(texto_referencia).lower())
        tokens_hip = word_tokenize(str(texto_hipotesis).lower())
        
        puntuacion = meteor_score([tokens_ref], tokens_hip)
        meteor_scores.append(round(puntuacion, 3))
    
    except Exception as e:
        print(f"Error en fila {i}: {str(e)}")
        meteor_scores.append(None)  # Añadir None si hay error

df_combined['METEOR_Score'] = meteor_scores


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\bugy1\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\bugy1\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\bugy1\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\bugy1\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [19]:
df_combined.head()

Unnamed: 0,category_name,original_text,created_text,name,model_name,prompt_type,Rouge-L-Score,BLEU,Semantic_Similarity,METEOR_Score
0,3D Printing,Organize anything with our innovative 3D print...,"<think>\nOkay, I need to summarize the essence...",Thread Boards 2.0 | 3D printable peg boards wi...,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.025, 'Recall': 0.474, 'F1': 0....",0.004,0.678,0.124
1,Animation,Alyson confronts her perfectionism as she stru...,"<think>\nOkay, the user wants a summary of the...",Alyssum - A 2D Animated Short Film,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.012, 'Recall': 0.227, 'F1': 0....",0.001,0.13,0.07
2,Animation,A boy wonders if there might be other people l...,"<think>\nOkay, the user wants a summary of the...",Is There Anyone Out There?,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.03, 'Recall': 0.346, 'F1': 0.055}",0.002,0.166,0.151
3,3D Printing,Organize anything with our innovative 3D print...,"<think>\nOkay, let's tackle this query. The us...",Thread Boards 2.0 | 3D printable peg boards wi...,deepseek-ai/deepseek-r1,structured,"{'Precision': 0.022, 'Recall': 1.0, 'F1': 0.043}",0.02,0.603,0.12
4,Animation,Alyson confronts her perfectionism as she stru...,"<think>\nOkay, let's tackle this. The user wan...",Alyssum - A 2D Animated Short Film,deepseek-ai/deepseek-r1,structured,"{'Precision': 0.026, 'Recall': 0.818, 'F1': 0....",0.007,0.434,0.104


### Desviación estándar

In [None]:
# Calculo de la desviación estándar de las puntuaciones
import numpy as np
std_dev_scores = df_combined[['Rouge-L-Score', 'BLEU', 'Semantic_Similarity', 'METEOR_Score']].std().round(3)
print("\nDesviación estándar de las puntuaciones:")
print(std_dev_scores)
df_combined['std_dev'] = std_dev_scores

In [20]:
# Guardar como CSV 
df_combined.to_csv('pitches_combinados.csv', index=False)

# Resultados


## Genérico 

In [21]:
df_pitches = pd.read_csv("./pitches_combinados.csv")
df_pitches_generic = df_pitches[df_pitches["prompt_type"] == "generic"]
df_pitches_generic.head()

Unnamed: 0,category_name,original_text,created_text,name,model_name,prompt_type,Rouge-L-Score,BLEU,Semantic_Similarity,METEOR_Score
0,3D Printing,Organize anything with our innovative 3D print...,"<think>\nOkay, I need to summarize the essence...",Thread Boards 2.0 | 3D printable peg boards wi...,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.025, 'Recall': 0.474, 'F1': 0....",0.004,0.678,0.124
1,Animation,Alyson confronts her perfectionism as she stru...,"<think>\nOkay, the user wants a summary of the...",Alyssum - A 2D Animated Short Film,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.012, 'Recall': 0.227, 'F1': 0....",0.001,0.13,0.07
2,Animation,A boy wonders if there might be other people l...,"<think>\nOkay, the user wants a summary of the...",Is There Anyone Out There?,deepseek-ai/deepseek-r1,generic,"{'Precision': 0.03, 'Recall': 0.346, 'F1': 0.055}",0.002,0.166,0.151
12,3D Printing,Organize anything with our innovative 3D print...,The brand 3D Printing specializes in creating ...,Thread Boards 2.0 | 3D printable peg boards wi...,marin/marin-8b-instruct,generic,"{'Precision': 0.182, 'Recall': 0.421, 'F1': 0....",0.034,0.726,0.301
13,Animation,Alyson confronts her perfectionism as she stru...,Animation is a dynamic medium that brings imag...,Alyssum - A 2D Animated Short Film,marin/marin-8b-instruct,generic,"{'Precision': 0.026, 'Recall': 0.136, 'F1': 0....",0.004,0.04,0.101


In [None]:
import ast

df_pitches_generic['Rouge-L-Score'] = df_pitches_generic['Rouge-L-Score'].apply(ast.literal_eval)

df_pitches_generic[['Precision', 'Recall', 'F1']] = df_pitches_generic['Rouge-L-Score'].apply(pd.Series)

metrics_columns = ['BLEU', 'Semantic_Similarity', 'METEOR_Score', 'Precision', 'Recall', 'F1']

metrics_avg = df_pitches_generic[metrics_columns].mean()

print("Métricas generales para prompts genéricos: \n",metrics_avg)


BLEU                   0.007889
Semantic_Similarity    0.290778
METEOR_Score           0.152778
Precision              0.049222
Recall                 0.327556
F1                     0.076333
dtype: float64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pitches_generic['Rouge-L-Score'] = df_pitches_generic['Rouge-L-Score'].apply(ast.literal_eval)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pitches_generic[['Precision', 'Recall', 'F1']] = df_pitches_generic['Rouge-L-Score'].apply(pd.Series)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df

### Marin-8b-instruct + promtp genérico

In [None]:
import pandas as pd
import ast  # Para convertir strings a diccionarios (si es necesario)

# Cargar datos
df_nuevo = pd.read_csv("./pitches_combinados.csv")

# Filtrar por modelo y tipo de prompt
df_marin = df_nuevo[
    (df_nuevo["model_name"] == "marin/marin-8b-instruct") & 
    (df_nuevo["prompt_type"] == "generic")
].copy()  # Usar .copy() para evitar SettingWithCopyWarning

# Extraer Precision, Recall y F1 de Rouge-L-Score (si es un diccionario)
if isinstance(df_marin['Rouge-L-Score'].iloc[0], str):
    # Si es un string, convertirlo a diccionario
    df_marin['Rouge-L-Score'] = df_marin['Rouge-L-Score'].apply(ast.literal_eval)

# Crear nuevas columnas a partir del diccionario
df_marin['Rouge_Precision'] = df_marin['Rouge-L-Score'].apply(lambda x: x['Precision'])
df_marin['Rouge_Recall'] = df_marin['Rouge-L-Score'].apply(lambda x: x['Recall'])
df_marin['Rouge_F1'] = df_marin['Rouge-L-Score'].apply(lambda x: x['F1'])

# Calcular promedios
metrics_columns = ['BLEU', 'Semantic_Similarity', 'METEOR_Score', 'Rouge_Precision', 'Rouge_Recall', 'Rouge_F1']
metrics_avg_marin = df_marin[metrics_columns].mean()

print("Métricas de prompt genérico con Marin 8-B instruct: \n")
print(metrics_avg_marin)

BLEU                   0.018333
Semantic_Similarity    0.257000
METEOR_Score           0.200667
Rouge_Precision        0.100333
Rouge_Recall           0.249667
Rouge_F1               0.140667
dtype: float64


### deepseek r1 + prompt genérico 

In [None]:
import pandas as pd
import ast

# Cargar datos
df_nuevo = pd.read_csv("./pitches_combinados.csv")

# Filtrar por modelo y tipo de prompt (¡sin tabulación!)
df_marin = df_nuevo[
    (df_nuevo["model_name"] == "deepseek-ai/deepseek-r1") & 
    (df_nuevo["prompt_type"] == "generic")
].copy()

# Verificar si hay datos
if df_marin.empty:
    print("Error: No hay registros con model_name = 'deepseek-ai/deepseek-r1' y prompt_type = 'generic'.")
else:
    # Extraer métricas de Rouge-L-Score
    if isinstance(df_marin['Rouge-L-Score'].iloc[0], str):
        df_marin['Rouge-L-Score'] = df_marin['Rouge-L-Score'].apply(ast.literal_eval)

    df_marin['Rouge_Precision'] = df_marin['Rouge-L-Score'].apply(lambda x: x['Precision'])
    df_marin['Rouge_Recall'] = df_marin['Rouge-L-Score'].apply(lambda x: x['Recall'])
    df_marin['Rouge_F1'] = df_marin['Rouge-L-Score'].apply(lambda x: x['F1'])

    # Calcular promedios
    metrics_columns = ['BLEU', 'Semantic_Similarity', 'METEOR_Score', 'Rouge_Precision', 'Rouge_Recall', 'Rouge_F1']
    metrics_avg_marin = df_marin[metrics_columns].mean()


    print("Métricas de prompt genérico con deepseek r1: \n")
    print(metrics_avg_marin)

BLEU                   0.002333
Semantic_Similarity    0.324667
METEOR_Score           0.115000
Rouge_Precision        0.022333
Rouge_Recall           0.349000
Rouge_F1               0.041667
dtype: float64


### qwen3 + prompt genérico

In [25]:
import pandas as pd
import ast

# Cargar datos
df_nuevo = pd.read_csv("./pitches_combinados.csv")

# Filtrar por modelo qwen/qwen3-235b-a22b y tipo de prompt generic
df_qwen = df_nuevo[
    (df_nuevo["model_name"] == "qwen/qwen3-235b-a22b") & 
    (df_nuevo["prompt_type"] == "generic")
].copy()

# Verificar si hay datos
if df_qwen.empty:
    print("Error: No hay registros con model_name = 'qwen/qwen3-235b-a22b' y prompt_type = 'generic'.")
    print("Valores únicos en model_name:", df_nuevo["model_name"].unique())
    print("Valores únicos en prompt_type:", df_nuevo["prompt_type"].unique())
else:
    # Extraer métricas de Rouge-L-Score
    if isinstance(df_qwen['Rouge-L-Score'].iloc[0], str):
        df_qwen['Rouge-L-Score'] = df_qwen['Rouge-L-Score'].apply(ast.literal_eval)

    df_qwen['Rouge_Precision'] = df_qwen['Rouge-L-Score'].apply(lambda x: x['Precision'])
    df_qwen['Rouge_Recall'] = df_qwen['Rouge-L-Score'].apply(lambda x: x['Recall'])
    df_qwen['Rouge_F1'] = df_qwen['Rouge-L-Score'].apply(lambda x: x['F1'])

    # Calcular promedios
    metrics_columns = ['BLEU', 'Semantic_Similarity', 'METEOR_Score', 'Rouge_Precision', 'Rouge_Recall', 'Rouge_F1']
    metrics_avg_qwen = df_qwen[metrics_columns].mean()

    print("Métricas promedio para qwen/qwen3-235b-a22b:")
    print(metrics_avg_qwen)

Métricas promedio para qwen/qwen3-235b-a22b:
BLEU                   0.003000
Semantic_Similarity    0.290667
METEOR_Score           0.142667
Rouge_Precision        0.025000
Rouge_Recall           0.384000
Rouge_F1               0.046667
dtype: float64


## Structured 

In [26]:
df_pitches = pd.read_csv("./pitches_combinados.csv")
df_pitches_structured = df_pitches[df_pitches["prompt_type"] == "structured"]
df_pitches_structured.head()

Unnamed: 0,category_name,original_text,created_text,name,model_name,prompt_type,Rouge-L-Score,BLEU,Semantic_Similarity,METEOR_Score
3,3D Printing,Organize anything with our innovative 3D print...,"<think>\nOkay, let's tackle this query. The us...",Thread Boards 2.0 | 3D printable peg boards wi...,deepseek-ai/deepseek-r1,structured,"{'Precision': 0.022, 'Recall': 1.0, 'F1': 0.043}",0.02,0.603,0.12
4,Animation,Alyson confronts her perfectionism as she stru...,"<think>\nOkay, let's tackle this. The user wan...",Alyssum - A 2D Animated Short Film,deepseek-ai/deepseek-r1,structured,"{'Precision': 0.026, 'Recall': 0.818, 'F1': 0....",0.007,0.434,0.104
5,Animation,A boy wonders if there might be other people l...,"<think>\nOkay, let's tackle this. The user wan...",Is There Anyone Out There?,deepseek-ai/deepseek-r1,structured,"{'Precision': 0.039, 'Recall': 1.0, 'F1': 0.075}",0.035,0.465,0.177
15,3D Printing,Organize anything with our innovative 3D print...,## Thread Boards 2.0 | 3D Printable Peg Boards...,Thread Boards 2.0 | 3D printable peg boards wi...,marin/marin-8b-instruct,structured,"{'Precision': 0.107, 'Recall': 0.632, 'F1': 0....",0.019,0.836,0.473
16,Animation,Alyson confronts her perfectionism as she stru...,## Pitch Deck: Alyssum - A 2D Animated Short F...,Alyssum - A 2D Animated Short Film,marin/marin-8b-instruct,structured,"{'Precision': 0.08, 'Recall': 0.545, 'F1': 0.14}",0.041,0.498,0.291


In [27]:
import ast

df_pitches_structured['Rouge-L-Score'] = df_pitches_structured['Rouge-L-Score'].apply(ast.literal_eval)

df_pitches_structured[['Precision', 'Recall', 'F1']] = df_pitches_structured['Rouge-L-Score'].apply(pd.Series)

metrics_columns = ['BLEU', 'Semantic_Similarity', 'METEOR_Score', 'Precision', 'Recall', 'F1']

metrics_avg = df_pitches_structured[metrics_columns].mean()

print(metrics_avg)


BLEU                   0.019111
Semantic_Similarity    0.535333
METEOR_Score           0.202222
Precision              0.046667
Recall                 0.788444
F1                     0.084667
dtype: float64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pitches_structured['Rouge-L-Score'] = df_pitches_structured['Rouge-L-Score'].apply(ast.literal_eval)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pitches_structured[['Precision', 'Recall', 'F1']] = df_pitches_structured['Rouge-L-Score'].apply(pd.Series)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus

### Marin-8b-instruct + prompt structured 

In [28]:
import pandas as pd
import ast  # Para convertir strings a diccionarios (si es necesario)

# Cargar datos
df_nuevo = pd.read_csv("./pitches_combinados.csv")

# Filtrar por modelo y tipo de prompt
df_marin = df_nuevo[
    (df_nuevo["model_name"] == "marin/marin-8b-instruct") & 
    (df_nuevo["prompt_type"] == "structured")
].copy()  # Usar .copy() para evitar SettingWithCopyWarning

# Extraer Precision, Recall y F1 de Rouge-L-Score (si es un diccionario)
if isinstance(df_marin['Rouge-L-Score'].iloc[0], str):
    # Si es un string, convertirlo a diccionario
    df_marin['Rouge-L-Score'] = df_marin['Rouge-L-Score'].apply(ast.literal_eval)

# Crear nuevas columnas a partir del diccionario
df_marin['Rouge_Precision'] = df_marin['Rouge-L-Score'].apply(lambda x: x['Precision'])
df_marin['Rouge_Recall'] = df_marin['Rouge-L-Score'].apply(lambda x: x['Recall'])
df_marin['Rouge_F1'] = df_marin['Rouge-L-Score'].apply(lambda x: x['F1'])

# Calcular promedios
metrics_columns = ['BLEU', 'Semantic_Similarity', 'METEOR_Score', 'Rouge_Precision', 'Rouge_Recall', 'Rouge_F1']
metrics_avg_marin = df_marin[metrics_columns].mean()

print(metrics_avg_marin)

BLEU                   0.022667
Semantic_Similarity    0.621333
METEOR_Score           0.333333
Rouge_Precision        0.084667
Rouge_Recall           0.533333
Rouge_F1               0.146333
dtype: float64


### deepseek r1 + prompt structured

In [29]:
import pandas as pd
import ast

# Cargar datos
df_nuevo = pd.read_csv("./pitches_combinados.csv")

# Filtrar por modelo y tipo de prompt (¡sin tabulación!)
df_marin = df_nuevo[
    (df_nuevo["model_name"] == "deepseek-ai/deepseek-r1") & 
    (df_nuevo["prompt_type"] == "structured")
].copy()

# Verificar si hay datos
if df_marin.empty:
    print("Error: No hay registros con model_name = 'deepseek-ai/deepseek-r1' y prompt_type = 'generic'.")
else:
    # Extraer métricas de Rouge-L-Score
    if isinstance(df_marin['Rouge-L-Score'].iloc[0], str):
        df_marin['Rouge-L-Score'] = df_marin['Rouge-L-Score'].apply(ast.literal_eval)

    df_marin['Rouge_Precision'] = df_marin['Rouge-L-Score'].apply(lambda x: x['Precision'])
    df_marin['Rouge_Recall'] = df_marin['Rouge-L-Score'].apply(lambda x: x['Recall'])
    df_marin['Rouge_F1'] = df_marin['Rouge-L-Score'].apply(lambda x: x['F1'])

    # Calcular promedios
    metrics_columns = ['BLEU', 'Semantic_Similarity', 'METEOR_Score', 'Rouge_Precision', 'Rouge_Recall', 'Rouge_F1']
    metrics_avg_marin = df_marin[metrics_columns].mean()

    print(metrics_avg_marin)

BLEU                   0.020667
Semantic_Similarity    0.500667
METEOR_Score           0.133667
Rouge_Precision        0.029000
Rouge_Recall           0.939333
Rouge_F1               0.056333
dtype: float64


### qwen3 + prompt structured

In [30]:
import pandas as pd
import ast

# Cargar datos
df_nuevo = pd.read_csv("./pitches_combinados.csv")

# Filtrar por modelo qwen/qwen3-235b-a22b y tipo de prompt generic
df_qwen = df_nuevo[
    (df_nuevo["model_name"] == "qwen/qwen3-235b-a22b") & 
    (df_nuevo["prompt_type"] == "structured")
].copy()

# Verificar si hay datos
if df_qwen.empty:
    print("Error: No hay registros con model_name = 'qwen/qwen3-235b-a22b' y prompt_type = 'generic'.")
    print("Valores únicos en model_name:", df_nuevo["model_name"].unique())
    print("Valores únicos en prompt_type:", df_nuevo["prompt_type"].unique())
else:
    # Extraer métricas de Rouge-L-Score
    if isinstance(df_qwen['Rouge-L-Score'].iloc[0], str):
        df_qwen['Rouge-L-Score'] = df_qwen['Rouge-L-Score'].apply(ast.literal_eval)

    df_qwen['Rouge_Precision'] = df_qwen['Rouge-L-Score'].apply(lambda x: x['Precision'])
    df_qwen['Rouge_Recall'] = df_qwen['Rouge-L-Score'].apply(lambda x: x['Recall'])
    df_qwen['Rouge_F1'] = df_qwen['Rouge-L-Score'].apply(lambda x: x['F1'])

    # Calcular promedios
    metrics_columns = ['BLEU', 'Semantic_Similarity', 'METEOR_Score', 'Rouge_Precision', 'Rouge_Recall', 'Rouge_F1']
    metrics_avg_qwen = df_qwen[metrics_columns].mean()

    print("Métricas promedio para qwen/qwen3-235b-a22b:")
    print(metrics_avg_qwen)

Métricas promedio para qwen/qwen3-235b-a22b:
BLEU                   0.014000
Semantic_Similarity    0.484000
METEOR_Score           0.139667
Rouge_Precision        0.026333
Rouge_Recall           0.892667
Rouge_F1               0.051333
dtype: float64
