In [4]:
from openai import OpenAI

# 1. The Secret Password
# REPLACE the text inside the quotes with your actual Groq API key
my_api_key = "ENTER_YOUR_KEY_HERE"
# 2. Waking up the Robot
client = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=my_api_key
)

# 3. Sending the first message
print("Calling the Groq robot...")

completion = client.chat.completions.create(
  # WE CHANGED THIS LINE BELOW:
  model="llama-3.1-8b-instant", 
  messages=[
    {"role": "user", "content": "Hello! I am building a test lab. Are you ready?"}
  ]
)

# 4. Hearing the answer
print("Groq says:")
print(completion.choices[0].message.content)

Calling the Groq robot...
Groq says:
I'm ready to help. What's the purpose of your test lab, and what kind of infrastructure or equipment are you planning to set up? Are you testing software, networking, security, or something else?


In [6]:
# This is the text we want the robot to read
source_text = """
The Sun is a huge ball of hot, glowing gases at the heart of our solar system. 
Its gravity holds the solar system together, keeping everything â€“ from the biggest planets 
to the smallest particles of debris â€“ in its orbit. The connection and interactions 
between the Sun and Earth drive the seasons, ocean currents, weather, climate, 
radiation belts and auroras. Though it is special to us, there are billions of 
stars like our Sun scattered across the Milky Way galaxy.
"""

print("Homework assignment loaded!")

Homework assignment loaded!


In [8]:
import pandas as pd # This tool makes tables

# 1. Our list of different prompts to test
prompts = [
    "Summarize the text below:",
    "Summarize the text below in exactly one sentence:",
    "Explain the text below to a 5-year-old child:"
]

results = [] # An empty box to put our answers in

print("Starting the experiment... (this might take a few seconds)")

# 2. The Loop (The Machine)
for prompt_style in prompts:
    
    # We glue the instruction (prompt) and the homework (source_text) together
    full_message = prompt_style + "\n\n" + source_text
    
    # We ask the robot
    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[{"role": "user", "content": full_message}]
    )
    
    # We grab just the text answer
    answer_text = response.choices[0].message.content
    
    # We save the result into our box
    results.append({
        "Prompt Style": prompt_style,
        "Robot Answer": answer_text
    })

# 3. Turn the box into a nice table
df = pd.DataFrame(results)

# 4. Show the table
df

Starting the experiment... (this might take a few seconds)


Unnamed: 0,Prompt Style,Robot Answer
0,Summarize the text below:,"The text describes the Sun as a massive, glowi..."
1,Summarize the text below in exactly one sentence:,The Sun is a massive ball of hot gases at the ...
2,Explain the text below to a 5-year-old child:,"Oh boy, let's talk about the Sun. \n\nSo, the ..."


In [10]:
import tiktoken

# 1. Get the counting tool (we use the standard one for now)
# It's like a ruler for measuring words
encoder = tiktoken.get_encoding("cl100k_base")

# 2. Let's make a little machine (function) to count for us
def count_bricks(text):
    # This turns words into a list of numbers (tokens)
    tokens = encoder.encode(text) 
    # Then we count how many numbers are in the list
    return len(tokens)

# Test it out on a simple sentence first
test_sentence = "Hello world!"
print(f"The sentence '{test_sentence}' is {count_bricks(test_sentence)} tokens long.")

The sentence 'Hello world!' is 3 tokens long.


In [12]:
# 1. Apply the counting machine to our 'Robot Answer' column
# This says: "Look at every answer, count the bricks, and put the number in a new column."
df["Token Count"] = df["Robot Answer"].apply(count_bricks)

# 2. Show the fancy new table
df

Unnamed: 0,Prompt Style,Robot Answer,Token Count
0,Summarize the text below:,"The text describes the Sun as a massive, glowi...",66
1,Summarize the text below in exactly one sentence:,The Sun is a massive ball of hot gases at the ...,38
2,Explain the text below to a 5-year-old child:,"Oh boy, let's talk about the Sun. \n\nSo, the ...",166


In [14]:
# This is the Perfect Answer (The Teacher's Key)
reference_summary = "The Sun is a massive star that holds the solar system together with gravity, driving Earth's climate and weather."

print("Teacher's Answer Key loaded!")

Teacher's Answer Key loaded!


In [16]:
from rouge_score import rouge_scorer

# 1. Turn on the grading machine
# 'rouge1' checks for matching words
# 'rougeL' checks if the sentence flows in the same order
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

# 2. Let's make a grading function
def grade_assignment(robot_text):
    # Calculate the score
    scores = scorer.score(reference_summary, robot_text)
    
    # We want the "F-Measure" (a balanced score) for ROUGE-1
    # We multiply by 100 to make it look like a percentage (0 to 100)
    grade = scores['rouge1'].fmeasure * 100
    return round(grade, 1)

# Test it!
print(f"Grade for a dummy answer: {grade_assignment('The Sun is a star.')}/100")

Grade for a dummy answer: 40.0/100


In [18]:
# 1. Apply the grading machine to the 'Robot Answer' column
df["Grade (ROUGE)"] = df["Robot Answer"].apply(grade_assignment)

# 2. Show the final report card
df

Unnamed: 0,Prompt Style,Robot Answer,Token Count,Grade (ROUGE)
0,Summarize the text below:,"The text describes the Sun as a massive, glowi...",66,43.6
1,Summarize the text below in exactly one sentence:,The Sun is a massive ball of hot gases at the ...,38,51.9
2,Explain the text below to a 5-year-old child:,"Oh boy, let's talk about the Sun. \n\nSo, the ...",166,13.3


In [20]:
import re # The Regex tool

# 1. Let's build a Vacuum Cleaner function
def clean_up_mess(text):
    # This command says: "Find 'Oh boy,' and replace it with NOTHING."
    text = re.sub(r"Oh boy,", "", text)
    
    # This says: "Find 'Here is the summary:' and destroy it."
    text = re.sub(r"Here is a summary:", "", text)
    
    # This says: "Find all the \n symbols and replace them with a space."
    text = text.replace("\n", " ")
    
    # .strip() cuts off any empty spaces at the very start or end
    return text.strip()

# 2. Let's test the vacuum on a messy sentence
messy_sentence = "Oh boy, \n\n Here is a summary: The Sun is hot."
clean_sentence = clean_up_mess(messy_sentence)

print(f"Messy: {messy_sentence}")
print(f"Clean: {clean_sentence}")

Messy: Oh boy, 

 Here is a summary: The Sun is hot.
Clean: The Sun is hot.


In [22]:
# 1. Create a new column called 'Clean Answer'
# We tell the vacuum to run on every row in 'Robot Answer'
df["Clean Answer"] = df["Robot Answer"].apply(clean_up_mess)

# 2. Grade the CLEAN answers
df["New Grade"] = df["Clean Answer"].apply(grade_assignment)

# 3. Show the table again to compare
# We will just look at the Prompt, the Old Grade, and the New Grade
df[["Prompt Style", "Grade (ROUGE)", "New Grade"]]

Unnamed: 0,Prompt Style,Grade (ROUGE),New Grade
0,Summarize the text below:,43.6,43.6
1,Summarize the text below in exactly one sentence:,51.9,51.9
2,Explain the text below to a 5-year-old child:,13.3,13.5


In [24]:
import time
from nltk.translate.bleu_score import sentence_bleu

# 1. The Stopwatch Setup
def measure_speed_and_ask(prompt, text):
    start_time = time.time() # Click the stopwatch ON
    
    # Ask the robot
    full_message = prompt + "\n\n" + text
    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[{"role": "user", "content": full_message}]
    )
    
    end_time = time.time() # Click the stopwatch OFF
    
    # Calculate time taken
    speed = end_time - start_time
    return response.choices[0].message.content, speed

# 2. The Final Experiment Loop
final_results = []

print("Running the Final Grand Experiment... ðŸ§ª")

for prompt_style in prompts:
    # A. Run the Stopwatch and get answer
    answer, speed_seconds = measure_speed_and_ask(prompt_style, source_text)
    
    # B. Clean the answer (Vacuum)
    clean_answer = clean_up_mess(answer)
    
    # C. Count Tokens (Lego bricks)
    tokens = count_bricks(clean_answer)
    
    # D. Grade with ROUGE (The Nice Judge)
    rouge_grade = grade_assignment(clean_answer)
    
    # E. Grade with BLEU (The Strict Judge)
    # BLEU is tricky, it wants lists of words, so we split the sentences
    ref_split = reference_summary.split()
    ans_split = clean_answer.split()
    bleu_score = sentence_bleu([ref_split], ans_split) * 100 # Make it a percentage
    
    # F. Save EVERYTHING
    final_results.append({
        "Prompt": prompt_style,
        "Speed (Seconds)": round(speed_seconds, 2), # Round to 2 decimal places
        "Tokens": tokens,
        "ROUGE Score": rouge_grade,
        "BLEU Score": round(bleu_score, 1)
    })

# 3. Create the Super Table
final_df = pd.DataFrame(final_results)
final_df

Running the Final Grand Experiment... ðŸ§ª


The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Unnamed: 0,Prompt,Speed (Seconds),Tokens,ROUGE Score,BLEU Score
0,Summarize the text below:,0.44,60,44.4,0.0
1,Summarize the text below in exactly one sentence:,0.18,47,44.4,9.5
2,Explain the text below to a 5-year-old child:,0.46,203,15.1,0.0
