# LLM Simplification Exploration

This notebook tests the basic connection to Groq and runs a simple text simplification task.

In [None]:
# 1. Setup and Imports
import os
from dotenv import load_dotenv
from groq import Groq

# Read settings from the local .env file, then look up the Groq key.
load_dotenv()

api_key = os.environ.get("GROQ_API_KEY")

# Report whether the key is available; cells that call the API check `api_key` first.
if api_key:
    print("‚úÖ GROQ_API_KEY found.")
else:
    print("‚ùå ERROR: GROQ_API_KEY not found in .env")

In [None]:
# 2. Initialize Client
# Always bind `client`: without a key it becomes an explicit None instead of
# staying unbound (fresh kernel) or pointing at a stale client from an earlier run.
if api_key:
    client = Groq(api_key=api_key)
    print("Client initialized.")
else:
    client = None
    print("Client not initialized: GROQ_API_KEY is missing.")

In [None]:
# 3. Load Sample Data
sample_path = '../data/samples/sample_en.txt'

if os.path.exists(sample_path):
    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # locale default encoding (assumes the sample file is stored as UTF-8).
    with open(sample_path, 'r', encoding='utf-8') as f:
        original_text = f.read().strip()
    print(f"Loaded text ({len(original_text)} chars):\n")
    print("---")
    print(original_text)
    print("---")
else:
    # No sample on disk: fall back to a short built-in sentence so later cells still run.
    print(f"‚ùå File not found: {sample_path}")
    original_text = "The quick brown fox jumps over the lazy dog."

In [None]:
# 4. Run Simplification
simplified_text = ""

if api_key and original_text:
    # Conversation sent to the model: the instruction, then the text to simplify.
    chat_messages = [
        {
            "role": "system",
            "content": "Simplify the following text for a general audience. Make it easier to read and understand.",
        },
        {"role": "user", "content": original_text},
    ]
    # Model and sampling options for the request.
    request_options = dict(
        model="llama-3.1-8b-instant",
        temperature=0.5,
        max_tokens=1024,
        top_p=1,
        stream=False,
        stop=None,
    )

    try:
        completion = client.chat.completions.create(messages=chat_messages, **request_options)
        simplified_text = completion.choices[0].message.content
        # One print call, newline-separated, produces the same output as four separate prints.
        print("\n‚ú® Simplified Text:\n", "---", simplified_text, "---", sep="\n")
    except Exception as llm_error:
        # Any failure is reported and leaves the cell finished rather than raising.
        print(f"‚ùå Error calling LLM: {llm_error}")

In [None]:
# 5. Evaluate Simplification
def evaluate_text(text, model="llama-3.1-8b-instant", temperature=0.1):
    """Ask the LLM to score a text for simplicity, readability and universal understanding.

    Uses the module-level ``client`` created earlier in the notebook.

    Args:
        text: The text to evaluate. ``None``, an empty string, or a
            whitespace-only string short-circuits without calling the API.
        model: Chat model identifier passed to the completion request.
        temperature: Sampling temperature passed to the completion request.

    Returns:
        The content of the first choice's message. Returns
        ``"No text to evaluate."`` for blank input, or
        ``"Error evaluating: <exception>"`` if anything in the request fails.
        Failures are returned as text, not raised.
    """
    # Blank input (including whitespace-only strings) is not worth an API call.
    if not text or (isinstance(text, str) and not text.strip()):
        return "No text to evaluate."

    try:
        prompt = f"""
        Evaluate the following text for: 
        1. Simplicity (1-10)
        2. Readability (1-10)
        3. Universal Understanding (1-10)
        
        Text: "{text}"
        
        Provide the scores and a brief 1-sentence explanation for each.
        """

        completion = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
        )
        return completion.choices[0].message.content

    except Exception as e:
        # Covers API errors as well as a missing or uninitialized `client`.
        return f"Error evaluating: {e}"

# Score the simplified output only when the simplification step produced one.
if simplified_text:
    print("\nüìä Evaluation Results:\n")
    evaluation_report = evaluate_text(simplified_text)
    print(evaluation_report)

# 6. Hugging Face Data Integration

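Now testing with real data from the `wiki_lingua` dataset (a cross-lingual abstractive summarization benchmark built from WikiHow articles, used here as a source of real-world text to simplify).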

In [None]:
from datasets import load_dataset
import pandas as pd

try:
    # Fetch only the first 3 training examples to keep the download small.
    print("‚è≥ Loading dataset slice (wiki_lingua)...")
    # NOTE(review): trust_remote_code=True allows the dataset's loading script to run
    # locally — confirm this is acceptable and supported by the installed `datasets` version.
    load_options = {"split": "train[:3]", "trust_remote_code": True}
    dataset = load_dataset("wiki_lingua", "english", **load_options)

    print(f"‚úÖ Loaded {len(dataset)} examples.")
except Exception as load_error:
    # On any failure, report it and leave an empty list so the next cell skips its loop.
    print(f"‚ùå Error loading dataset: {load_error}")
    dataset = []

In [None]:
# Run simplification loop
# Full simplified outputs, one per example that was simplified and printed successfully.
# The printed lines are truncated to 150 characters, and `result` only keeps the last one.
simplified_results = []

if hasattr(dataset, '__iter__') and len(dataset) > 0:
    for i, item in enumerate(dataset):
        # Extract text (WikiLingua structure is specific, handling basic case)
        try:
            # 'article' is usually the source dict, 'document' is the text list
            input_text = item['article']['document'][0]
        except (KeyError, IndexError, TypeError):
            # Record does not have the expected shape: fall back to its string form.
            # Only lookup errors are caught, so Ctrl-C / SystemExit are not swallowed.
            input_text = str(item)  # Fallback

        print(f"\n--- Example {i+1} ---")
        print(f"üìù Input (snippet): {input_text[:150]}...")

        # Simplify
        if api_key:
            try:
                completion = client.chat.completions.create(
                    model="llama-3.1-8b-instant",
                    messages=[
                        {"role": "system", "content": "Simplify this text."},
                        {"role": "user", "content": input_text}
                    ],
                    temperature=0.5
                )
                result = completion.choices[0].message.content
                print(f"‚ú® Simplified: {result[:150]}... (check full output in var)")
                # Keep the untruncated output; `result` is overwritten on the next iteration.
                simplified_results.append(result)

                # Optional: Evaluate (commented out to save tokens/time if needed)
                # score = evaluate_text(result)
                # print(f"üìä {score.splitlines()[0]}...")

            except Exception as e:
                # Report the failure and continue with the next example.
                print(f"Error: {e}")
else:
    print("Skipping loop (no dataset loaded).")