In [None]:
from openai import OpenAI
from dotenv import load_dotenv

# Run python-dotenv's loader before building the client.
# NOTE(review): presumably this pulls settings from a local .env file into the
# process environment — confirm where the file is expected to live.
load_dotenv()
# Shared API client used by the experiment below, created with default settings.
# NOTE(review): presumably credentials are picked up from the environment — confirm.
client = OpenAI()

print("Penalties Parameter Explorer Ready!")

In [None]:

prompt = "Write 4 lines starting with the word AI."

# (frequency_penalty, presence_penalty) pairs to compare:
# neither penalty, each one alone, then both together.
penalty_pairs = [(0.0, 0.0), (0.8, 0.0), (0.0, 0.8), (0.8, 0.8)]

print("Penalties Experiment")
print(f"Prompt: '{prompt}'\n")

for freq_penalty, pres_penalty in penalty_pairs:
    print(f"⚖️ Frequency: {freq_penalty}, Presence: {pres_penalty}")

    # Everything except the two penalties is held constant across iterations
    # so that differences in output can be attributed to the penalties.
    request = dict(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        frequency_penalty=freq_penalty,
        presence_penalty=pres_penalty,
        max_tokens=150,
    )
    response = client.chat.completions.create(**request)

    reply = response.choices[0].message.content
    print(f"Result: {reply}")
    print()

In [None]:

import re
from collections import Counter


# Generated texts to analyse, keyed by configuration name.
# Each entry holds:
#   "config": the penalty settings associated with that configuration
#   "runs":   a list of generated texts, one string per run
# The analysis loop below skips any run whose text starts with "Error".
results = {
    "example_config": {
        "config": {"frequency_penalty": 0.0, "presence_penalty": 0.0},
        "runs": [
            "AI is amazing.\nAI can learn.\nAI helps people.\nAI is the future.",
            "AI is everywhere.\nAI is powerful.\nAI is changing the world.\nAI is here."
        ]
    }
}

def analyze_repetition(text):
    """Measure how much vocabulary is reused within ``text``.

    The text is lower-cased and split into runs of word characters, so the
    counts are case-insensitive and ignore punctuation.

    Returns a dict with:
        total_words:     number of word tokens found
        unique_words:    number of distinct tokens
        repetition_rate: 1 - unique/total (0 when there are no tokens)
        diversity_score: unique/total (0 when there are no tokens)
        most_common:     up to three (word, count) pairs, most frequent first
        word_counts:     Counter mapping every token to its frequency
    """
    tokens = re.findall(r'\b\w+\b', text.lower())
    frequencies = Counter(tokens)

    n_tokens = len(tokens)
    n_distinct = len(frequencies)

    # Both ratios are derived from the same quotient; with no tokens at all
    # they fall back to 0 rather than dividing by zero.
    if n_tokens > 0:
        diversity = n_distinct / n_tokens
        repetition = 1 - diversity
    else:
        diversity = 0
        repetition = 0

    return {
        'total_words': n_tokens,
        'unique_words': n_distinct,
        'repetition_rate': repetition,
        'diversity_score': diversity,
        'most_common': frequencies.most_common(3),
        'word_counts': frequencies
    }

def count_ai_starts(text):
    """Count the non-blank lines of ``text`` that begin with the word "AI".

    Matching is case-insensitive and requires "AI" to be a whole word, so
    "AI is..." and "AI-powered..." count while "Aim high" and "Air" do not.
    Blank lines (including whitespace-only ones) are ignored entirely, so
    they neither count as lines nor lower the compliance ratio.

    Returns:
        (ai_starts, total_lines): the number of lines starting with "AI" and
        the number of non-blank lines. Empty or whitespace-only text yields
        (0, 0).
    """
    # Strip each line first so leading indentation does not hide the word,
    # then drop lines that are empty after stripping.
    lines = [stripped for stripped in (raw.strip() for raw in text.splitlines()) if stripped]
    # \b after "ai" rejects longer words that merely share the prefix.
    ai_starts = sum(1 for line in lines if re.match(r'ai\b', line, re.IGNORECASE))
    return ai_starts, len(lines)


# Per-configuration analysis, keyed by configuration name. Each value holds
# the original config, a per-run breakdown, and averages across valid runs.
analysis_results = {}

print("DETAILED ANALYSIS")
print("="*70)

for config_name, data in results.items():
    print(f"\n🔍 Configuration: {config_name.upper()}")
    print("-" * 40)

    config_analysis = {
        'config': data['config'],
        'runs_analysis': [],
        'avg_metrics': {}
    }

    # One entry per analysed run; averaged after the inner loop.
    all_repetition_rates = []
    all_diversity_scores = []
    all_ai_compliance = []
    all_word_counts = []

    for i, run_text in enumerate(data['runs']):
        # Runs whose text starts with 'Error' are excluded from the analysis
        # so they do not distort the averages.
        if run_text.startswith('Error'):
            continue

        analysis = analyze_repetition(run_text)
        ai_count, total_lines = count_ai_starts(run_text)
        # Fraction of lines that start with "AI"; 0 when there are no lines.
        ai_compliance = ai_count / total_lines if total_lines > 0 else 0

        run_analysis = {
            'run_number': i + 1,
            'text': run_text,
            **analysis,
            'ai_starts': ai_count,
            'total_lines': total_lines,
            'ai_compliance': ai_compliance
        }

        config_analysis['runs_analysis'].append(run_analysis)

        all_repetition_rates.append(analysis['repetition_rate'])
        all_diversity_scores.append(analysis['diversity_score'])
        all_ai_compliance.append(ai_compliance)
        all_word_counts.append(analysis['total_words'])

        print(f"Run {i+1}:")
        print(f"  Words: {analysis['total_words']} total, {analysis['unique_words']} unique")
        print(f"  Diversity: {analysis['diversity_score']:.3f}")
        print(f"  AI starts: {ai_count}/{total_lines} lines")
        print(f"  Most common: {analysis['most_common'][:2]}")

    # Averages are only defined when at least one run was analysed; otherwise
    # 'avg_metrics' stays an empty dict.
    if all_repetition_rates:
        run_count = len(all_repetition_rates)
        config_analysis['avg_metrics'] = {
            'avg_repetition_rate': sum(all_repetition_rates) / run_count,
            'avg_diversity_score': sum(all_diversity_scores) / run_count,
            # Mean of the per-run compliance ratios. The previous formula,
            # sum(counts) / (sum(counts) + 1), was not an average and could
            # never reach 1.0 even when every line complied.
            'avg_ai_compliance': sum(all_ai_compliance) / run_count,
            'avg_word_count': sum(all_word_counts) / run_count
        }

        print(f"\nAverages:")
        print(f"  Repetition Rate: {config_analysis['avg_metrics']['avg_repetition_rate']:.3f}")
        print(f"  Diversity Score: {config_analysis['avg_metrics']['avg_diversity_score']:.3f}")
        print(f"  Word Count: {config_analysis['avg_metrics']['avg_word_count']:.1f}")

    analysis_results[config_name] = config_analysis

print(f"\n{'='*70}")
print("ANALYSIS COMPLETE")
print('='*70)