# Using OPENAPI - GPT 4.0 mini 

In [28]:
# Import required libraries
import os
import re
from pathlib import Path
import pandas as pd
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
base_path = Path("Documents")
years = ["2020_inf_profile", "2021_inf_profile", 
         "2022_inf_profile", "2023_inf_profile", "2024_inf_profile"]

In [29]:
import pandas as pd
import os
from pathlib import Path
def combine_year_texts():
    # Create empty list to store year, race and text data
    race_texts = []
    
    # Iterate through each year folder
    for year in years:
        year_path = base_path / year
        
        if year_path.exists():
            # Get all txt files in the year folder
            txt_files = list(year_path.glob('*.txt'))
            
            # Process each race document separately
            for txt_file in txt_files:
                try:
                    with open(txt_file, 'r', encoding='utf-8') as f:
                        # Extract race name from filename
                        race_name = txt_file.stem.split('-')[0].strip()
                        
                        # Add as separate row with year, race and content
                        race_texts.append({
                            'year': year.split('-')[0],  # Extract year from folder name
                            'race': race_name,
                            'text': f.read().strip()  # Remove extra whitespace
                        })
                except Exception as e:
                    print(f"Error reading {txt_file}: {e}")
    
    # Create DataFrame with columns for year, race and text
    df = pd.DataFrame(race_texts)
    
    # Sort by year and race
    df = df.sort_values(['year', 'race'])
    
    # Save to CSV
    df.to_csv('goldstandard/group.csv', index=False)
    print("Created group.csv with documents organized by year and race")

# Run the function
combine_year_texts()


Created group.csv with documents organized by year and race


In [30]:
import os
from dotenv import load_dotenv
import pandas as pd
from tqdm import tqdm
from openai import OpenAI
import requests
import json

# Load environment variables
load_dotenv()

# Initialize model clients
models_config = {
    'gpt-4o-mini': {
        'provider': 'openai',
        'client': OpenAI(api_key=os.getenv('OPENAI_API_KEY')),
        'model_name': 'gpt-4o-mini',
        'level1_model': 'gpt-4o-mini',
        'level2_model': 'gpt-4o',
        'level3_model': 'gpt-4o'
    },
    'groq-llama': {
        'provider': 'groq',
        'client': OpenAI(
            api_key=os.getenv('GROQ_API_KEY'),
            base_url="https://api.groq.com/openai/v1"
        ),
        'model_name': 'llama-3.3-70b-versatile"',
        'level1_model': 'llama-3.3-70b-versatile',
        'level2_model': 'llama-3.3-70b-versatile',
        'level3_model': 'llama-3.3-70b-versatile'
    }
}

print("✓ Multiple model clients initialized")
print(f"\nAvailable models:")
for model_key in models_config.keys():
    print(f"  - {model_key}")

✓ Multiple model clients initialized

Available models:
  - gpt-4o-mini
  - groq-llama


In [35]:
def call_model(model_config, system_message, user_message, temperature=0.3, level='level1'):
    """
    Universal function to call different model APIs
    """
    provider = model_config['provider']
    
    try:
        if provider == 'openai' or provider == 'groq':
            # OpenAI-compatible API (OpenAI, Groq)
            client = model_config['client']
            model_name = model_config[f'{level}_model']
            
            response = client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": user_message}
                ],
                temperature=temperature
            )
            return response.choices[0].message.content
            
            if response.status_code == 200:
                return response.json()['message']['content']
            else:
                raise Exception(f"Ollama API error: {response.status_code}")
                
    except Exception as e:
        raise Exception(f"Error calling {provider} model: {str(e)}")

print("✓ Universal model calling function created")

✓ Universal model calling function created


In [None]:
# def process_all_levels(model_key, model_config, df, chunk_size=8):
#     """
#     Process all 3 levels of summarization for a given model
#     """
#     print(f"\n{'='*60}")
#     print(f"PROCESSING MODEL: {model_key}")
#     print(f"{'='*60}\n")
    
#     chunk_summaries = []
#     year_summaries = []
#     final_summary = None
    
#     # Group by year
#     grouped_by_year = df.groupby('year')
    
#     # LEVEL 1: Chunk Summaries
#     print("Level 1: Generating chunk summaries...")
#     for year, group in tqdm(grouped_by_year, desc=f"[{model_key}] Years"):
#         races = group['text'].tolist()
#         race_chunks = [races[i:i + chunk_size] for i in range(0, len(races), chunk_size)]
        
#         for i, chunk in enumerate(race_chunks):
#             chunk_text = " ".join(chunk)
            
#             system_msg = "You are a data analyst extracting statistics from F1 steward decisions. Provide only factual counts and percentages, no interpretations."
            
#             user_msg = f"""Analyze these F1 stewards' decisions from {year} (races {i*chunk_size+1}-{min((i+1)*chunk_size, len(races))}) and create a DATA-DRIVEN summary (exactly 100 words):

# {chunk_text}

# Required format:
# "This chunk covers [#] incidents. Infraction breakdown: [type]: [#] cases ([%]%), [type]: [#] cases ([%]%). Driver involvement: Driver [#] had [#] incidents, Driver [#] had [#]. Penalties: [#] time penalties, [#] fines, [#] reprimands, [#] no action. Sessions: [#] race, [#] qualifying, [#] practice. Common violations: [list top 3 with counts]."

# Include ONLY factual counts and percentages. No interpretations."""
            
#             try:
#                 summary = call_model(model_config, system_msg, user_msg, temperature=0.3, level='level1')
#                 chunk_summaries.append({
#                     'year': year,
#                     'chunk': i+1,
#                     'summary': summary,
#                     'model': model_key
#                 })
#             except Exception as e:
#                 print(f"  ✗ Error: {year} chunk {i+1}: {e}")
    
#     print(f"  ✓ Level 1 complete: {len(chunk_summaries)} chunks")
    
#     # LEVEL 2: Yearly Summaries
#     print("\nLevel 2: Generating yearly summaries...")
#     chunk_df = pd.DataFrame(chunk_summaries)
#     grouped_chunks = chunk_df.groupby('year')
    
#     for year, chunks in tqdm(grouped_chunks, desc=f"[{model_key}] Yearly"):
#         combined_chunks = " ".join(chunks['summary'].tolist())
        
#         system_msg = "You are a statistical analyst creating data-rich narratives from F1 incident data."
        
#         user_msg = f"""Combine these chunk summaries into a comprehensive {year} statistical summary (exactly 200 words):

# {combined_chunks}

# Required narrative format with embedded statistics:
# "In {year}, Mercedes accumulated [#] total incidents across [#] races. The infraction distribution showed [type] as the leading category with [#] incidents ([%]%), followed by [type] at [#] incidents ([%]%), and [type] with [#] incidents ([%]%). Driver [#] was involved in [#] incidents ([%]% of yearly total), while Driver [#] accounted for [#] incidents ([%]%). Penalty-wise, [#]% resulted in time penalties, [#]% in fines, [#]% in reprimands, and [#]% received no action. Session analysis revealed [#]% occurred during races, [#]% in qualifying, [#]% in practice. The most frequent violations were [list top 3 with counts]. Monthly distribution peaked in [month] with [#] incidents."

# Use only statistics from chunk summaries. Write as flowing narrative with embedded numbers. No bullet points or headers."""
        
#         try:
#             summary = call_model(model_config, system_msg, user_msg, temperature=0.3, level='level2')
#             year_summaries.append({
#                 'year': year,
#                 'summary': summary,
#                 'model': model_key
#             })
#         except Exception as e:
#             print(f"  ✗ Error: {year}: {e}")
    
#     print(f"  ✓ Level 2 complete: {len(year_summaries)} years")
    
#     # LEVEL 3: Final Summary
#     print("\nLevel 3: Generating final summary...")
#     all_year_summaries = " ".join([ys['summary'] for ys in year_summaries])
    
#     system_msg = "You are creating a concise statistical profile across multiple years of F1 data."
    
#     user_msg = f"""Create a comprehensive 2020-2024 Mercedes infringement profile (200-250 words) as a statistical narrative:

# {all_year_summaries}

# Write as continuous flowing paragraphs:

# "Over 2020-2024, Mercedes accumulated [total #] FIA infractions. The distribution showed [year] with [#] incidents ([%]%), [year] with [#] ([%]%), through [year] with [#] ([%]%). The leading infraction category was [type] with [#] cases ([%]%), followed by [type] at [%] and [type] at [%]. 

# Penalty-wise, [%]% resulted in time penalties, [%]% in fines, [%]% in reprimands, and [%]% received no action. 

# Driver analysis revealed that in 2020 Driver [#] caused [%]% of incidents, in 2021 Driver [#] caused [%]%, continuing through 2024. Overall, Driver [#] accumulated [#] infractions including [#] [type], [#] [type], while Driver [#] had [#] total with [breakdown]. 

# The peak year was [year] with [#] infractions, averaging [#] per year across the period. Session distribution showed [%]% in races, [%]% in qualifying, [%]% in practice. The trend from 2020 to 2024 showed a [%]% [increase/decrease]."

# CRITICAL: 
# - Exactly 200-250 words
# - Continuous narrative paragraphs, no bullet points
# - Embed ALL key statistics naturally
# - No strategic insights or interpretations
# - Pure data storytelling"""
    
#     try:
#         final_summary = call_model(model_config, system_msg, user_msg, temperature=0.3, level='level3')
#         print(f"  ✓ Level 3 complete")
#     except Exception as e:
#         print(f"  ✗ Error: {e}")
    
#     return chunk_summaries, year_summaries, final_summary

# print("✓ Main processing function created")

✓ Main processing function created


In [None]:
# def process_all_levels(model_key, model_config, df, chunk_size=8):
#     """
#     Process all 3 levels of summarization for a given model
#     """
#     print(f"\n{'='*60}")
#     print(f"PROCESSING MODEL: {model_key}")
#     print(f"{'='*60}\n")
    
#     chunk_summaries = []
#     year_summaries = []
#     final_summary = None
    
#     # Group by year
#     grouped_by_year = df.groupby('year')
    
#     # LEVEL 1: Chunk Summaries
#     print("Level 1: Generating chunk summaries...")
#     for year, group in tqdm(grouped_by_year, desc=f"[{model_key}] Years"):
#         num_rows = len(group)
        
#         # Chunk the DATAFRAME, not just text
#         for i in range(0, num_rows, chunk_size):
#             chunk_df = group.iloc[i:i + chunk_size]
            
#             # ACTUAL count from dataframe rows
#             actual_count = len(chunk_df)
            
#             # Combine text for LLM context
#             chunk_text = " ".join(chunk_df['text'].tolist())
            
#             system_msg = "You are a data analyst summarizing F1 steward decisions. Use the provided statistics."
            
#             user_msg = f"""Summarize these {year} F1 steward decisions (exactly 100 words):

# VERIFIED COUNT: {actual_count} infringement decisions in this chunk (DO NOT COUNT YOURSELF)

# TEXT FOR ANALYSIS:
# {chunk_text}

# Write a summary following this structure:
# "This chunk contains {actual_count} infringement decisions. [Analyze the text to describe]: Infraction type breakdown with approximate counts. Drivers involved with frequencies. Penalty types distribution. Session breakdown. Common violation patterns."

# CRITICAL: Use the exact number {actual_count} for total infractions. Extract other details by analyzing the text content."""
            
#             try:
#                 summary = call_model(model_config, system_msg, user_msg, temperature=0.2, level='level1')
#                 chunk_summaries.append({
#                     'year': year,
#                     'chunk': (i // chunk_size) + 1,
#                     'summary': summary,
#                     'model': model_key,
#                     'verified_count': actual_count  # Store for validation
#                 })
#                 print(f"  ✓ {year} chunk {(i // chunk_size) + 1}: {actual_count} infractions")
#             except Exception as e:
#                 print(f"  ✗ Error: {year} chunk {(i // chunk_size) + 1}: {e}")
    
#     print(f"  ✓ Level 1 complete: {len(chunk_summaries)} chunks")
    
#     # Validate counts
#     chunk_df = pd.DataFrame(chunk_summaries)
#     if 'verified_count' in chunk_df.columns:
#         print(f"\n  Validation:")
#         for year in chunk_df['year'].unique():
#             year_chunks = chunk_df[chunk_df['year'] == year]
#             total_from_chunks = year_chunks['verified_count'].sum()
#             actual_from_data = len(df[df['year'] == year])
#             match = "✓" if total_from_chunks == actual_from_data else "✗"
#             print(f"    {year}: {total_from_chunks} (chunks) vs {actual_from_data} (data) {match}")
    
#     # LEVEL 2: Yearly Summaries
#     print("\nLevel 2: Generating yearly summaries...")
#     chunk_df = pd.DataFrame(chunk_summaries)
#     grouped_chunks = chunk_df.groupby('year')
    
#     for year, chunks in tqdm(grouped_chunks, desc=f"[{model_key}] Yearly"):
#         # Get total verified count for this year
#         year_total = chunks['verified_count'].sum() if 'verified_count' in chunks.columns else 'unknown'
        
#         combined_chunks = " ".join(chunks['summary'].tolist())
        
#         system_msg = "You are a statistical analyst creating data-rich narratives from F1 incident data."
        
#         user_msg = f"""Combine these chunk summaries into a comprehensive {year} statistical summary (exactly 200 words):

# VERIFIED TOTAL FOR {year}: {year_total} total infringement decisions

# CHUNK SUMMARIES:
# {combined_chunks}

# Required narrative format with embedded statistics:
# "In {year}, Mercedes accumulated {year_total} total infringement decisions. The infraction distribution showed [type] as the leading category with [#] incidents ([%]%), followed by [type] at [#] incidents ([%]%), and [type] with [#] incidents ([%]%). Driver [#] was involved in [#] incidents ([%]% of yearly total), while Driver [#] accounted for [#] incidents ([%]%). Penalty-wise, [#]% resulted in time penalties, [#]% in fines, [#]% in reprimands, and [#]% received no action. Session analysis revealed [#]% occurred during races, [#]% in qualifying, [#]% in practice. The most frequent violations were [list top 3 with counts]."

# Use EXACT total {year_total}. Extract other statistics from chunk summaries. Write as flowing narrative with embedded numbers. No bullet points."""
        
#         try:
#             summary = call_model(model_config, system_msg, user_msg, temperature=0.2, level='level2')
#             year_summaries.append({
#                 'year': year,
#                 'summary': summary,
#                 'model': model_key,
#                 'verified_count': year_total
#             })
#         except Exception as e:
#             print(f"  ✗ Error: {year}: {e}")
    
#     print(f"  ✓ Level 2 complete: {len(year_summaries)} years")
    
#     # LEVEL 3: Final Summary
#     print("\nLevel 3: Generating final summary...")
    
#     # Calculate grand total
#     grand_total = sum([ys['verified_count'] for ys in year_summaries if isinstance(ys['verified_count'], (int, float))])
    
#     all_year_summaries = " ".join([ys['summary'] for ys in year_summaries])
    
#     system_msg = "You are creating a concise statistical profile across multiple years of F1 data."
    
#     user_msg = f"""Create a comprehensive 2020-2024 Mercedes infringement profile (200-250 words) as a statistical narrative:

# VERIFIED GRAND TOTAL: {grand_total} total infringement decisions across 2020-2024

# YEARLY SUMMARIES:
# {all_year_summaries}

# Write as continuous flowing paragraphs:

# "Over 2020-2024, Mercedes accumulated {grand_total} FIA infringement decisions. The distribution showed [year] with [#] incidents ([%]%), [year] with [#] ([%]%), through [year] with [#] ([%]%). The leading infraction category was [type] with [#] cases ([%]%), followed by [type] at [%] and [type] at [%]. 

# Penalty-wise, [%]% resulted in time penalties, [%]% in fines, [%]% in reprimands, and [%]% received no action. 

# Driver analysis revealed that in 2020 Driver [#] caused [%]% of incidents, in 2021 Driver [#] caused [%]%, continuing through 2024. Overall, Driver [#] accumulated [#] infractions including [#] [type], [#] [type], while Driver [#] had [#] total with [breakdown]. 

# The peak year was [year] with [#] infractions, averaging [#] per year across the period. Session distribution showed [%]% in races, [%]% in qualifying, [#]% in practice. The trend from 2020 to 2024 showed a [%]% [increase/decrease]."

# CRITICAL: 
# - Use EXACT total {grand_total}
# - Exactly 200-250 words
# - Continuous narrative paragraphs, no bullet points
# - Embed statistics naturally
# - Pure data storytelling"""
    
#     try:
#         final_summary = call_model(model_config, system_msg, user_msg, temperature=0.2, level='level3')
#         print(f"  ✓ Level 3 complete")
#     except Exception as e:
#         print(f"  ✗ Error: {e}")
    
#     return chunk_summaries, year_summaries, final_summary

# print("✓ Main processing function updated with accurate counting")

✓ Main processing function updated with accurate counting


In [40]:
def process_all_levels(model_key, model_config, df, chunk_size=8):
    """
    Process all 3 levels of summarization for a given model
    """
    print(f"\n{'='*60}")
    print(f"PROCESSING MODEL: {model_key}")
    print(f"{'='*60}\n")
    
    chunk_summaries = []
    year_summaries = []
    final_summary = None
    
    # Group by year
    grouped_by_year = df.groupby('year')
    
    # LEVEL 1: Chunk Summaries
    print("Level 1: Generating chunk summaries...")
    for year, group in tqdm(grouped_by_year, desc=f"[{model_key}] Years"):
        num_rows = len(group)
        
        # Chunk the DATAFRAME, not just text
        for i in range(0, num_rows, chunk_size):
            chunk_df = group.iloc[i:i + chunk_size]
            
            # ACTUAL count from dataframe rows
            actual_count = len(chunk_df)
            
            # Combine text for LLM context
            chunk_text = " ".join(chunk_df['text'].tolist())
            
            system_msg = "You are a data analyst summarizing F1 steward decisions. Use the provided statistics."
            
            user_msg = f"""Summarize these {year} F1 steward decisions (exactly 100 words):

VERIFIED COUNT: {actual_count} infringement decisions in this chunk (DO NOT COUNT YOURSELF)

TEXT FOR ANALYSIS:
{chunk_text}

Write a summary following this structure:
"This chunk contains {actual_count} infringement decisions. [Analyze the text to describe]: Infraction type breakdown with approximate counts. Drivers involved with frequencies. Penalty types distribution. Session breakdown. Common violation patterns."

CRITICAL: Use the exact number {actual_count} for total infractions. Extract other details by analyzing the text content."""
            
            try:
                summary = call_model(model_config, system_msg, user_msg, temperature=0.2, level='level1')
                chunk_summaries.append({
                    'year': year,
                    'chunk': (i // chunk_size) + 1,
                    'summary': summary,
                    'model': model_key,
                    'verified_count': actual_count  # Store for validation
                })
                print(f"  ✓ {year} chunk {(i // chunk_size) + 1}: {actual_count} infractions")
            except Exception as e:
                print(f"  ✗ Error: {year} chunk {(i // chunk_size) + 1}: {e}")
    
    print(f"  ✓ Level 1 complete: {len(chunk_summaries)} chunks")
    
    # Validate counts
    chunk_df_validation = pd.DataFrame(chunk_summaries)
    if 'verified_count' in chunk_df_validation.columns:
        print(f"\n  Validation:")
        for year in chunk_df_validation['year'].unique():
            year_chunks = chunk_df_validation[chunk_df_validation['year'] == year]
            total_from_chunks = year_chunks['verified_count'].sum()
            actual_from_data = len(df[df['year'] == year])
            match = "✓" if total_from_chunks == actual_from_data else "✗"
            print(f"    {year}: {total_from_chunks} (chunks) vs {actual_from_data} (data) {match}")
    
    # LEVEL 2: Yearly Summaries
    print("\nLevel 2: Generating yearly summaries...")
    chunk_df = pd.DataFrame(chunk_summaries)
    grouped_chunks = chunk_df.groupby('year')
    
    for year, chunks in tqdm(grouped_chunks, desc=f"[{model_key}] Yearly"):
        # Get total verified count for this year
        year_total = int(chunks['verified_count'].sum()) if 'verified_count' in chunks.columns else 0
        
        combined_chunks = " ".join(chunks['summary'].tolist())
        
        system_msg = "You are a statistical analyst creating data-rich narratives from F1 incident data."
        
        user_msg = f"""Combine these chunk summaries into a comprehensive {year} statistical summary (exactly 200 words):

VERIFIED TOTAL FOR {year}: {year_total} total infringement decisions

CHUNK SUMMARIES:
{combined_chunks}

Required narrative format with embedded statistics:
"In {year}, Mercedes accumulated {year_total} total infringement decisions. The infraction distribution showed [type] as the leading category with [#] incidents ([%]%), followed by [type] at [#] incidents ([%]%), and [type] with [#] incidents ([%]%). Driver [#] was involved in [#] incidents ([%]% of yearly total), while Driver [#] accounted for [#] incidents ([%]%). Penalty-wise, [#]% resulted in time penalties, [#]% in fines, [#]% in reprimands, and [#]% received no action. Session analysis revealed [#]% occurred during races, [#]% in qualifying, [#]% in practice. The most frequent violations were [list top 3 with counts]."

Use EXACT total {year_total}. Extract other statistics from chunk summaries. Write as flowing narrative with embedded numbers. No bullet points."""
        
        try:
            summary = call_model(model_config, system_msg, user_msg, temperature=0.2, level='level2')
            year_summaries.append({
                'year': year,
                'summary': summary,
                'model': model_key,
                'verified_count': year_total
            })
            print(f"  ✓ {year}: {year_total} infractions")
        except Exception as e:
            print(f"  ✗ Error: {year}: {e}")
    
    print(f"  ✓ Level 2 complete: {len(year_summaries)} years")
    
    # LEVEL 3: Final Summary
    print("\nLevel 3: Generating final summary...")
    
    # Calculate grand total from ORIGINAL DATAFRAME (most reliable)
    grand_total = len(df)
    
    # Alternative: sum from year_summaries (should match)
    grand_total_from_years = sum([ys['verified_count'] for ys in year_summaries if isinstance(ys['verified_count'], (int, float))])
    
    # Verify they match
    if grand_total != grand_total_from_years:
        print(f"  ⚠ WARNING: Mismatch! DataFrame: {grand_total}, Year summaries: {grand_total_from_years}")
        print(f"  Using DataFrame count: {grand_total}")
    
    all_year_summaries = " ".join([ys['summary'] for ys in year_summaries])
    
    system_msg = "You are creating a concise statistical profile across multiple years of F1 data."
    
    user_msg = f"""Create a comprehensive 2020-2024 Mercedes infringement profile (200-250 words) as a statistical narrative:

VERIFIED GRAND TOTAL: {grand_total} total infringement decisions across 2020-2024

DO NOT COUNT OR ADD UP NUMBERS YOURSELF. THE TOTAL IS EXACTLY {grand_total}.

YEARLY SUMMARIES:
{all_year_summaries}

Your response MUST begin with this EXACT sentence:
"Over 2020-2024, Mercedes accumulated {grand_total} FIA infringement decisions."

Then continue with flowing paragraphs covering:
- Year-by-year distribution with counts and percentages
- Leading infraction categories with counts and percentages
- Penalty type breakdown with percentages
- Driver-specific patterns across years
- Peak year identification and yearly average
- Session distribution percentages
- Overall trend from 2020 to 2024

CRITICAL RULES:
- START with: "Over 2020-2024, Mercedes accumulated {grand_total} FIA infringement decisions."
- Use ONLY {grand_total} as the total - do not calculate or add years yourself
- Extract all other statistics from yearly summaries
- Write as continuous flowing narrative, NO bullet points or headers
- Exactly 200-250 words
- Pure data storytelling with embedded statistics"""
    
    try:
        final_summary = call_model(model_config, system_msg, user_msg, temperature=0.1, level='level3')
        
        # Validate the summary contains correct total
        if str(grand_total) not in final_summary:
            print(f"  ⚠ WARNING: Final summary doesn't mention {grand_total}!")
            print(f"  Summary preview: {final_summary[:200]}...")
        
        print(f"  ✓ Level 3 complete (Grand Total: {grand_total})")
        
    except Exception as e:
        print(f"  ✗ Error: {e}")
    
    return chunk_summaries, year_summaries, final_summary

print("✓ Main processing function updated with accurate counting and validation")

✓ Main processing function updated with accurate counting and validation


In [41]:
# Load data
df = pd.read_csv('goldstandard/group.csv')

print(f"✓ Data loaded: {len(df)} rows")
print(f"  Years: {df['year'].unique()}")
print(f"  Rows per year:\n{df['year'].value_counts().sort_index()}")

✓ Data loaded: 82 rows
  Years: ['2020_inf_profile' '2021_inf_profile' '2022_inf_profile'
 '2023_inf_profile' '2024_inf_profile']
  Rows per year:
year
2020_inf_profile    11
2021_inf_profile    16
2022_inf_profile    15
2023_inf_profile    23
2024_inf_profile    17
Name: count, dtype: int64


In [None]:
# Store results for all models
all_results = {}

# Select which models to run (comment out models you don't want to use)
models_to_run = [
    'gpt-4o-mini',      # OpenAI
    'groq-llama',       # Groq with Llama
    # 'groq-mixtral',   # Groq with Mixtral
    #'ollama-llama',   # Local Ollama (make sure Ollama is running!)
]

for model_key in models_to_run:
    if model_key not in models_config:
        print(f"⚠ Model '{model_key}' not found in config, skipping...")
        continue
    
    try:
        chunk_sums, year_sums, final_sum = process_all_levels(
            model_key, 
            models_config[model_key], 
            df, 
            chunk_size=8
        )
        
        all_results[model_key] = {
            'chunks': chunk_sums,
            'years': year_sums,
            'final': final_sum
        }
        
        print(f"\n✓ {model_key} processing complete!\n")
        
    except Exception as e:
        print(f"\n✗ Error processing {model_key}: {e}\n")

print("\n" + "="*60)
print("ALL MODELS PROCESSED")
print("="*60)


PROCESSING MODEL: groq-llama

Level 1: Generating chunk summaries...


[groq-llama] Years:   0%|          | 0/5 [00:00<?, ?it/s]

  ✓ 2020_inf_profile chunk 1: 8 infractions


[groq-llama] Years:  20%|██        | 1/5 [00:02<00:09,  2.49s/it]

  ✓ 2020_inf_profile chunk 2: 3 infractions
  ✓ 2021_inf_profile chunk 1: 8 infractions


[groq-llama] Years:  40%|████      | 2/5 [00:04<00:07,  2.37s/it]

  ✓ 2021_inf_profile chunk 2: 8 infractions
  ✓ 2022_inf_profile chunk 1: 8 infractions


[groq-llama] Years:  60%|██████    | 3/5 [00:32<00:28, 14.03s/it]

  ✓ 2022_inf_profile chunk 2: 7 infractions
  ✓ 2023_inf_profile chunk 1: 8 infractions
  ✓ 2023_inf_profile chunk 2: 8 infractions


[groq-llama] Years:  80%|████████  | 4/5 [01:19<00:27, 27.14s/it]

  ✓ 2023_inf_profile chunk 3: 7 infractions
  ✓ 2024_inf_profile chunk 1: 8 infractions
  ✓ 2024_inf_profile chunk 2: 8 infractions


[groq-llama] Years: 100%|██████████| 5/5 [02:00<00:00, 24.19s/it]


  ✓ 2024_inf_profile chunk 3: 1 infractions
  ✓ Level 1 complete: 12 chunks

  Validation:
    2020_inf_profile: 11 (chunks) vs 11 (data) ✓
    2021_inf_profile: 16 (chunks) vs 16 (data) ✓
    2022_inf_profile: 15 (chunks) vs 15 (data) ✓
    2023_inf_profile: 23 (chunks) vs 23 (data) ✓
    2024_inf_profile: 17 (chunks) vs 17 (data) ✓

Level 2: Generating yearly summaries...


[groq-llama] Yearly:  20%|██        | 1/5 [00:03<00:14,  3.57s/it]

  ✓ 2020_inf_profile: 11 infractions


[groq-llama] Yearly:  40%|████      | 2/5 [00:06<00:10,  3.45s/it]

  ✓ 2021_inf_profile: 16 infractions


[groq-llama] Yearly:  60%|██████    | 3/5 [00:11<00:07,  3.90s/it]

  ✓ 2022_inf_profile: 15 infractions


[groq-llama] Yearly:  80%|████████  | 4/5 [00:15<00:03,  4.00s/it]

  ✓ 2023_inf_profile: 23 infractions


[groq-llama] Yearly: 100%|██████████| 5/5 [00:20<00:00,  4.14s/it]

  ✓ 2024_inf_profile: 17 infractions
  ✓ Level 2 complete: 5 years

Level 3: Generating final summary...





  ✓ Level 3 complete (Grand Total: 82)

✓ groq-llama processing complete!


ALL MODELS PROCESSED


In [45]:
# Save results for each model
for model_key, results in all_results.items():
    # Save chunk summaries
    chunk_df = pd.DataFrame(results['chunks'])
    chunk_df.to_csv(f'goldstandard/chunk_summaries_{model_key}.csv', index=False)
    
    # Save yearly summaries
    year_df = pd.DataFrame(results['years'])
    year_df.to_csv(f'goldstandard/year_summaries_{model_key}.csv', index=False)
    
    # Save final summary
    if results['final']:
        final_df = pd.DataFrame([{
            'summary': results['final'],
            'model': model_key
        }])
        final_df.to_csv(f'goldstandard/final_summary_{model_key}.csv', index=False)
    
    print(f"✓ Saved results for {model_key}")

print("\n" + "="*60)
print("FILES CREATED:")
print("="*60)
for model_key in all_results.keys():
    print(f"\n{model_key}:")
    print(f"  - goldstandard/chunk_summaries_{model_key}.csv")
    print(f"  - goldstandard/year_summaries_{model_key}.csv")
    print(f"  - goldstandard/final_summary_{model_key}.csv")

✓ Saved results for groq-llama

FILES CREATED:

groq-llama:
  - goldstandard/chunk_summaries_groq-llama.csv
  - goldstandard/year_summaries_groq-llama.csv
  - goldstandard/final_summary_groq-llama.csv


In [47]:
# Create comparison dataframe
comparison_data = []

for model_key, results in all_results.items():
    word_count = len(results['final'].split()) if results['final'] else 0
    
    comparison_data.append({
        'model': model_key,
        'num_chunks': len(results['chunks']),
        'num_years': len(results['years']),
        'final_word_count': word_count,
        'word_count_ok': 200 <= word_count <= 250
    })

comparison_df = pd.DataFrame(comparison_data)

print("="*60)
print("MODEL COMPARISON SUMMARY")
print("="*60)
display(comparison_df)

# Display final summaries side by side
print("\n" + "="*60)
print("FINAL SUMMARIES BY MODEL")
print("="*60)

for model_key, results in all_results.items():
    print(f"\n--- {model_key.upper()} ---")
    print(results['final'][:500] + "..." if len(results['final']) > 500 else results['final'])
    print()



MODEL COMPARISON SUMMARY


Unnamed: 0,model,num_chunks,num_years,final_word_count,word_count_ok
0,groq-llama,12,5,205,True



FINAL SUMMARIES BY MODEL

--- GROQ-LLAMA ---
Over 2020-2024, Mercedes accumulated 82 FIA infringement decisions. The yearly distribution shows a varying trend, with 11 incidents in 2020, 16 in 2021, 15 in 2022, 23 in 2023, and 17 in 2024. The leading infraction categories across these years include track limits breaches, technical breaches, and procedural mistakes, with track limits breaches being the most frequent in 2020 and 2023, and technical breaches leading in 2021 and 2024. Penalty-wise, time penalties were the most common, ranging ...

