In [1]:
import json
import pandas as pd
from collections import Counter

# Load the testset-38.json file
with open('testsets/testset-38.json', 'r', encoding='utf-8') as file:
    testset_data = json.load(file)

# Extract all question_type values from metadata
question_types = [item['metadata']['question_type'] for item in testset_data]

# Find unique question types and their counts
unique_question_types = list(set(question_types))
question_type_counts = Counter(question_types)

print("Unique metadata.question_type values found:")
print("=" * 50)

for i, qt in enumerate(sorted(unique_question_types), 1):
    print(f"{i}. '{qt}' - appears {question_type_counts[qt]} times")

print(f"\nTotal unique question types: {len(unique_question_types)}")
print(f"Total questions in dataset: {len(testset_data)}")

# Create a summary DataFrame
df_summary = pd.DataFrame([
    {'question_type': qt, 'count': question_type_counts[qt]} 
    for qt in sorted(unique_question_types)
])

print("\nSummary DataFrame:")
print(df_summary)


Unique metadata.question_type values found:
1. 'complex' - appears 16 times
2. 'conversational' - appears 16 times
3. 'distracting element' - appears 16 times
4. 'double' - appears 15 times
5. 'simple' - appears 16 times
6. 'situational' - appears 16 times

Total unique question types: 6
Total questions in dataset: 95

Summary DataFrame:
         question_type  count
0              complex     16
1       conversational     16
2  distracting element     16
3               double     15
4               simple     16
5          situational     16


In [3]:
import requests
import json
from collections import defaultdict, Counter

def fetch_data(url):
    """Fetch data from the API endpoint"""
    response = requests.get(url)
    response.raise_for_status()
    data = response.json()
    return data

def analyze_answer_relevancy_zeros(all_scores):
    """Analyze answer_relevancy scores and find indexes with 0.0 values"""
    all_zero_indexes = []  # List to store all zero indexes from all records
    record_zero_indexes = {}  # Dict to store zero indexes per record
    
    record_counter = 1
    
    for scores_entry in all_scores:
        scores_string = scores_entry.get("scores")
        if not scores_string:
            print(f"⚠ Skipping record {record_counter}: empty or missing 'scores'")
            record_counter += 1
            continue

        try:
            scores_dict = json.loads(scores_string)
        except json.JSONDecodeError as e:
            print(f"⚠ Failed to parse JSON for record {record_counter}: {e}")
            record_counter += 1
            continue

        if "answer_relevancy" not in scores_dict:
            print(f"⚠ Missing 'answer_relevancy' key in record {record_counter}")
            record_counter += 1
            continue

        # Find indexes where answer_relevancy is 0.0
        zero_indexes = [k for k, v in scores_dict["answer_relevancy"].items() if v == 0.0]
        
        if zero_indexes:
            record_zero_indexes[record_counter] = zero_indexes
            all_zero_indexes.extend(zero_indexes)
            print(f"Record {record_counter}: Zero indexes = {zero_indexes}")
        else:
            print(f"Record {record_counter}: No zero answer_relevancy scores")
        
        record_counter += 1
    
    return record_zero_indexes, all_zero_indexes

def find_overlapping_indexes(record_zero_indexes):
    """Find indexes that appear in multiple records"""
    index_counter = Counter()
    
    # Count occurrences of each index across all records
    for record_id, indexes in record_zero_indexes.items():
        for idx in indexes:
            index_counter[idx] += 1
    
    # Find indexes that appear in more than one record
    overlapping_indexes = {idx: count for idx, count in index_counter.items() if count > 1}
    
    return overlapping_indexes, index_counter

# Fetch data from the API
url = "http://localhost:9876/ratings/?status=Completed&testset_id=38&llm_to_be_evaluated_type=openai/gpt-4.1&show_scores=True"

print("Fetching data from API...")
try:
    all_scores = fetch_data(url)
    print(f"✅ Successfully fetched {len(all_scores)} records")
except Exception as e:
    print(f"❌ Error fetching data: {e}")

print("\n" + "="*60)
print("ANALYZING ANSWER_RELEVANCY ZEROS")
print("="*60)

record_zero_indexes, all_zero_indexes = analyze_answer_relevancy_zeros(all_scores)

print("\n" + "="*60)
print("SUMMARY")
print("="*60)

print(f"Total records processed: {len(all_scores)}")
print(f"Records with zero answer_relevancy scores: {len(record_zero_indexes)}")
print(f"Total zero indexes found: {len(all_zero_indexes)}")
print(f"Unique zero indexes: {len(set(all_zero_indexes))}")

if record_zero_indexes:
    print(f"\nAll zero indexes: {sorted(set(all_zero_indexes))}")
    
    overlapping_indexes, index_counter = find_overlapping_indexes(record_zero_indexes)
    
    if overlapping_indexes:
        print(f"\nOVERLAPPING INDEXES (appear in multiple records):")
        print("-" * 50)
        for idx, count in sorted(overlapping_indexes.items()):
            print(f"Index '{idx}': appears in {count} records")
    else:
        print(f"\nNo overlapping indexes found - each zero index appears in only one record")
    
    print(f"\nDETAILED INDEX FREQUENCY:")
    print("-" * 30)
    for idx, count in sorted(index_counter.items()):
        print(f"Index '{idx}': {count} time(s)")
    
    # Find indexes that appear in ALL records
    total_records = len(record_zero_indexes)
    indexes_in_all_records = [idx for idx, count in index_counter.items() if count == total_records]
    
    print(f"\nINDEXES THAT APPEAR IN ALL {total_records} RECORDS:")
    print("-" * 50)
    print(f"Array of indexes overlapping in all reports: {sorted(indexes_in_all_records)}")
    print(f"Total indexes appearing in all records: {len(indexes_in_all_records)}")
else:
    print("No records found with zero answer_relevancy scores")


Fetching data from API...
✅ Successfully fetched 3 records

ANALYZING ANSWER_RELEVANCY ZEROS
Record 1: Zero indexes = ['2', '8', '11', '22', '28', '29', '33', '36', '37', '38', '47', '50', '52', '56', '60', '71', '79', '80', '81', '85', '86', '87', '88', '89', '93']
Record 2: Zero indexes = ['2', '11', '22', '28', '29', '33', '36', '37', '38', '47', '50', '52', '56', '60', '71', '79', '80', '81', '86', '87', '88', '89', '93']
Record 3: Zero indexes = ['2', '8', '11', '22', '28', '29', '33', '36', '38', '47', '50', '52', '56', '60', '71', '79', '80', '81', '85', '86', '87', '88', '89']

SUMMARY
Total records processed: 3
Records with zero answer_relevancy scores: 3
Total zero indexes found: 71
Unique zero indexes: 25

All zero indexes: ['11', '2', '22', '28', '29', '33', '36', '37', '38', '47', '50', '52', '56', '60', '71', '79', '8', '80', '81', '85', '86', '87', '88', '89', '93']

OVERLAPPING INDEXES (appear in multiple records):
--------------------------------------------------
Inde

In [4]:
# Function to examine testset records by index
def print_record_by_index(index, testset_data):
    """Print the testset record for a given index"""
    try:
        # Convert string index to integer for list indexing
        idx = int(index)
        
        if idx < 0 or idx >= len(testset_data):
            print(f"❌ Index {idx} is out of range. Valid range: 0-{len(testset_data)-1}")
            return
        
        record = testset_data[idx]
        
        print(f"RECORD FOR INDEX {idx}:")
        print("=" * 50)
        print(f"Question: {record.get('question', 'N/A')}")
        print(f"Answer: {record.get('answer', 'N/A')}")
        print(f"Context: {record.get('context', 'N/A')}")
        
        if 'metadata' in record:
            print(f"\nMetadata:")
            for key, value in record['metadata'].items():
                print(f"  {key}: {value}")
        
        return record
        
    except ValueError:
        print(f"❌ Invalid index '{index}'. Please provide a numeric index.")
        return None
    except Exception as e:
        print(f"❌ Error retrieving record: {e}")
        return None

# Load testset data if not already loaded
if 'testset_data' not in locals():
    with open('testsets/testset-38.json', 'r', encoding='utf-8') as file:
        testset_data = json.load(file)

# Example usage - change the index to examine different records
# Uncomment and modify the line below to examine a specific record:

# index_to_examine = "2"  # This is one of the indexes that appears in all 3 records
# print_record_by_index(index_to_examine, testset_data)

print("📋 To examine a specific record, use:")
print("print_record_by_index('INDEX_NUMBER', testset_data)")
print("\nFor example, to see record for index 2:")
print("print_record_by_index('2', testset_data)")
print(f"\nTestset contains {len(testset_data)} records (indexes 0-{len(testset_data)-1})")

print_record_by_index('11', testset_data)


📋 To examine a specific record, use:
print_record_by_index('INDEX_NUMBER', testset_data)

For example, to see record for index 2:
print_record_by_index('2', testset_data)

Testset contains 95 records (indexes 0-94)
RECORD FOR INDEX 11:
Question: Wo werden die Dokumente und die Dokumentation des Quellcodes abgelegt?
Answer: N/A
Context: N/A

Metadata:
  question_type: simple
  seed_document_id: 608
  topic: IT-Unternehmensgründung


{'id': 'b2587f66-6f55-48ff-962e-e514f88746ce',
 'question': 'Wo werden die Dokumente und die Dokumentation des Quellcodes abgelegt?',
 'reference_answer': 'Die Dokumente werden in der Cloud von Microsoft abgelegt und die Dokumentation des Quellcodes liegt in Gitlab zusammen mit dem eigentlichen Quellcode.',
 'reference_context': 'Document 608: Um keine Probleme und Missverständnisse innerhalb der Firma zu haben werden alle Dokumente in der Cloud abgelegt und mit den entsprechenden Berechtigungen versehen.\n\nProgrammiere müssen zudem bei jeder Änderung die Dokumentation über den Code Aktualisieren, um dessen Instandhaltung zu garantieren. Auch diese ist mit den Entsprechenden Berechtigungen versehen.\n\nDie Dokumente werden in der Cloud von Microsoft liegen da mit Microsoft Office gearbeitet wird, die Dokumentation des Quellcodes liegt jedoch in Gitlab zusammen mit dem eigentlichen Quellcode.\n\n\n\n2.3.4.2 Gründerteam und Schlüsselpersonen\n\nIm ersten Jahr ist keine Erweiterung des T