## Test: De afwisseling van staand en slepend rijm ontbreekt

In [4]:
import json
import re
from collections import defaultdict, Counter
from tqdm import tqdm
def _read_json(path):
    """Return the parsed content of the UTF-8 encoded JSON file at *path*."""
    with open(path, 'r', encoding='utf-8') as handle:
        return json.load(handle)


# Verse id -> entry with 'syllabified' and 'stresses' (see get_word_data).
verse_id_lookup = _read_json('../data/verse_id_lookup.json')

# Lower-cased word -> one entry, or a list of entries when a word has
# several forms (see get_word_data).
stress_data = _read_json('../data/stresses_restructured.json')

# Manuscript -> {verse id: last word of that verse}.
vers_data = _read_json('../data/vers_laatste_woorden.json')

# Verse-id suffixes that are excluded from the analysis.
# NOTE(review): these look like the heading line (verse 000) and the line
# following the last strophe of each section -- confirm against the data.
skip_suffixes = {
    'M1_01_000', 'M2_01_000', 'M3_01_000',
    'M1_75_976', 'M2_26_339', 'M3_39_508',
}


def should_skip_vers(vers_id):
    """Return True when *vers_id* ends with one of the ``skip_suffixes``."""
    # str.endswith accepts a tuple and tests every candidate in one call.
    return vers_id.endswith(tuple(skip_suffixes))

def parse_vers_id(vers_id):
    """Break a verse id such as ``A_M1_01_001`` into its components.

    The id is split on underscores; field 0 is the manuscript, field 1 the
    section, field 2 the strophe number and field 3 the running verse number.

    Returns:
        A dict with the keys 'manuscript', 'section', 'strofe',
        'doorlopend_vers' and 'vers_in_strofe'. The last one is the 1-based
        position (1-13) of the verse inside its strophe.

    Raises:
        IndexError: if the id has fewer than four fields.
        ValueError: if the strophe or verse field is not an integer.
    """
    fields = vers_id.split('_')
    running_verse = int(fields[3])
    strophe_number = int(fields[2])

    # The position is derived from the running verse number in steps of 13;
    # the remainder of the 0-based number is shifted back to a 1-based one.
    _, zero_based_position = divmod(running_verse - 1, 13)

    return {
        'manuscript': fields[0],
        'section': fields[1],
        'strofe': strophe_number,
        'doorlopend_vers': running_verse,
        'vers_in_strofe': zero_based_position + 1,
    }

def get_word_data(word, vers_id=None):
    """Look up the syllabification and stress positions of *word*.

    A verse-specific entry in ``verse_id_lookup`` takes precedence, which
    selects the right form for words that have several forms. Otherwise the
    lower-cased, stripped word is looked up in ``stress_data``; when that
    entry is a list of forms the first one is used as a fallback.

    Returns:
        A ``(syllabified, stresses)`` tuple, or ``(None, None)`` when neither
        source knows the word.
    """
    normalized = word.lower().strip()

    if vers_id and vers_id in verse_id_lookup:
        # Verse-specific entry.
        chosen = verse_id_lookup[vers_id]
    elif normalized in stress_data:
        chosen = stress_data[normalized]
        if isinstance(chosen, list):
            # Several forms are recorded: fall back to the first one.
            chosen = chosen[0]
    else:
        return None, None

    return chosen['syllabified'], chosen['stresses']

def classify_rhyme_gender(word, vers_id=None):
    """Classify a rhyme word as masculine or feminine.

    Masculine rhyme ends on a stressed syllable, feminine rhyme on an
    unstressed one. *vers_id* is forwarded to ``get_word_data`` so that the
    right stress data is used for words with several forms.

    Returns:
        A ``(gender, syllables, stresses)`` tuple where *gender* is
        'masculine', 'feminine' or 'unknown'. The placeholder words
        'skipped' and 'damaged' give ``(None, None, None)``; a word without
        stress data gives ``('unknown', None, None)``. Syllables and stresses
        are returned alongside the gender for debugging.
    """
    normalized = word.lower().strip()

    # Placeholder words carry no rhyme information at all.
    if normalized in ('skipped', 'damaged'):
        return None, None, None

    syllables, stresses = get_word_data(normalized, vers_id)
    if syllables is None:
        return 'unknown', None, None

    # Stress positions are negative indices counted from the end of the
    # word: -1 is the last syllable, -2 the penultimate one, and so on.
    ends_on_stress = bool(stresses) and -1 in stresses
    gender = 'masculine' if ends_on_stress else 'feminine'

    return gender, syllables, stresses

def _collect_rhymes(strofe, positions):
    """Return one detail record per classifiable verse of *strofe* at *positions*."""
    collected = []
    for pos in positions:
        if pos not in strofe:
            continue
        line = strofe[pos]
        word = line['woord']
        vers_id = line['vers_id']
        gender, syllables, stresses = classify_rhyme_gender(word, vers_id)

        # A gender of None marks a skipped/damaged word: leave it out.
        if gender is None:
            continue

        collected.append({
            'vers_pos': pos,
            'vers_id': vers_id,
            'word': word,
            'gender': gender,
            'syllables': syllables,
            'stresses': stresses
        })
    return collected


def _dominant_gender(masculine, feminine):
    """Return the more frequent gender, or 'mixed' when the counts are equal."""
    if masculine > feminine:
        return 'masculine'
    if feminine > masculine:
        return 'feminine'
    return 'mixed'


def analyze_rhyme_gender_pattern(strofe, section, strofe_num):
    """Analyse the masculine/feminine rhyme pattern of one strophe.

    Args:
        strofe: mapping of verse position (1-13) to a dict with the keys
            'woord' and 'vers_id'. Positions that are absent are ignored.
        section: section label, copied into the result.
        strofe_num: strophe number, copied into the result.

    Returns:
        A dict with 'section' and 'strofe', the per-verse detail lists
        'a_rhymes' and 'b_rhymes', and for each rhyme type (prefix ``a_`` or
        ``b_``) the counts '_masculine', '_feminine', '_unknown', '_total'
        and the label '_dominant' ('masculine', 'feminine' or 'mixed').
    """
    # Verse positions that carry the A rhyme and the B rhyme respectively.
    a_lines = (1, 2, 4, 5, 7, 8, 10, 11)
    b_lines = (3, 6, 9, 12, 13)

    result = {
        'section': section,
        'strofe': strofe_num,
        'a_rhymes': _collect_rhymes(strofe, a_lines),
        'b_rhymes': _collect_rhymes(strofe, b_lines)
    }

    # Totals per rhyme type. The key insertion order is kept as
    # a_* counts, b_* counts, then both *_dominant labels, so that a JSON
    # dump of the result keeps its layout.
    for prefix in ('a', 'b'):
        rhymes = result[f'{prefix}_rhymes']
        tally = Counter(rhyme['gender'] for rhyme in rhymes)
        result[f'{prefix}_masculine'] = tally['masculine']
        result[f'{prefix}_feminine'] = tally['feminine']
        result[f'{prefix}_unknown'] = tally['unknown']
        result[f'{prefix}_total'] = len(rhymes)

    for prefix in ('a', 'b'):
        result[f'{prefix}_dominant'] = _dominant_gender(
            result[f'{prefix}_masculine'],
            result[f'{prefix}_feminine'],
        )

    return result

rhyme_gender_results = {}
skipped_count = 0
damaged_count = 0

for manuscript, verzen in tqdm(vers_data.items(), desc="Analyzing gender"):
    # Filter the verses and group the remaining ones per (section, strophe)
    # in a single pass; each group maps verse position -> verse record.
    strofes = defaultdict(dict)
    for vers_id, woord in verzen.items():
        if should_skip_vers(vers_id):
            skipped_count += 1
        elif woord == 'damaged':
            damaged_count += 1
        else:
            parsed = parse_vers_id(vers_id)
            strofe_key = (parsed['section'], parsed['strofe'])
            strofes[strofe_key][parsed['vers_in_strofe']] = {
                'vers_id': vers_id,
                'woord': woord
            }

    # One result per strophe, ordered by section and strophe number.
    rhyme_gender_results[manuscript] = [
        analyze_rhyme_gender_pattern(strofes[strofe_key], strofe_key[0], strofe_key[1])
        for strofe_key in sorted(strofes)
    ]

print(f"\n Geskipt: {skipped_count} verzen")
print(f"Damaged: {damaged_count} verzen")

# Save the gender analysis
with open('../data/rhyme_gender_analysis.json', 'w', encoding='utf-8') as out_file:
    json.dump(rhyme_gender_results, out_file, indent=2, ensure_ascii=False)

print(" Resultaten opgeslagen in 'rhyme_gender_analysis.json'")

# ============================================================================
# STATISTICS PER MANUSCRIPT AND SECTION
# ============================================================================

print("\n" + "=" * 80)
print("RHYME GENDER ANALYSIS PER MANUSCRIPT")
print("=" * 80)

for manuscript, strofe_results in rhyme_gender_results.items():
    print(f"\n{manuscript}:")

    for section in ('M1', 'M2', 'M3'):
        in_section = [s for s in strofe_results if s['section'] == section]
        if not in_section:
            continue

        print(f"\n  {section} ({len(in_section)} strofes):")

        # Same report for the A rhymes and then the B rhymes.
        for rhyme_type in ('a', 'b'):
            label = rhyme_type.upper()
            masc = sum(s[f'{rhyme_type}_masculine'] for s in in_section)
            fem = sum(s[f'{rhyme_type}_feminine'] for s in in_section)
            unk = sum(s[f'{rhyme_type}_unknown'] for s in in_section)

            # Percentages are relative to the classified rhymes only.
            known = masc + fem
            if known <= 0:
                print(f"    {label}-rijmen: geen data")
                continue

            masc_perc = (masc / known) * 100
            fem_perc = (fem / known) * 100
            print(f"    {label}-rijmen: {masc} masculine ({masc_perc:.1f}%), {fem} feminine ({fem_perc:.1f}%)")
            if unk > 0:
                print(f"              {unk} unknown")
# ============================================================================
# OVERALL ANALYSIS PER SECTION
# ============================================================================

print("\n" + "=" * 80)
print("OVERALL GENDER PATTERNS PER SECTION (ALL MANUSCRIPTS)")
print("=" * 80)

for section in ('M1', 'M2', 'M3'):
    print(f"\n{section}:")

    # Pool the strophes of this section over all manuscripts.
    pooled = [
        s
        for strofe_results in rhyme_gender_results.values()
        for s in strofe_results
        if s['section'] == section
    ]
    print(f"  Totaal {len(pooled)} strofes geanalyseerd")

    for rhyme_type in ('a', 'b'):
        masc = sum(s[f'{rhyme_type}_masculine'] for s in pooled)
        fem = sum(s[f'{rhyme_type}_feminine'] for s in pooled)
        unk = sum(s[f'{rhyme_type}_unknown'] for s in pooled)

        # Nothing is printed for a rhyme type without classified rhymes.
        known = masc + fem
        if known > 0:
            masc_perc = (masc / known) * 100
            fem_perc = (fem / known) * 100
            print(f"\n  {rhyme_type.upper()}-rijmen:")
            print(f"    Masculine: {masc} ({masc_perc:.1f}%)")
            print(f"    Feminine:  {fem} ({fem_perc:.1f}%)")
            if unk > 0:
                print(f"    Unknown:   {unk}")
# ============================================================================
# DEVIANT RHYMES (those that do not meet the expectation)
# ============================================================================

print("\n" + "=" * 80)
print("DEVIANT RHYMES (not matching expected pattern)")
print("=" * 80)


def _unexpected_genders(section):
    """Return the deviant gender for the A rhymes and for the B rhymes.

    M1 and M2 are expected to have masculine A and feminine B rhymes; every
    other section is treated as M3, where the expectation is reversed.
    """
    if section in ('M1', 'M2'):
        return 'feminine', 'masculine'
    return 'masculine', 'feminine'


# Collect the deviant rhymes
deviant_rhymes = {
    'M1': {'a_feminine': [], 'b_masculine': []},
    'M2': {'a_feminine': [], 'b_masculine': []},
    'M3': {'a_masculine': [], 'b_feminine': []}
}

for manuscript, strofe_results in rhyme_gender_results.items():
    for strofe_result in strofe_results:
        section = strofe_result['section']
        strofe_num = strofe_result['strofe']

        for rhyme_type, wrong_gender in zip('ab', _unexpected_genders(section)):
            for rhyme_info in strofe_result[f'{rhyme_type}_rhymes']:
                if rhyme_info['gender'] != wrong_gender:
                    continue
                deviant_rhymes[section][f'{rhyme_type}_{wrong_gender}'].append({
                    'manuscript': manuscript,
                    'strofe': strofe_num,
                    'vers_pos': rhyme_info['vers_pos'],
                    'vers_id': rhyme_info['vers_id'],
                    'word': rhyme_info['word'],
                    'syllables': rhyme_info['syllables'],
                    'stresses': rhyme_info['stresses']
                })

# Print the deviant rhymes per section
for section in ('M1', 'M2', 'M3'):
    print(f"\n{section}:")

    for rhyme_type, wrong_gender in zip('ab', _unexpected_genders(section)):
        expected_gender = 'masculine' if wrong_gender == 'feminine' else 'feminine'
        cases = deviant_rhymes[section][f'{rhyme_type}_{wrong_gender}']
        print(f"\n  {rhyme_type.upper()}-rijmen die {wrong_gender.upper()} zijn (verwacht: {expected_gender}): {len(cases)} gevallen")
        if not cases:
            continue

        print("  Voorbeelden (eerste 100):")
        for item in cases[:100]:
            syllables_str = '-'.join(item['syllables']) if item['syllables'] else 'N/A'
            stresses_str = str(item['stresses']) if item['stresses'] else 'N/A'
            print(f"  {item['vers_id']:20s} {item['word']:15s} → {syllables_str:20s} stress: {stresses_str}")

# Save the deviant rhymes
with open('../data/deviant_rhyme_genders.json', 'w', encoding='utf-8') as out_file:
    json.dump(deviant_rhymes, out_file, indent=2, ensure_ascii=False)

print("\n Afwijkende rijmen opgeslagen in 'deviant_rhyme_genders.json'")

# ============================================================================
# SUMMARY
# ============================================================================

print("\n" + "=" * 80)
print("SUMMARY")
print("=" * 80)

summary_data = {}
for section in ('M1', 'M2', 'M3'):
    # Pool the strophes of this section over all manuscripts.
    pooled = [
        s
        for strofe_results in rhyme_gender_results.values()
        for s in strofe_results
        if s['section'] == section
    ]

    shares = {}
    for rhyme_type in ('a', 'b'):
        masc = sum(s[f'{rhyme_type}_masculine'] for s in pooled)
        fem = sum(s[f'{rhyme_type}_feminine'] for s in pooled)
        known = masc + fem
        # Without classified rhymes the share is reported as 0.
        shares[f'{rhyme_type}_masc_perc'] = (masc / known) * 100 if known > 0 else 0

    summary_data[section] = shares

print("\n┌─────────┬────────────────────┬────────────────────┐")
print("│ Section │ A-rhymes masculine │ B-rhymes masculine │")
print("├─────────┼────────────────────┼────────────────────┤")
for section in ('M1', 'M2', 'M3'):
    a_perc = summary_data[section]['a_masc_perc']
    b_perc = summary_data[section]['b_masc_perc']

    # M1/M2 are expected to have mostly masculine A and mostly feminine B
    # rhymes; for M3 the expectation is reversed.
    if section in ('M1', 'M2'):
        a_ok = a_perc > 50
        b_ok = b_perc < 50
    else:
        a_ok = a_perc < 50
        b_ok = b_perc > 50

    a_status = "✓" if a_ok else "✗"
    b_status = "✓" if b_ok else "✗"

    print(f"│ {section:7s} │ {a_perc:6.1f}% {a_status:9s} │ {b_perc:6.1f}% {b_status:9s} │")

print("└─────────┴────────────────────┴────────────────────┘")

Analyzing gender: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 155.62it/s]



 Geskipt: 0 verzen
Damaged: 1053 verzen
 Resultaten opgeslagen in 'rhyme_gender_analysis.json'

RHYME GENDER ANALYSIS PER MANUSCRIPT

A:

  M1 (75 strofes):
    A-rijmen: 583 masculine (98.6%), 8 feminine (1.4%)
    B-rijmen: 2 masculine (0.5%), 372 feminine (99.5%)

  M2 (26 strofes):
    A-rijmen: 208 masculine (100.0%), 0 feminine (0.0%)
    B-rijmen: 0 masculine (0.0%), 130 feminine (100.0%)

  M3 (39 strofes):
    A-rijmen: 2 masculine (0.7%), 278 feminine (99.3%)
    B-rijmen: 175 masculine (100.0%), 0 feminine (0.0%)

Ant:

B:

  M1 (75 strofes):
    A-rijmen: 588 masculine (98.7%), 8 feminine (1.3%)
    B-rijmen: 2 masculine (0.5%), 370 feminine (99.5%)

  M2 (26 strofes):
    A-rijmen: 206 masculine (100.0%), 0 feminine (0.0%)
    B-rijmen: 0 masculine (0.0%), 129 feminine (100.0%)

  M3 (39 strofes):
    A-rijmen: 3 masculine (1.0%), 308 feminine (99.0%)
    B-rijmen: 195 masculine (100.0%), 0 feminine (0.0%)

Br:

  M1 (25 strofes):
    A-rijmen: 163 masculine (100.0%), 0 f

In [7]:
# ============================================================================
# PATTERN DEVIATION PERCENTAGE PER MANUSCRIPT
# ============================================================================

# Manuscript -> deviation/compliance percentages and the underlying counts.
manuscript_deviation_percentage = {}

for manuscript in sorted(rhyme_gender_results.keys()):
    total_rhymes = 0
    # Named deviant_count rather than deviant_rhymes so that the
    # deviant_rhymes dict built by the earlier analysis cell is not
    # overwritten with an integer.
    deviant_count = 0

    for strofe_result in rhyme_gender_results[manuscript]:
        if strofe_result['section'] in ('M1', 'M2'):
            # Expected: A = masculine, B = feminine
            unexpected = {'a_rhymes': 'feminine', 'b_rhymes': 'masculine'}
        else:  # M3
            # Expected: A = feminine, B = masculine
            unexpected = {'a_rhymes': 'masculine', 'b_rhymes': 'feminine'}

        for rhyme_key, wrong_gender in unexpected.items():
            for rhyme in strofe_result[rhyme_key]:
                # Unclassified rhymes count neither for nor against.
                if rhyme['gender'] == 'unknown':
                    continue
                total_rhymes += 1
                if rhyme['gender'] == wrong_gender:
                    deviant_count += 1

    # Percentage of deviations; None when nothing could be classified.
    if total_rhymes > 0:
        deviation_percentage = (deviant_count / total_rhymes) * 100
        compliance_percentage = 100 - deviation_percentage
    else:
        deviation_percentage = None
        compliance_percentage = None

    manuscript_deviation_percentage[manuscript] = {
        'deviation_percentage': deviation_percentage,
        'compliance_percentage': compliance_percentage,
        'deviant_rhymes': deviant_count,
        'total_rhymes': total_rhymes
    }


def _deviation_sort_key(item):
    """Sort by deviation percentage; manuscripts without data go last."""
    percentage = item[1]['deviation_percentage']
    return float('inf') if percentage is None else percentage


# Print the results ordered by deviation percentage (low to high). The sort
# is stable, so ties keep the alphabetical manuscript order from above.
print("\n")
sorted_manuscripts = sorted(
    manuscript_deviation_percentage.items(),
    key=_deviation_sort_key,
)

for manuscript, data in sorted_manuscripts:
    if data['deviation_percentage'] is None:
        print(f"{manuscript}: NO DATA")
        continue

    dev_perc = data['deviation_percentage']
    comp_perc = data['compliance_percentage']
    deviant = data['deviant_rhymes']
    total = data['total_rhymes']

    # Visual indicator
    if dev_perc <= 10:
        status = "✓✓"
    elif dev_perc <= 20:
        status = "✓ "
    elif dev_perc <= 40:
        status = "~ "
    else:
        status = "✗ "

    print(f"{manuscript}: {status} {dev_perc:5.1f}% deviant = {comp_perc:5.1f}% compliant ({deviant}/{total} deviant)")



Br: ✓✓   0.0% deviant = 100.0% compliant (0/539 deviant)
D2: ✓✓   0.0% deviant = 100.0% compliant (0/145 deviant)
E: ✓✓   0.0% deviant = 100.0% compliant (0/270 deviant)
Ge: ✓✓   0.0% deviant = 100.0% compliant (0/76 deviant)
K: ✓✓   0.0% deviant = 100.0% compliant (0/67 deviant)
O: ✓✓   0.0% deviant = 100.0% compliant (0/1816 deviant)
W: ✓✓   0.0% deviant = 100.0% compliant (0/507 deviant)
G: ✓✓   0.1% deviant =  99.9% compliant (1/1767 deviant)
F: ✓✓   0.2% deviant =  99.8% compliant (3/1818 deviant)
H: ✓✓   0.3% deviant =  99.7% compliant (1/384 deviant)
D: ✓✓   0.3% deviant =  99.7% compliant (6/1807 deviant)
Y: ✓✓   0.4% deviant =  99.6% compliant (1/254 deviant)
L: ✓✓   0.4% deviant =  99.6% compliant (2/491 deviant)
C: ✓✓   0.5% deviant =  99.5% compliant (8/1469 deviant)
A: ✓✓   0.7% deviant =  99.3% compliant (12/1758 deviant)
B: ✓✓   0.7% deviant =  99.3% compliant (13/1809 deviant)
Z: ✓✓   1.8% deviant =  98.2% compliant (9/493 deviant)
Ant: NO DATA
