In [8]:
import csv
import json
import random
import re
import os
from collections import defaultdict
from copy import deepcopy

def print_progress(iteration, total, prefix='Progress:', length=50):
    percent = ("{0:.1f}").format(100 * (iteration / float(total)))
    filled_length = int(length * iteration // total)
    bar = '█' * filled_length + '-' * (length - filled_length)
    print(f'\r{prefix} |{bar}| {percent}%', end='\r')
    if iteration == total:
        print()

def find_csv_file():
    csv_files = [f for f in os.listdir('.') if f.endswith('.csv')]
    if csv_files:
        return csv_files[0]
    return None

def csv_to_bingo_json(csv_file_path, output_file_path=None):
    bingo_list = {}
    
    try:
        with open(csv_file_path, 'r', encoding='utf-8-sig') as csv_file:
            csv_reader = csv.DictReader(csv_file)
            
            for row in csv_reader:
                try:
                    classification = int(row['Classification'])
                    id = int(row['ID'])
                    objective = row['Objective']
                    core_tags = [tag.strip() for tag in row['Core Tags'].split(',') if tag.strip()]
                    supp_tags = [tag.strip() for tag in row['Supplementary Tags'].split(',') if tag.strip()]
                    restrictions = [r.strip() for r in row['Mutually Exclusive With'].split(',') if r.strip()]
                    
                    if classification not in bingo_list:
                        bingo_list[classification] = []
                    
                    bingo_list[classification].append({
                        "name": objective,
                        "types": core_tags,
                        "id": id,
                        "SuppTags": supp_tags,
                        "Restrictions": restrictions
                    })
                except ValueError as e:
                    print(f"Error processing row: {row}. Error: {e}")
                    continue
        
        if output_file_path:
            formatted_bingo_list = []
            for i in range(1, max(bingo_list.keys()) + 1):
                if i in bingo_list:
                    formatted_bingo_list.append(f"bingoList[{i}] = {json.dumps(bingo_list[i], indent=2)};")
                else:
                    formatted_bingo_list.append(f"bingoList[{i}] = [];")
            
            with open(output_file_path, 'w') as js_file:
                js_file.write("var bingoGenerator = require(\"./generators/generator_bases/srl_generator_v5.js\");\n")
                js_file.write("var bingoList = [];\n\n")
                js_file.write("\n\n".join(formatted_bingo_list))
        
        return bingo_list
    except FileNotFoundError:
        print(f"Error: The file {csv_file_path} was not found.")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None

def is_valid_objective(objective, selected_objectives, classification_count, max_per_classification, classification):
    for selected in selected_objectives:
        if str(selected['id']) in objective['Restrictions'] or str(objective['id']) in selected['Restrictions']:
            return False
    
    if any(obj['name'] == objective['name'] for obj in selected_objectives):
        return False
    
    if classification_count[classification] >= max_per_classification:
        return False
    
    return True

def check_tag_occurrences(selected_objectives, tag_limits, track_violations=False):
    tag_counts = defaultdict(int)
    for obj in selected_objectives:
        for tag in obj['types']:
            if tag in tag_limits:
                tag_counts[tag] += 1
    
    violations = {}
    for tag, count in tag_counts.items():
        if tag_limits[tag] != '-' and count > int(tag_limits[tag]):
            violations[tag] = count
    
    if track_violations:
        return violations, tag_counts
    return violations

def is_valid_reroll_objective(objective, selected_objectives, classification_count, max_per_classification, classification, tag_limits):
    if not is_valid_objective(objective, selected_objectives, classification_count, max_per_classification, classification):
        return False
    
    temp_selected = selected_objectives + [objective]
    violations = check_tag_occurrences(temp_selected, tag_limits)
    
    return len(violations) == 0

def classify_into_buckets(classification):
    if classification in [1, 2, 3, 4, 8]:
        return "A"
    elif classification in [5, 6, 7, 9, 23]:
        return "B"
    elif classification in [11, 12, 21]:
        return "C"
    elif classification in [10, 13, 14, 15, 16]:
        return "D"
    elif classification in [17, 18, 19, 20]:
        return "E"
    elif classification in [22, 24, 25]:
        return "F"
    else:
        return "Unknown"

def select_random_objectives(bingo_list, race_mode=False, remove_easy=False, harder_board=False, 
                           tag_limits=None, bucket_mode=False, bucket_hard_mode=False, 
                           exclude_boss_objectives=False, track_violations=False):
    all_classifications = list(bingo_list.keys())
    selected_objectives = []
    classification_count = defaultdict(int)
    max_per_classification = 2 if race_mode else float('inf')
    
    # Add violation tracking
    if track_violations:
        violation_counts = defaultdict(int)
        total_rerolls = 0

    if bucket_mode:
        bucket_objectives = {
            "A": [], "B": [], "C": [], "D": [], "E": [], "F": []
        }
        
        if bucket_hard_mode:
            bucket_limits = {
                "A": 1, "B": 4, "C": 4, "D": 8, "E": 5, "F": 3
            }
        else:
            bucket_limits = {
                "A": 4, "B": 6, "C": 3, "D": 7, "E": 4, "F": 1
            }

        def select_from_bucket(bucket):
            nonlocal total_rerolls
            objectives = bucket_objectives[bucket]
            random.shuffle(objectives)
            for objective in objectives:
                if exclude_boss_objectives and "Boss" in objective['types']:
                    continue
                classification = next(c for c in all_classifications if objective in bingo_list[c])
                if is_valid_objective(objective, selected_objectives, classification_count, 
                                   max_per_classification, classification):
                    # Check for tag limit violations
                    if track_violations:
                        temp_selected = selected_objectives + [objective]
                        violations, _ = check_tag_occurrences(temp_selected, tag_limits, True)
                        if violations:
                            for tag in violations:
                                violation_counts[tag] += 1
                            total_rerolls += 1
                            continue
                    
                    selected_objectives.append(objective)
                    classification_count[classification] += 1
                    return True
            return False

        for classification, objectives in bingo_list.items():
            bucket = classify_into_buckets(classification)
            if bucket != "Unknown":
                bucket_objectives[bucket].extend(objectives)

        bucket_selections = []
        for bucket, limit in bucket_limits.items():
            bucket_selections.extend([bucket] * limit)
        random.shuffle(bucket_selections)

        for bucket in bucket_selections:
            if not select_from_bucket(bucket):
                continue

        if track_violations:
            return selected_objectives, violation_counts, total_rerolls
        return selected_objectives

def analyze_tag_conflicts(bingo_list):
    """Analyzes which tags conflict with each other most frequently."""
    tag_conflicts = defaultdict(lambda: defaultdict(int))
    tag_pairs = []
    
    # Gather all unique tags
    all_tags = set()
    for classification in bingo_list.values():
        for objective in classification:
            all_tags.update(objective['types'])
    
    # For each objective, check which tags conflict with its tags
    for classification1, objectives1 in bingo_list.items():
        for obj1 in objectives1:
            for classification2, objectives2 in bingo_list.items():
                for obj2 in objectives2:
                    # Skip self-comparison
                    if obj1['id'] == obj2['id']:
                        continue
                        
                    # Check if objectives conflict
                    if (str(obj1['id']) in obj2['Restrictions'] or 
                        str(obj2['id']) in obj1['Restrictions']):
                        # Record conflicts between all tag pairs
                        for tag1 in obj1['types']:
                            for tag2 in obj2['types']:
                                if tag1 < tag2:  # Ensure consistent ordering
                                    tag_conflicts[tag1][tag2] += 1
                                    tag_pairs.append((tag1, tag2))
                                elif tag2 < tag1:
                                    tag_conflicts[tag2][tag1] += 1
                                    tag_pairs.append((tag2, tag1))
    
    return tag_conflicts, tag_pairs

def analyze_objective_frequencies(iterations=10000):
    default_csv = find_csv_file()
    if not default_csv:
        print("No CSV file found in the current directory.")
        return
    
    print(f"Using CSV file: {default_csv}")
    bingo_list = csv_to_bingo_json(default_csv)
    
    if bingo_list is None:
        print("Failed to generate bingo list. Exiting.")
        return

    tag_limits = {
        "Whirlwind": "5", "Lash": "2", "Pound": "2", "Scoop": "2", "Reveal": "2",
        "Douse": "2", "Frost": "2", "Growth": "1", "Cyclone": "2", "Sand": "2",
        "Parch": "2", "Burst": "2", "Grind": "-", "Hover": "-",
        "Lift": "1", "Carry": "1", "Force": "1", "Blaze": "2", "Teleport": "2",
        "Mind Read": "1", "RarePsy": "1"
    }

    freq_hard = defaultdict(int)
    freq_normal = defaultdict(int)
    objective_details = {}

    for classification, objectives in bingo_list.items():
        for obj in objectives:
            objective_details[obj['name']] = {
                'id': obj['id'],
                'classification': classification,
                'bucket': classify_into_buckets(classification),
                'core_tags': ', '.join(obj['types']),
                'supp_tags': ', '.join(obj['SuppTags']),
                'restrictions': ', '.join(obj['Restrictions'])
            }
    
    # Add violation tracking
    hard_mode_violations = defaultdict(int)
    hard_mode_rerolls = 0
    normal_mode_violations = defaultdict(int)
    normal_mode_rerolls = 0

    print("\nRunning bucket mode + hard mode + boss objectives...")
    for i in range(iterations):
        objectives, violations, rerolls = select_random_objectives(
            deepcopy(bingo_list),
            race_mode=False,
            remove_easy=False,
            harder_board=False,
            tag_limits=tag_limits,
            bucket_mode=True,
            bucket_hard_mode=True,
            exclude_boss_objectives=True,
            track_violations=True
        )
        for obj in objectives:
            freq_hard[obj['name']] += 1
        for tag, count in violations.items():
            hard_mode_violations[tag] += count
        hard_mode_rerolls += rerolls
        print_progress(i + 1, iterations)

    print("\nRunning bucket mode + normal mode + boss objectives...")
    for i in range(iterations):
        objectives, violations, rerolls = select_random_objectives(
            deepcopy(bingo_list),
            race_mode=False,
            remove_easy=False,
            harder_board=False,
            tag_limits=tag_limits,
            bucket_mode=True,
            bucket_hard_mode=False,
            exclude_boss_objectives=True,
            track_violations=True
        )
        for obj in objectives:
            freq_normal[obj['name']] += 1
        for tag, count in violations.items():
            normal_mode_violations[tag] += count
        normal_mode_rerolls += rerolls
        print_progress(i + 1, iterations)
    
    fieldnames = ['Objective', 'ID', 'Classification', 'Bucket', 'Core Tags', 'Supplementary Tags', 
                 'Restrictions', 'Selection Count', 'Selection Frequency']

    with open('hard_mode_frequencies.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for name, count in sorted(freq_hard.items(), key=lambda x: (-x[1], x[0])):
            details = objective_details[name]
            writer.writerow({
                'Objective': name,
                'ID': details['id'],
                'Classification': details['classification'],
                'Bucket': details['bucket'],
                'Core Tags': details['core_tags'],
                'Supplementary Tags': details['supp_tags'],
                'Restrictions': details['restrictions'],
                'Selection Count': count,
                'Selection Frequency': f"{count/iterations:.1%}"
            })

    with open('normal_mode_frequencies.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for name, count in sorted(freq_normal.items(), key=lambda x: (-x[1], x[0])):
            details = objective_details[name]
            writer.writerow({
                'Objective': name,
                'ID': details['id'],
                'Classification': details['classification'],
                'Bucket': details['bucket'],
                'Core Tags': details['core_tags'],
                'Supplementary Tags': details['supp_tags'],
                'Restrictions': details['restrictions'],
                'Selection Count': count,
                'Selection Frequency': f"{count/iterations:.1%}"
            })

    # Add tag conflict analysis
    print("\nAnalyzing tag conflicts...")
    tag_conflicts, tag_pairs = analyze_tag_conflicts(bingo_list)
    
    # Sort conflicts by frequency
    conflict_summary = []
    for tag1 in tag_conflicts:
        for tag2, count in tag_conflicts[tag1].items():
            conflict_summary.append({
                'Tag1': tag1,
                'Tag2': tag2,
                'Conflict_Count': count
            })
    
    # Sort by conflict count in descending order
    conflict_summary.sort(key=lambda x: (-x['Conflict_Count'], x['Tag1'], x['Tag2']))
    
    # Write tag conflicts to CSV
    with open('tag_conflicts.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=['Tag1', 'Tag2', 'Conflict_Count'])
        writer.writeheader()
        for conflict in conflict_summary:
            writer.writerow(conflict)
    
    # Print top conflicts
    print("\nTop 10 Most Conflicted Tag Pairs:")
    for i, conflict in enumerate(conflict_summary[:10], 1):
        print(f"{i}. {conflict['Tag1']} vs {conflict['Tag2']}: {conflict['Conflict_Count']} conflicts")
    
    # Calculate total conflicts per tag
    total_conflicts_per_tag = defaultdict(int)
    for conflict in conflict_summary:
        total_conflicts_per_tag[conflict['Tag1']] += conflict['Conflict_Count']
        total_conflicts_per_tag[conflict['Tag2']] += conflict['Conflict_Count']
    
    # Sort tags by total conflicts
    sorted_tag_conflicts = sorted(total_conflicts_per_tag.items(), key=lambda x: (-x[1], x[0]))
    
    # Write violation statistics
    with open('tag_limit_violations.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=['Tag', 'Limit', 'Hard_Mode_Violations', 
                                             'Hard_Mode_Rate', 'Normal_Mode_Violations', 
                                             'Normal_Mode_Rate'])
        writer.writeheader()
        all_tags = set(hard_mode_violations.keys()) | set(normal_mode_violations.keys())
        for tag in sorted(all_tags):
            writer.writerow({
                'Tag': tag,
                'Limit': tag_limits[tag],
                'Hard_Mode_Violations': hard_mode_violations[tag],
                'Hard_Mode_Rate': f"{hard_mode_violations[tag]/hard_mode_rerolls:.1%}" if hard_mode_rerolls else "0%",
                'Normal_Mode_Violations': normal_mode_violations[tag],
                'Normal_Mode_Rate': f"{normal_mode_violations[tag]/normal_mode_rerolls:.1%}" if normal_mode_rerolls else "0%"
            })

    print("\nTag Limit Violation Statistics:")
    print(f"\nHard Mode - Total rerolls needed: {hard_mode_rerolls}")
    print("\nTop 10 Most Violated Tag Limits (Hard Mode):")
    for i, (tag, violations) in enumerate(sorted(hard_mode_violations.items(), 
                                               key=lambda x: (-x[1], x[0]))[:10], 1):
        print(f"{i}. {tag} (limit: {tag_limits[tag]}): {violations} violations "
              f"({violations/hard_mode_rerolls:.1%} of rerolls)")

    print(f"\nNormal Mode - Total rerolls needed: {normal_mode_rerolls}")
    print("\nTop 10 Most Violated Tag Limits (Normal Mode):")
    for i, (tag, violations) in enumerate(sorted(normal_mode_violations.items(), 
                                               key=lambda x: (-x[1], x[0]))[:10], 1):
        print(f"{i}. {tag} (limit: {tag_limits[tag]}): {violations} violations ")
    
    print("\nAnalysis complete!")
    print("Results saved to:")
    print("- hard_mode_frequencies.csv")
    print("- normal_mode_frequencies.csv")
    print("- tag_conflicts.csv")
    print("- tag_conflict_summary.csv")
    
    print("\nSummary Statistics:")
    print("\nHard Mode:")
    print(f"Total unique objectives selected: {len(freq_hard)}")
    print(f"Most common objective: {max(freq_hard.items(), key=lambda x: x[1])[0]} "
          f"({max(freq_hard.items(), key=lambda x: x[1])[1]/iterations:.1%})")
    print(f"Least common objective: {min(freq_hard.items(), key=lambda x: x[1])[0]} "
          f"({min(freq_hard.items(), key=lambda x: x[1])[1]/iterations:.1%})")
    
    print("\nNormal Mode:")
    print(f"Total unique objectives selected: {len(freq_normal)}")
    print(f"Most common objective: {max(freq_normal.items(), key=lambda x: x[1])[0]} "
          f"({max(freq_normal.items(), key=lambda x: x[1])[1]/iterations:.1%})")
    print(f"Least common objective: {min(freq_normal.items(), key=lambda x: x[1])[0]} "
          f"({min(freq_normal.items(), key=lambda x: x[1])[1]/iterations:.1%})")

if __name__ == "__main__":
    analyze_objective_frequencies()

Using CSV file: Bingo Eval.csv

Running bucket mode + hard mode + boss objectives...
Progress: |██████████████████████████████████████████████████| 100.0%

Running bucket mode + normal mode + boss objectives...
Progress: |██████████████████████████████████████████████████| 100.0%

Analyzing tag conflicts...

Top 10 Most Conflicted Tag Pairs:
1. Class vs Djinn_c: 242 conflicts
2. Collect_s vs Summon: 160 conflicts
3. Forge vs Shopping: 144 conflicts
4. Character vs Djinn_c: 142 conflicts
5. Character vs Collect_c: 138 conflicts
6. Character vs Inventory: 138 conflicts
7. Battle vs Summon: 130 conflicts
8. Djinn_c vs Summon: 130 conflicts
9. Battle vs Djinn_c: 112 conflicts
10. Collect_c vs Inventory: 90 conflicts

Tag Limit Violation Statistics:

Hard Mode - Total rerolls needed: 254576

Top 10 Most Violated Tag Limits (Hard Mode):
1. Pound (limit: 2): 43625 violations (17.1% of rerolls)
2. Lift (limit: 1): 40401 violations (15.9% of rerolls)
3. Reveal (limit: 2): 34248 violations (13.5