In [1]:
import json
import math
import numpy as np
from scipy.optimize import linear_sum_assignment

def load_data(filename):
    """Read a JSON file and return its parsed content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (a dictionary for
        documents whose top level is a JSON object).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON is exchanged as UTF-8; pinning the encoding keeps non-ASCII
    # participant names from being mis-decoded on platforms whose default
    # text encoding is something else (e.g. cp1252 on Windows).
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)

def preprocess(data):
    """Extract participants and constraints from the parsed season data.

    Args:
        data: Dictionary with the keys ``"participants"`` (containing
            ``"men"`` and ``"women"``) and ``"episodes"``. Each episode may
            carry ``"truth_booths"`` (entries with ``"man"``, ``"woman"``,
            ``"is_match"``) and a ``"match_ceremony"`` (with ``"score"`` and
            ``"pairs"``). ``"known_matching"`` is optional.

    Returns:
        A 6-tuple ``(real_men, real_women, must_match, cannot_match,
        matching_ceremonies, known_matching)`` where

        * ``must_match`` maps a man to the woman confirmed as his perfect
          match by a truth booth,
        * ``cannot_match`` maps a man to the set of women a truth booth
          ruled out for him,
        * ``matching_ceremonies`` is a list of ``{"score", "pairs"}``
          dictionaries with ``pairs`` as ``(man, woman)`` tuples,
        * ``known_matching`` is the ground truth from the file, or ``{}``
          when the file has none.
    """
    participants = data["participants"]
    real_men = participants["men"]
    real_women = participants["women"]

    must_match = {}  # man -> woman (truth booth with is_match == True)
    cannot_match = {man: set() for man in real_men}
    matching_ceremonies = []

    for episode in data["episodes"]:
        # ``or ()`` covers both a missing key and an explicit null/empty list.
        for booth in episode.get("truth_booths") or ():
            man = booth["man"]
            woman = booth["woman"]
            if booth["is_match"]:
                must_match[man] = woman
            else:
                # setdefault: a booth may name a man that is absent from the
                # participants list; record the exclusion instead of raising
                # KeyError (all readers use cannot_match.get(...)).
                cannot_match.setdefault(man, set()).add(woman)

        ceremony = episode.get("match_ceremony")
        if ceremony:
            score = ceremony["score"]
            pairs = [(p["man"], p["woman"]) for p in ceremony["pairs"]]
            matching_ceremonies.append({"score": score, "pairs": pairs})

    # Ground truth is optional in the input file.
    known_matching = data.get("known_matching", {})

    return real_men, real_women, must_match, cannot_match, matching_ceremonies, known_matching

def _is_confirmed_pair(man, woman, must_match):
    """Return True when (man, woman) is recorded as a confirmed perfect match."""
    return man in must_match and must_match[man] == woman


def _assignment_weight(assignment, must_match, matching_ceremonies, lambda_value):
    """Return the weight of a complete assignment.

    The weight is 0.0 when the assignment contains an unconfirmed pair that
    sat together in a ceremony with score 0; otherwise it is
    ``exp(-lambda_value * cost)`` with ``cost`` being the summed squared
    deviation between achieved and expected ceremony scores.
    """
    # Blackout check: a score-0 ceremony rules out all of its unconfirmed pairs.
    for ceremony in matching_ceremonies:
        if ceremony["score"] != 0:
            continue
        if any(
            assignment.get(m) == w and not _is_confirmed_pair(m, w, must_match)
            for (m, w) in ceremony["pairs"]
        ):
            return 0.0

    penalty = 0
    for ceremony in matching_ceremonies:
        seated = ceremony["pairs"]
        already_confirmed = sum(1 for (m, w) in seated if _is_confirmed_pair(m, w, must_match))
        hits = sum(1 for (m, w) in seated if assignment.get(m) == w)
        # Both sides are reduced by the confirmed pairs before comparing.
        open_expected = ceremony["score"] - already_confirmed
        open_hits = hits - already_confirmed
        penalty += (open_hits - open_expected) ** 2
    return math.exp(-lambda_value * penalty)


def backtrack(men, available_women, current_assignment, must_match, cannot_match,
              matching_ceremonies, lambda_value, pair_weights, total_weight_container, solutions, depth=0):
    """Enumerate all admissible assignments recursively and accumulate their weights.

    Args:
        men: Men still to be assigned, processed front to back.
        available_women: Women not yet used in ``current_assignment``.
        current_assignment: Partial mapping man -> woman built so far.
        must_match: Confirmed pairs (man -> woman); such a man gets only her.
        cannot_match: man -> set of women excluded for him.
        matching_ceremonies: List of ``{"score", "pairs"}`` dictionaries.
        lambda_value: Factor in ``exp(-lambda_value * cost)``.
        pair_weights: (man, woman) -> accumulated weight; updated in place.
            Must already contain every pair that can be assigned.
        total_weight_container: One-element list holding the running weight
            sum; updated in place.
        solutions: List receiving ``(assignment, weight)`` for every complete
            assignment with positive weight.
        depth: Recursion depth; passed along but not otherwise used.
    """
    if not men:
        weight = _assignment_weight(current_assignment, must_match, matching_ceremonies, lambda_value)
        for pair in current_assignment.items():
            pair_weights[pair] += weight
        total_weight_container[0] += weight
        if weight > 0:
            solutions.append((current_assignment.copy(), weight))
        return

    next_man, remaining_men = men[0], men[1:]

    # A confirmed man has exactly one candidate (none if she is already taken);
    # everybody else may try each woman that is still free.
    if next_man in must_match:
        fixed_woman = must_match[next_man]
        candidates = [fixed_woman] if fixed_woman in available_women else []
    else:
        candidates = list(available_women)

    excluded = cannot_match.get(next_man, set())
    for woman in candidates:
        if woman in excluded:
            continue
        extended_assignment = {**current_assignment, next_man: woman}
        remaining_women = available_women.copy()
        remaining_women.remove(woman)
        backtrack(remaining_men, remaining_women, extended_assignment, must_match,
                  cannot_match, matching_ceremonies, lambda_value, pair_weights,
                  total_weight_container, solutions, depth + 1)

def apply_blackout_rules(probabilities, matching_ceremonies, must_match):
    """Zero the probability of unconfirmed pairs from effective blackout ceremonies.

    A ceremony counts as a blackout here when its score minus the number of
    its confirmed pairs (according to ``must_match``) is 0. For such a
    ceremony every unconfirmed pair that is present in ``probabilities`` is
    set to 0.0. The dictionary is modified in place; nothing is returned.
    """
    def is_confirmed(man, woman):
        return man in must_match and must_match[man] == woman

    for ceremony in matching_ceremonies:
        score = ceremony["score"]
        seated = ceremony["pairs"]
        confirmed_count = sum(1 for (man, woman) in seated if is_confirmed(man, woman))
        if score - confirmed_count != 0:
            continue
        for (man, woman) in seated:
            if is_confirmed(man, woman):
                continue
            key = (man, woman)
            if key in probabilities:
                probabilities[key] = 0.0

def simulate_run(must_match, cannot_match, matching_ceremonies, real_men, real_women, lambda_value):
    """Run the exhaustive backtracking search and derive pair probabilities.

    The shorter of the two participant lists is padded with
    ``dummy_man_<i>`` / ``dummy_woman_<i>`` entries so that both sides have
    the same length. Each pair's probability is its accumulated weight
    divided by the total weight (0.0 when the total is not positive); pairs
    whose man or woman name contains ``"dummy"`` are left out. Finally the
    blackout rules are applied to the resulting dictionary.

    Returns:
        ``(probabilities, men, women, solutions)`` where ``men`` / ``women``
        are the padded lists and ``solutions`` is the list filled by
        ``backtrack``.
    """
    men = real_men[:]
    women = real_women[:]
    surplus_women = len(real_women) - len(real_men)
    if surplus_women > 0:
        men = men + [f"dummy_man_{i}" for i in range(1, surplus_women + 1)]
    elif surplus_women < 0:
        women = women + [f"dummy_woman_{i}" for i in range(1, -surplus_women + 1)]

    pair_weights = {(man, woman): 0.0 for man in men for woman in women}
    total_weight_container = [0.0]  # one-element list so backtrack can update it in place
    solutions = []
    backtrack(men, set(women), {}, must_match, cannot_match, matching_ceremonies,
              lambda_value, pair_weights, total_weight_container, solutions)

    total_weight = total_weight_container[0]
    probabilities = {}
    for (man, woman), weight in pair_weights.items():
        if "dummy" in man or "dummy" in woman:
            continue
        probabilities[(man, woman)] = weight / total_weight if total_weight > 0 else 0.0

    apply_blackout_rules(probabilities, matching_ceremonies, must_match)
    return probabilities, men, women, solutions

def extract_final_matching(probabilities, real_men, real_women):
    """Extract one final matching from the pair-probability dictionary.

    A square cost matrix of negated probabilities is built and solved with
    the Hungarian algorithm (``linear_sum_assignment``). When the two groups
    differ in size, the matrix is padded with zero-cost rows or columns that
    stand for dummy partners; assignments to those are dropped.

    Args:
        probabilities: Mapping ``(man, woman) -> probability``. Missing pairs
            count as probability 0.
        real_men: Sequence of men.
        real_women: Sequence of women.

    Returns:
        Dictionary man -> woman containing only real participants.
    """
    men = list(real_men)
    women = list(real_women)
    n = max(len(men), len(women))
    if n == 0:
        # Nothing to assign; avoid handing an empty matrix to the solver.
        return {}

    # Rows/columns beyond the real participants stay at cost 0 (dummy padding).
    cost_matrix = np.zeros((n, n))
    for i, man in enumerate(men):
        for j, woman in enumerate(women):
            cost_matrix[i, j] = -probabilities.get((man, woman), 0)

    row_ind, col_ind = linear_sum_assignment(cost_matrix)

    # Padding is recognised by index rather than by a "dummy" name prefix, so
    # a real participant whose name happens to start with "dummy" is kept.
    return {
        men[i]: women[j]
        for i, j in zip(row_ind, col_ind)
        if i < len(men) and j < len(women)
    }

def compute_matching_error(predicted_matching, known_matching):
    """Count the ground-truth pairs that the predicted matching gets wrong.

    A man from ``known_matching`` who is missing in ``predicted_matching``
    counts as wrong as well.
    """
    return sum(
        1
        for man, true_partner in known_matching.items()
        if predicted_matching.get(man) != true_partner
    )

def evaluate_model(lambda_value, known_matching, must_match, cannot_match, matching_ceremonies, real_men, real_women):
    """Score one lambda value against the known perfect matches.

    Runs a simulation with ``lambda_value``, extracts the final matching
    from the resulting probabilities and counts how many known pairs it
    misses.

    Returns:
        ``(error, final_matching)``.
    """
    probabilities, _men, _women, _solutions = simulate_run(
        must_match, cannot_match, matching_ceremonies, real_men, real_women, lambda_value
    )
    predicted = extract_final_matching(probabilities, real_men, real_women)
    return compute_matching_error(predicted, known_matching), predicted

def parameter_optimization(known_matching, must_match, cannot_match, matching_ceremonies, real_men, real_women,
                           lambda_values=None):
    """Grid-search the lambda parameter against the known matching.

    Every candidate is evaluated with ``evaluate_model``; the first candidate
    with the strictly smallest matching error wins. One progress line per
    candidate is printed.

    Args:
        known_matching: Ground truth man -> woman.
        must_match, cannot_match, matching_ceremonies, real_men, real_women:
            Passed through to ``evaluate_model`` unchanged.
        lambda_values: Optional iterable of lambda candidates. Defaults to
            20 evenly spaced values from 0.1 to 2.0.

    Returns:
        ``(best_lambda, best_error, best_matching)``. For an empty candidate
        list this is ``(None, inf, None)``.
    """
    if lambda_values is None:
        # linspace states the number of points and the inclusive end point
        # explicitly; a float-step arange leaves the inclusion of the last
        # value to rounding.
        lambda_values = np.linspace(0.1, 2.0, 20)

    best_lambda = None
    best_error = float('inf')
    best_matching = None
    for lambda_val in lambda_values:
        error, matching = evaluate_model(lambda_val, known_matching, must_match, cannot_match,
                                         matching_ceremonies, real_men, real_women)
        print(f"Lambda: {lambda_val:.1f}, Matching Error: {error}")
        # Strict comparison: on ties the earliest (smallest) lambda is kept.
        if error < best_error:
            best_error = error
            best_lambda = lambda_val
            best_matching = matching
    return best_lambda, best_error, best_matching

def print_summary(real_men, must_match, cannot_match, probabilities):
    """Print a three-part summary of the current state of knowledge.

    1. Perfect matches (from ``must_match``).
    2. Excluded pairings (from ``cannot_match``, for the men in ``real_men``).
    3. Possible pairings: pairs from ``probabilities`` that are neither
       confirmed nor excluded and have a probability greater than 0.
    """
    print("\n=== Zusammenfassung ===\n")

    print("Perfekte Matches:")
    for man in must_match:
        print(f"{man} - {must_match[man]}")

    print("\nAusgeschlossene Paarungen:")
    for man in real_men:
        for woman in cannot_match.get(man, []):
            print(f"{man} - {woman}")

    print("\nMögliche Paarungen:")
    for pair, prob in probabilities.items():
        man, woman = pair
        is_confirmed = man in must_match and must_match[man] == woman
        if is_confirmed or woman in cannot_match.get(man, set()):
            continue
        if prob > 0:
            print(f"{man} - {woman}: {prob:.4f}")

def print_final_matching(final_matching):
    """Print the final matching, one ``man - woman`` line per pair."""
    print("\n=== Finales Matching ===\n")
    for pair in final_matching.items():
        print("{} - {}".format(*pair))

def print_parameter_optimization_result(best_lambda, best_error, best_matching):
    """Print the outcome of the lambda search: best lambda, its error and its matching."""
    print("\n=== Parameteroptimierung ===\n")
    print(f"Optimaler Lambda-Wert: {best_lambda:.2f}")
    print(f"Matching Error: {best_error}")
    print("Finales Matching (aus der Optimierung):")
    for pair in best_matching.items():
        print("{} - {}".format(*pair))

def main():
    """Load Season_4.json, run the search, optimise lambda and print all results."""
    season = load_data("Season_4.json")
    (real_men, real_women, must_match, cannot_match,
     matching_ceremonies, known_matching) = preprocess(season)
    # NOTE: there is deliberately no early exit when known_matching is empty;
    # the optimisation below then runs against an empty ground truth.

    # Exhaustive search first (gives an impression of the solution space).
    baseline_run = simulate_run(must_match, cannot_match, matching_ceremonies,
                                real_men, real_women, lambda_value=1.0)
    probabilities, solutions = baseline_run[0], baseline_run[3]
    print(f"\nAnzahl gefundener Lösungen: {len(solutions)}")
    print_summary(real_men, must_match, cannot_match, probabilities)

    # Parameter optimisation against the known ground truth.
    best_lambda, best_error, best_matching = parameter_optimization(
        known_matching, must_match, cannot_match, matching_ceremonies, real_men, real_women
    )
    print_parameter_optimization_result(best_lambda, best_error, best_matching)

    # The final matching is taken from the lambda=1.0 probabilities computed
    # above, not from the optimised lambda.
    print_final_matching(extract_final_matching(probabilities, real_men, real_women))

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()



Anzahl gefundener Lösungen: 131280

=== Zusammenfassung ===

Perfekte Matches:
cris - stefanie
ken - caroline

Ausgeschlossene Paarungen:
burim - dorna
burim - carina
joel - juliette
kenneth - dorna
marwin - valeria
marwin - dorna
max - valeria
pascal - carina

Mögliche Paarungen:
barkin - aurelia: 0.1170
barkin - carina: 0.2707
barkin - henna: 0.0642
barkin - larissa: 0.1177
barkin - valeria: 0.1538
barkin - vanessa: 0.0772
burim - aurelia: 0.1496
burim - juliette: 0.3465
burim - larissa: 0.1651
burim - vanessa: 0.1314
deniz - aurelia: 0.1079
deniz - carina: 0.2105
deniz - dorna: 0.1439
deniz - juliette: 0.0922
deniz - larissa: 0.2447
deniz - valeria: 0.0903
joel - carina: 0.1549
joel - henna: 0.0331
joel - larissa: 0.1759
joel - valeria: 0.5168
joel - vanessa: 0.0093
kenneth - aurelia: 0.0824
kenneth - henna: 0.6527
kenneth - larissa: 0.0765
kenneth - valeria: 0.0752
kenneth - vanessa: 0.0191
marwin - henna: 0.0974
marwin - juliette: 0.1765
marwin - larissa: 0.0744
marwin - vanessa:

Sasa und Vanessa können kein Paar sein, da es bei ihrer Paarung einen Blackout gab.