In [16]:
import json
import time
import pandas as pd

def jaccard_similarity(text1, text2):
    """Return the Jaccard coefficient of the word sets of two texts.

    Both texts are split on whitespace and reduced to sets of distinct
    tokens; the result is |A & B| / |A | B|. Tokens are compared exactly
    as written (case and punctuation are not normalized).

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        A float between 0.0 and 1.0, or the integer 0 when neither text
        contains any token.
    """
    tokens_a = set(text1.split())
    tokens_b = set(text2.split())
    combined = tokens_a | tokens_b
    # Guard against division by zero when both texts are empty/blank.
    if not combined:
        return 0
    shared = tokens_a & tokens_b
    return len(shared) / len(combined)

# Load the answer pairs to compare. The encoding is stated explicitly:
# JSON is UTF-8 by specification, whereas open() would otherwise fall back
# to the platform's locale encoding and could mis-decode non-ASCII text.
with open('manuelle_bewertung.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# One record per compared pair; converted to a DataFrame in one step below.
results = []

for i, pair in enumerate(data, 1):
    musterantwort = pair.get("Musterantwort", "")
    chatbotantwort = pair.get("ChatVGH", "")

    # Skip pairs where either answer is missing or empty. The running
    # number `i` still reflects the pair's position in the file.
    if not musterantwort or not chatbotantwort:
        continue

    # Time only the similarity computation itself.
    start_time = time.perf_counter()
    jaccard_coefficient = jaccard_similarity(musterantwort, chatbotantwort)
    end_time = time.perf_counter()

    # perf_counter() reports seconds; convert to milliseconds.
    elapsed_time_ms = round((end_time - start_time) * 1000, 9)

    results.append({
        "Vergleich": i,
        "Jaccard-Koeffizient": jaccard_coefficient,
        "Berechnungszeit (ms)": elapsed_time_ms
    })

df = pd.DataFrame(results)

# Print the table as plain text without the index column.
print(df.to_string(index=False))

 Vergleich  Jaccard-Koeffizient  Berechnungszeit (ms)
         1             0.187050              0.113799
         2             0.300000              0.047200
         3             0.375000              0.025300
         4             0.540541              0.025900
         5             0.067568              0.035100
         6             0.091837              0.051100
         7             0.070588              0.034900
         8             0.968750              0.029800
         9             1.000000              0.020300
        10             0.306122              0.023700
        11             0.300000              0.043000
        12             0.948718              0.029600
        13             0.200000              0.086299


In [18]:
import json
import time
import pandas as pd
from Levenshtein import distance as levenshtein_distance

# Helper that computes the Levenshtein distance
def levenshtein_similarity(text1, text2):
    """Return the Levenshtein distance between two strings.

    Despite the function name, the returned value is a distance, not a
    similarity: it is whatever ``Levenshtein.distance`` reports for the
    two inputs, unnormalized.

    Args:
        text1: First string.
        text2: Second string.

    Returns:
        The result of ``levenshtein_distance(text1, text2)``.
    """
    edit_distance = levenshtein_distance(text1, text2)
    return edit_distance

# Load the file. The encoding is stated explicitly: JSON is UTF-8 by
# specification, whereas open() would otherwise fall back to the platform's
# locale encoding and could mis-decode non-ASCII text.
with open('manuelle_bewertung.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# List collecting one result record per compared pair
results = []

# Iterate over all pairs in the file
for i, pair in enumerate(data, 1):
    musterantwort = pair.get("Musterantwort", "")
    chatbotantwort = pair.get("ChatVGH", "")

    # Skip pairs where either answer is missing or empty. The running
    # number `i` still reflects the pair's position in the file.
    if not musterantwort or not chatbotantwort:
        continue

    # Start timing (only the distance computation is measured)
    start_time = time.perf_counter()

    # Compute the Levenshtein distance
    levenshtein_dist = levenshtein_similarity(musterantwort, chatbotantwort)

    # Stop timing
    end_time = time.perf_counter()

    # Computation time in milliseconds (perf_counter() reports seconds)
    elapsed_time_ms = round((end_time - start_time) * 1000, 9)

    # Append the result to the list
    results.append({
        "Vergleich": i,
        "Levenshtein-Distanz": levenshtein_dist,
        "Berechnungszeit (ms)": elapsed_time_ms
    })

# Build a DataFrame from the results
df = pd.DataFrame(results)

# Print the DataFrame as plain text without the index column
print(df.to_string(index=False))

 Vergleich  Levenshtein-Distanz  Berechnungszeit (ms)
         1                  666              0.123600
         2                  315              0.045200
         3                  169              0.021400
         4                  151              0.017500
         5                  764              0.029400
         6                  713              0.066800
         7                  499              0.036600
         8                   12              0.011500
         9                    0              0.001400
        10                  197              0.019000
        11                  314              0.958700
        12                    1              0.002700
        13                  837              0.237701
