In [8]:
import plotly.graph_objects as go

# Every node that may appear in the diagram; the links below refer to
# positions in this list.
labels = [
    "Humira V1", "Stelara V1",
    "Humira V2", "Amgevita V2",
    "Humira V3", "Amgevita V3", "Therapie beendet V3",
]

# One (source index, target index, flow size) triple per link.
links = [
    (0, 2, 50),
    (1, 3, 30),
    (2, 4, 40),
    (3, 6, 10),
]
source = [src_idx for src_idx, _, _ in links]
target = [dst_idx for _, dst_idx, _ in links]
values = [size for _, _, size in links]

# Assemble the Sankey trace and wrap it in a figure.
sankey_trace = go.Sankey(
    node={"label": labels, "pad": 15, "thickness": 20},
    link={"source": source, "target": target, "value": values},
)
fig = go.Figure(data=[sankey_trace])

fig.show()


In [9]:
import plotly.graph_objects as go
from collections import Counter
import json
import re

# import sqlite3
# db_target = r'C:\xampp\htdocs\MIQ_projects\cedur\db\cedur.sqlite3'
# conn = sqlite3.connect(db_target)
# conn.row_factory = sqlite3.Row 
# cursor = conn.cursor()


import os

import mysql.connector

# Connection settings can be overridden through environment variables so that
# credentials do not have to be written into (and shared with) the notebook.
# The fallback values reproduce the previous hard-coded local setup.
mysql_conn = mysql.connector.connect(
    host=os.environ.get("CEDUR_DB_HOST", "localhost"),
    user=os.environ.get("CEDUR_DB_USER", "root"),
    password=os.environ.get("CEDUR_DB_PASSWORD", ""),
    database=os.environ.get("CEDUR_DB_NAME", "cedur"),
    charset="utf8mb4",     # important for emojis
    use_unicode=True       # makes sure Python unicode strings are used
)


def clean_sequence(seq):
    """Return the items of ``seq`` as a list with consecutive repeats collapsed.

    Only directly adjacent equal items are merged; an item that reappears
    after a different one is kept.
    """
    nothing_yet = object()  # marker meaning "no item has been emitted so far"
    previous = nothing_yet
    result = []
    for item in seq:
        if previous is nothing_yet or previous != item:
            result.append(item)
            previous = item
    return result

def normalize_med(med):
    """Reduce a medication name to the text inside its first parentheses.

    Example: "Remsima (Infliximab)" becomes "Infliximab". A name without a
    non-empty parenthesised part is returned as is. Surrounding whitespace is
    stripped from the result in both cases.
    """
    found = re.search(r"\(([^)]+)\)", med)
    text = found.group(1) if found else med
    return text.strip()

def process_sequences(data):
    """Clean and normalise the medication sequence of every patient.

    For each ``{patient id: sequence}`` entry the sequence is passed through
    ``clean_sequence``, every item is mapped with ``normalize_med``, and the
    result is passed through ``clean_sequence`` once more, because
    normalisation can make neighbouring items equal.

    Returns a new dict with the same keys; ``data`` is not modified.
    """
    return {
        pid: clean_sequence([normalize_med(med) for med in clean_sequence(meds)])
        for pid, meds in data.items()
    }


# Alle Patienten-FCIDs holen
# cursor.execute("SELECT * FROM forms_10020 WHERE 1 ORDER BY fcid ") # LIMIT 0,10000
# rows = cursor.fetchall()
# results_as_dicts = [dict(row) for row in rows]
# med_a = {}
# if results_as_dicts:
#     for row_dict in results_as_dicts:
#         fcid = row_dict['fcid']
#         fid = row_dict['fid']
#         fcont = row_dict['fcont']

#         if fcid not in med_a:
#             med_a[fcid] = {}
#         med_a[fcid][fid] = fcont

# Read all rows of forms_10020, ordered by fcid; the rows are accessed by
# column name below.
cursor = mysql_conn.cursor(dictionary=True)
cursor.execute("SELECT * FROM forms_10020 ORDER BY fcid")
rows = cursor.fetchall()

# Regroup the flat rows into a nested mapping {fcid: {fid: fcont}}.
med_a = {}
for row in rows:
    fcid, fid, fcont = row['fcid'], row['fid'], row['fcont']
    med_a.setdefault(fcid, {})[fid] = fcont



# List of substance names; it is defined here but not referenced by the
# filtering below.
immunsenker = [
    "Filgotinib",
    "Upadacitinib",
    "Etrasimod",
    "Tofacitinib",
    "Ozanimod",
]
# Earlier variant of the list (kept for reference):
# immunsenker = ["Azathioprin","Mercaptopurin","Methotrexat","Puri-Nethol","Jyseleca (Filgotinib)","Rinvoq (Upadacitinib)","Velsipity (Etrasimod)","Xeljanz (Tofacitinib)","Zeposia (Ozanimod)","andere Immunsenker"]

# Group the values of field 10020040 by the value of field 90, using only
# records whose field 10020020 equals 'Immunsenker'. Records that lack any of
# the three fields are skipped.
# NOTE(review): field 90 looks like the patient id and 10020040 like the
# medication name - confirm against the form definition.
# Alternative filter that was tried before: fcont[10020020] == 'Biologika'
required_fields = (90, 10020040, 10020020)
patients = {}
for fcont in med_a.values():
    if not all(field in fcont for field in required_fields):
        continue
    if fcont[10020020] != 'Immunsenker':
        continue
    patients.setdefault(fcont[90], []).append(fcont[10020040])

print(json.dumps(patients, indent=2, ensure_ascii=False))



# Count transitions between consecutive entries of every patient's sequence.
# Node names carry the position within the sequence ("<medication>_T<i>"), so
# the same medication at different steps becomes a separate node.
transitions = Counter()
label_index = {}
for meds in patients.values():
    for i, (current_med, next_med) in enumerate(zip(meds, meds[1:])):
        src = f"{current_med}_T{i}"
        dst = f"{next_med}_T{i+1}"
        transitions[(src, dst)] += 1
        # Give each node name the next free index the first time it shows up.
        for lab in (src, dst):
            label_index.setdefault(lab, len(label_index))

labels = list(label_index)  # node names in first-seen order, no duplicates

source = [label_index[pair[0]] for pair in transitions]
target = [label_index[pair[1]] for pair in transitions]
values = list(transitions.values())


# Collapse consecutive repeats in every patient sequence, then hand the result
# to process_sequences for normalisation of the medication names.
cleaned_data = process_sequences(
    {pid: clean_sequence(meds) for pid, meds in patients.items()}
)


print(json.dumps(cleaned_data, indent=2, ensure_ascii=False))

{
  "2013010715082300": [
    "Azathioprin"
  ],
  "2013062009274100": [
    "Azathioprin"
  ],
  "2013062112053800": [
    "Azathioprin"
  ],
  "2013062110430800": [
    "Azathioprin"
  ],
  "2013070109201800": [
    "Azathioprin"
  ],
  "2013070214004100": [
    "Azathioprin"
  ],
  "2013070315190100": [
    "Azathioprin"
  ],
  "2013070814213300": [
    "Azathioprin"
  ],
  "2013071612395700": [
    "Azathioprin",
    "Azathioprin",
    "Azathioprin",
    "Azathioprin",
    "Azathioprin",
    "Azathioprin",
    "Azathioprin",
    "Azathioprin",
    "Azathioprin",
    "Azathioprin",
    "Azathioprin"
  ],
  "2013071610190400": [
    "Azathioprin",
    "Azathioprin"
  ],
  "2013072209082900": [
    "Azathioprin",
    "Azathioprin",
    "Azathioprin",
    "Azathioprin",
    "Azathioprin",
    "Rinvoq (Upadacitinib)"
  ],
  "2013072310295900": [
    "Azathioprin",
    "Azathioprin"
  ],
  "2013072310581200": [
    "Azathioprin",
    "Azathioprin",
    "Azathioprin",
    "Azathioprin"
  

In [10]:

# Example data: every patient has a list of medications in chronological order
# patients = {
#     "P1": ["Humira", "Hulio", "Yuflyma"],
#     "P2": ["Humira", "Imraldi", "Imraldi", "Imraldi", "Imraldi", "Imraldi", "Imraldi", "Imraldi", "Yuflyma", "Hyrimoz", "Idacio"],
#     "P3": ["Humira", "Yuflyma"],
# }

patients = cleaned_data

# Count how often each (medication, next medication) pair occurs.
transitions = Counter()
for meds in patients.values():
    for current_med, next_med in zip(meds, meds[1:]):
        transitions[(current_med, next_med)] += 1

# Node labels in first-seen order. Building them through a set would give an
# order that depends on string hashing and can therefore differ after every
# kernel restart, which makes node indices (and any index-based positioning
# in later cells) non-reproducible.
labels = list(dict.fromkeys(med for pair in transitions for med in pair))
label_index = {label: i for i, label in enumerate(labels)}

# Sources, targets and values for the Sankey links (one entry per transition).
source = [label_index[src] for (src, dst) in transitions]
target = [label_index[dst] for (src, dst) in transitions]
values = list(transitions.values())

# Build the Sankey diagram.
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=20,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels,
    ),
    link=dict(
        source=source,
        target=target,
        value=values,
    )
)])

fig.update_layout(title_text="Wechsel der Medikation (CED-Patienten)", font_size=12)
fig.show()


In [11]:
import plotly.graph_objects as go

# This cell reuses `labels`, `source`, `target` and `values` from the cell
# that ran before it.
#
# Spread the nodes evenly from left (x=0.1) to right (x=0.9), one position per
# label, so that the x and y arrays always have the same length as `labels`.
# With exactly five labels this gives the former 0.1, 0.3, 0.5, 0.7, 0.9.
node_count = len(labels)
if node_count > 1:
    node_x = [round(0.1 + 0.8 * i / (node_count - 1), 4) for i in range(node_count)]
else:
    node_x = [0.5] * node_count
node_y = [0.5] * node_count  # all nodes at the same height

fig = go.Figure(data=[go.Sankey(
    arrangement="fixed",  # no automatic re-arrangement
    node=dict(
        pad=20,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels,
        x=node_x,  # positions from left to right
        y=node_y,
    ),
    link=dict(
        source=source,
        target=target,
        value=values,
    )
)])
fig.update_layout(title_text="Medikationswechsel", font_size=12)
fig.show()

In [12]:
import plotly.graph_objects as go
import json
from collections import Counter


# Count transitions between consecutive steps of every cleaned sequence.
# Node names carry the step index ("<medication>_T<i>"), so the same
# medication at different steps is a separate node.
transitions = Counter()
for meds in cleaned_data.values():
    for i, (current_med, next_med) in enumerate(zip(meds, meds[1:])):
        src = f"{current_med}_T{i}"
        dst = f"{next_med}_T{i+1}"
        transitions[(src, dst)] += 1

# Filter: keep only transitions that occur more than `count_border` times.
# With 0 every observed transition is kept; raise the value to hide rare ones.
count_border = 0
transitions = {k: v for k, v in transitions.items() if v > count_border}

# Build the node labels from the remaining transitions (first-seen order).
labels = list(dict.fromkeys(lab for pair in transitions for lab in pair))
label_index = {lab: i for i, lab in enumerate(labels)}

source = [label_index[s] for s, t in transitions]
target = [label_index[t] for s, t in transitions]
values = list(transitions.values())

# Create the Sankey plot.
fig = go.Figure(data=[go.Sankey(
    arrangement="snap",
    node=dict(
        pad=20,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels,
    ),
    link=dict(
        source=source,
        target=target,
        value=values,
    )
)])

fig.update_layout(title_text=f"Medikationswechsel (nur Counts > {count_border})", font_size=12)

# Save an interactive HTML file; the file name is defined once so the saved
# path and the printed message cannot drift apart.
output_file = "sankey_medikation.html"
fig.write_html(output_file, include_plotlyjs="cdn")
print(f"✅ Sankey-Plot gespeichert: {output_file}")


fig.show()

✅ Sankey-Plot gespeichert: sankey_medikation.html
