### Sankey Assignment

In [None]:
import pandas as pd 
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import random

# Load the dataset
df = pd.read_csv("sankey_assignment.csv")

# Define the columns: each row's LABEL sits between the source columns
# (flowing into it) and the target columns (flowing out of it).
source_columns = ['PS', 'OMP', 'CNP', 'NRP', 'NMCCC', 'PEC', 'NCDM', 'RGS']
target_columns = ['Reg', 'Aca', 'Oth']

# Treat missing cells as zero flow and cast every flow column to int.
# NOTE: astype(int) raises on non-numeric cells and truncates fractions.
numeric_columns = source_columns + target_columns
df[numeric_columns] = df[numeric_columns].fillna(0).astype(int)

# Unique node labels in a reproducible order: source columns, then the LABEL
# values in file order, then target columns. dict.fromkeys() de-duplicates
# while keeping first-seen order; a plain set() would give an arbitrary order
# that can differ between interpreter runs for string labels.
all_labels = list(dict.fromkeys([*source_columns, *df["LABEL"], *target_columns]))

# Map each label to its position in all_labels (the node index used by links).
label_indices = {label: idx for idx, label in enumerate(all_labels)}

# Parallel lists describing the Sankey links; populated further down.
source_nodes, target_nodes, values = [], [], []

# Link palette: eleven hex colours. Links are coloured by indexing into this
# list modulo its length, so the palette simply repeats when there are more
# links than entries.
custom_colors = [
    "#1F77B4", "#FF7F0E", "#2CA02C", "#D62728",  # blue, orange, green, red
    "#9467BD", "#8C564B", "#E377C2", "#7F7F7F",  # purple, brown, pink, grey
    "#BCBD22", "#17BECF", "#AEC7E8",             # olive, cyan, light blue
]

# Build one Sankey link for every positive cell, in two passes over the frame:
#   pass 1: source column -> row LABEL
#   pass 2: row LABEL     -> target column
# Each link takes the next palette colour; the counter is shared by both
# passes and wraps around once the palette is exhausted.
link_colors = []
color_index = 0  # number of links emitted so far

palette_size = len(custom_colors)
link_passes = (
    (source_columns, True),   # the column is the link origin
    (target_columns, False),  # the row LABEL is the link origin
)

for columns, column_is_origin in link_passes:
    for _, record in df.iterrows():
        row_label = record["LABEL"]
        for column in columns:
            flow = record[column]
            if not flow > 0:
                continue  # only positive cells become links

            if column_is_origin:
                origin, destination = column, row_label
            else:
                origin, destination = row_label, column

            source_nodes.append(label_indices[origin])
            target_nodes.append(label_indices[destination])
            values.append(flow)
            link_colors.append(custom_colors[color_index % palette_size])
            color_index += 1

# Assemble the Sankey figure from the node labels and the parallel link lists,
# then render it. Link source/target entries are positions in all_labels.
node_spec = {
    "pad": 15,
    "thickness": 24,
    "line": {"color": "black", "width": 0.5},
    "label": all_labels,
}
link_spec = {
    "source": source_nodes,
    "target": target_nodes,
    "value": values,
    "color": link_colors,
}

sankey_trace = go.Sankey(node=node_spec, link=link_spec)
fig = go.Figure(data=[sankey_trace])

fig.update_layout(title="Sankey Diagram", font_size=14)

fig.show()