In [71]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Bar Chart

In [72]:
# Horizontal stacked bar chart: number of Yes/No rows per LABEL.

# Load the raw rows; the code below relies on the LABEL and COUNT columns.
df = pd.read_csv('bar_assignment.csv')

# Recode the COUNT flag into readable categories (1 -> "Yes", 0 -> "No").
response_names = {1: 'Yes', 0: 'No'}
df = df.assign(COUNT=df['COUNT'].replace(response_names))

# Tally how many rows fall into each (LABEL, COUNT) pair, ordered by LABEL.
pair_sizes = df.groupby(['LABEL', 'COUNT']).size()
grouped_df = pair_sizes.reset_index(name='TOTAL').sort_values(by='LABEL')

# One stacked horizontal bar per LABEL, one colored segment per response.
bar_options = {
    'x': 'TOTAL',
    'y': 'LABEL',
    'color': 'COUNT',
    'orientation': 'h',
    'barmode': 'stack',
    'labels': {'TOTAL': 'Count', 'LABEL': 'Label', 'COUNT': 'Response'},
    'title': 'Horizontal Stacked Bar Chart: Yes/No Responses by Label',
    'color_discrete_map': {'Yes': 'blue', 'No': 'red'},
}
fig = px.bar(grouped_df, **bar_options)

# Horizontal legend anchored by its bottom-left corner just above the
# top-left of the plotting area (y > 1 is above the plot).
legend_style = {
    'title': 'Response',
    'orientation': 'h',
    'yanchor': 'bottom',
    'y': 1.02,
    'xanchor': 'left',
    'x': 0,
}
fig.update_layout(legend=legend_style)

# Print each segment's value (its x extent) inside the segment itself.
fig.update_traces(textposition='inside', texttemplate='%{x}')

# Pin the y-axis category order to the alphabetically sorted labels so that
# it does not depend on the order in which traces were added.
label_order = sorted(grouped_df['LABEL'].unique())
fig.update_yaxes(categoryorder='array', categoryarray=label_order)

# Render the figure.
fig.show()

# Sankey Diagram

In [73]:
# Three-layer Sankey diagram: first-layer sources -> LABEL nodes -> second-layer
# targets. Each row of the CSV supplies one LABEL plus one flow value per
# first-layer and second-layer column.
df = pd.read_csv('sankey_assignment.csv')

# Node layers. The first and third layers are fixed column names; the middle
# layer is whatever appears in the LABEL column.
first_layer = ['PS', 'OMP', 'CNP', 'NRP', 'NMCCC', 'PEC', 'NCDM', 'RGS']
labels = df['LABEL'].tolist()
second_layer = ['Reg', 'Aca', 'Oth']

# Colors for the first-layer nodes (and the links leaving them).
node_colors = {
    'OMP': '#54AFA9',
    'PS': '#F2A481',
    'CNP': '#F09135',
    'RGS': '#AD5ECD',
    'NRP': '#F285A6',
    'NCDM': '#F9D749',
    'NMCCC': '#A1D490',
    'PEC': '#96CDF6'
}

# Colors for the middle-layer LABEL nodes (and the links leaving them).
label_colors = {
    'S': '#87CEFA',
    'I': '#00BFFF',
    'D': '#5E9EA0',
    'F': '#4781B4',
    'N': '#6394ED'
}

# Colors for the second-layer (rightmost) nodes.
second_layer_colors = {
    'Aca': '#90EE90',
    'Reg': '#3CB371',
    'Oth': '#32CD32'
}

# Color for any node or link whose name is missing from the palettes above.
fallback_color = 'black'

# Ordered list of unique nodes. dict.fromkeys drops repeats while keeping
# first-seen order, so a LABEL that occurs on several rows yields a single
# node instead of extra entries that no link ever points at.
nodes = list(dict.fromkeys(first_layer + labels + second_layer))
node_indices = {node: idx for idx, node in enumerate(nodes)}

# Single lookup table for node colors. Later entries win, which keeps the
# precedence first layer > label > second layer when a name is shared.
palette = {**second_layer_colors, **label_colors, **node_colors}
node_color_list = [palette.get(node, fallback_color) for node in nodes]

# Build the links: first layer -> label, then label -> second layer.
# Only strictly positive cells become links.
sources, targets, values, colors = [], [], [], []

for _, row in df.iterrows():
    label = row['LABEL']
    # Links leaving this label share one color; unknown labels use the same
    # fallback as their node instead of raising KeyError.
    label_link_color = label_colors.get(label, fallback_color)
    for fl in first_layer:
        if row[fl] > 0:
            sources.append(node_indices[fl])
            targets.append(node_indices[label])
            values.append(row[fl])
            colors.append(node_colors[fl])  # Link takes its source node's color
    for sl in second_layer:
        if row[sl] > 0:
            sources.append(node_indices[label])
            targets.append(node_indices[sl])
            values.append(row[sl])
            colors.append(label_link_color)  # Label color, not the target's

# Create the Sankey diagram
fig = go.Figure(go.Sankey(
    node=dict(
        pad=5,
        thickness=20,
        line=dict(color='black', width=0.5),
        label=nodes,
        color=node_color_list  # One color per entry in `nodes`
    ),
    link=dict(
        source=sources,
        target=targets,
        value=values,
        color=colors  # One color per link, aligned with sources/targets
    )
))

fig.update_layout(
    title_text='Sankey Diagram',
    font=dict(family='Arial', size=12)
)

fig.show()


# Network Graph