In [1]:
from deepface import DeepFace
import pandas as pd
import os
import unicodedata
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import dash
from dash import dcc, html, Input, Output




In [2]:

df = pd.read_csv('deepface_emotions_combined.csv')

In [3]:
photo_counts = df['Person'].value_counts().reset_index()
photo_counts.columns = ['Person', 'Photo Count']

# Display the table
print(photo_counts)

                   Person  Photo Count
0          Thierry Baudet          224
1          Dilan Yesilgoz          136
2   Caroline van der Plas          110
3          Joost Eerdmans          106
4      Lilian Marijnissen           98
5          Henri Bontebal           93
6              Rob Jetten           88
7          Pieter Omtzigt           80
8         Esther Ouwehand           76
9           Geert Wilders           74
10     Stephan van Baarle           59
11          Mirjam Bikker           47
12       Frans Timmermans           39
13         Laurens Dassen           38
14        Wybren van Haga           23
15              Edson Olf           17
16    Kees van der Staaij            8


In [4]:
import pandas as pd
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display

# Define emotion color palette
emotion_colors = {
    'angry': '#D33F49',
    'disgust': '#7DAF75',
    'fear': '#735D78',
    'happy': '#F4D35E',
    'sad': '#577590',
    'surprise': '#F18F01',
    'neutral': '#B0BEC5'
}

# Prepare data
df['Source'] = df['Folder'].apply(lambda x: 'Instagram' if x == 'Instagram' else 'News')

emotion_counts = df.groupby(['Person', 'Source', 'Dominant Emotion']).size().reset_index(name='Count')
total_counts = emotion_counts.groupby(['Person', 'Source'])['Count'].transform('sum')
emotion_counts['Percentage'] = (emotion_counts['Count'] / total_counts) * 100

# Create dropdown widget
person_selector = widgets.Dropdown(
    options=sorted(emotion_counts['Person'].unique()),
    description='Select Person:',
    layout=widgets.Layout(width='400px')
)

# Function to update plot
def plot_emotion_comparison(person):
    # Filter and fill missing combinations
    person_df = emotion_counts[emotion_counts['Person'] == person].copy()
    all_emotions = list(emotion_colors.keys())
    person_df = person_df.set_index(['Source', 'Dominant Emotion']).unstack(fill_value=0).stack().reset_index()
    person_df = person_df[person_df['Dominant Emotion'].isin(all_emotions)]

    # Create grouped bar chart
    fig = px.bar(
        person_df,
        x='Dominant Emotion',
        y='Percentage',
        color='Source',
        barmode='group',
        category_orders={'Dominant Emotion': all_emotions},
        color_discrete_map={'Instagram': '#1f77b4', 'News': '#ff7f0e'},
        title=f"Emotion Comparison: Instagram vs News for {person}",
        labels={'Dominant Emotion': 'Emotion', 'Percentage': '% of Dominant Emotion'}
    )

    fig.update_traces(marker_line_width=1.5, marker_line_color='white')
    fig.update_layout(height=500, width=900)
    fig.show()  # Only show, don't return

# Build interface without returning the fig
ui = widgets.VBox([person_selector])
out = widgets.interactive_output(plot_emotion_comparison, {'person': person_selector})

display(ui, out)


VBox(children=(Dropdown(description='Select Person:', layout=Layout(width='400px'), options=('Caroline van der…

Output()

In [5]:
political_df = pd.read_csv("Political_Orientation_Data.csv")


df["Person"] = df["Person"].replace("Dilan Yesilgoz", "Dilan Yesilgöz")
dataframe_political = df.merge(political_df, on="Person", how="left")

In [6]:
# Ensure only numeric columns are used in the mean calculation
numeric_cols = ["Left/Right", "Progressive/Conservative"]
party_positions = dataframe_political.groupby("Party")[numeric_cols].mean().reset_index()


# Create the interactive scatter plot
fig = px.scatter(
    party_positions,
    x="Left/Right",
    y="Progressive/Conservative",
    text="Party",
    hover_data={"Left/Right": True, "Progressive/Conservative": True, "Party": False},
)

# Update marker style
fig.update_traces(marker=dict(size=10, color='blue'), textposition="top center")

# Update layout
fig.update_layout(
    title="Political Positioning of Parties",
    xaxis=dict(
        showticklabels=False,  # Hide numbers on x-axis
        zeroline=True,
        zerolinewidth=2,
        zerolinecolor="black",
        gridcolor="lightgray",
        tickmode="linear",
        tick0=-100,
        dtick=10,
    ),
    yaxis=dict(
        showticklabels=False,  # Hide numbers on y-axis
        zeroline=True,
        zerolinewidth=2,
        zerolinecolor="black",
        gridcolor="lightgray",
        tickmode="linear",
        tick0=-100,
        dtick=10,
    ),
    hovermode="closest",
    width=700,  # Square figure
    height=700,  # Square figure
)

# Add text labels for the axis
fig.add_annotation(x=-105, y=0, text="Left", showarrow=False, font=dict(size=14, color="black"))
fig.add_annotation(x=105, y=0, text="Right", showarrow=False, font=dict(size=14, color="black"))
fig.add_annotation(x=0, y=105, text="Progressive", showarrow=False, font=dict(size=14, color="black"))
fig.add_annotation(x=0, y=-105, text="Conservative", showarrow=False, font=dict(size=14, color="black"))

# Show interactive plot
fig.show()

In [9]:
# Count the number of photos per party
photo_counts = dataframe_political["Party"].value_counts().reset_index()
photo_counts.columns = ["Party", "Photo_Count"]

# Aggregate emotions per party and merge with photo counts
party_emotions = dataframe_political.groupby("Party")[emotion_columns].mean().reset_index()
party_emotions = party_emotions.merge(photo_counts, on="Party")

# Define political groups
left_parties = set(dataframe_political[dataframe_political["Left/Right"] < 0]["Party"].unique())
right_parties = set(dataframe_political[dataframe_political["Left/Right"] > 0]["Party"].unique())
progressive_parties = set(dataframe_political[dataframe_political["Progressive/Conservative"] > 0]["Party"].unique())
conservative_parties = set(dataframe_political[dataframe_political["Progressive/Conservative"] < 0]["Party"].unique())

# Create Dash App
app = dash.Dash(__name__)

# Layout of Dashboard with Styling
app.layout = html.Div([
    
    html.H1("Political Emotion Analysis Dashboard", style={
        "textAlign": "center", 
        "backgroundColor": "#f5f5f5", 
        "padding": "10px",
        "color": "#333",
        "borderRadius": "10px"
    }),

    html.Div(style={"backgroundColor": "#f9f9f9", "padding": "20px", "borderRadius": "10px"}, children=[

        # Dropdown Selector for Political Spectrum
        html.Label("Select Political Spectrum:", style={"fontWeight": "bold", "color": "#333"}),
        dcc.Dropdown(
            id="spectrum-selector",
            options=[
                {"label": "Left", "value": "Left"},
                {"label": "Right", "value": "Right"},
                {"label": "Progressive", "value": "Progressive"},
                {"label": "Conservative", "value": "Conservative"}
            ],
            multi=True,
            value=[],
            clearable=True,
            style={"width": "50%", "marginBottom": "10px"}
        ),

        # Party Selector
        html.Label("Select Parties:", style={"fontWeight": "bold", "color": "#333"}),
        dcc.Dropdown(
            id="party-selector",
            options=[{"label": party, "value": party} for party in party_emotions["Party"]],
            multi=True,
            value=[],
            clearable=True,
            style={"width": "50%", "marginBottom": "10px"}
        ),

        # Toggle button for Weighted vs. Unweighted
        html.Label("Choose Averaging Method:", style={"fontWeight": "bold", "color": "#333"}),
        dcc.RadioItems(
            id="weight-toggle",
            options=[
                {"label": "Unweighted (Equal Contribution)", "value": "unweighted"},
                {"label": "Weighted by Number of Photos", "value": "weighted"}
            ],
            value="unweighted",
            inline=True,
            style={"marginBottom": "20px", "color": "#333"}
        ),

        # Radar Chart (Spider Plot)
        dcc.Graph(id="radar-chart", style={"backgroundColor": "#ffffff", "borderRadius": "10px"})
    ])
], style={"backgroundColor": "#f5f5f5", "minHeight": "100vh", "padding": "30px"})

# Callback for updating available parties based on spectrum selection (AND logic)
@app.callback(
    Output("party-selector", "options"),
    Input("spectrum-selector", "value")
)
def update_party_options(selected_spectrum):
    # Start with all parties
    filtered_parties = set(party_emotions["Party"])

    # Apply AND logic: Keep only parties that match **all** selected categories
    if "Left" in selected_spectrum:
        filtered_parties &= left_parties
    if "Right" in selected_spectrum:
        filtered_parties &= right_parties
    if "Progressive" in selected_spectrum:
        filtered_parties &= progressive_parties
    if "Conservative" in selected_spectrum:
        filtered_parties &= conservative_parties

    return [{"label": party, "value": party} for party in sorted(filtered_parties)]

# Callback for Radar Chart Updates
@app.callback(
    Output("radar-chart", "figure"),
    [Input("party-selector", "value"),
     Input("spectrum-selector", "value"),
     Input("weight-toggle", "value")]
)
def update_radar_chart(selected_parties, selected_spectrum, weight_option):
    # Determine which parties to include
    filtered_parties = set(selected_parties)

    # If no specific parties selected, use the parties that match ALL selected spectrum categories
    if not filtered_parties:
        filtered_parties = set(party_emotions["Party"])  # Start with all parties

        # Apply AND logic
        if "Left" in selected_spectrum:
            filtered_parties &= left_parties
        if "Right" in selected_spectrum:
            filtered_parties &= right_parties
        if "Progressive" in selected_spectrum:
            filtered_parties &= progressive_parties
        if "Conservative" in selected_spectrum:
            filtered_parties &= conservative_parties

    # Filter dataframe
    filtered_df = party_emotions[party_emotions["Party"].isin(filtered_parties)]

    # If no valid parties remain, return an empty plot
    if filtered_df.empty:
        fig_radar = go.Figure()
        fig_radar.update_layout(
            title="No Parties Match Selection",
            polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
            showlegend=False
        )
        return fig_radar

    # Compute the average emotions
    if weight_option == "unweighted":
        avg_emotions = filtered_df[emotion_columns].mean()  # Equal weight per party
    else:  # Weighted by number of photos
        avg_emotions = (filtered_df[emotion_columns].T * filtered_df["Photo_Count"]).T.sum() / filtered_df["Photo_Count"].sum()

    # Scale emotions so that the highest one is set to 100%
    max_emotion = avg_emotions.max()
    if max_emotion > 0:
        avg_emotions = avg_emotions / max_emotion

    # Create Radar Chart
    fig_radar = go.Figure()
    fig_radar.add_trace(go.Scatterpolar(
        r=avg_emotions.values,
        theta=emotion_columns,
        fill='toself',
        name="Selected Group"
    ))

    fig_radar.update_layout(
        title=f"Emotion Distribution ({'Weighted' if weight_option == 'weighted' else 'Unweighted'})",
        polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
        showlegend=True,
        paper_bgcolor="#ffffff",
        plot_bgcolor="#ffffff"
    )

    return fig_radar

# Run the Dashboard
if __name__ == '__main__':
    app.run(debug=True)

NameError: name 'emotion_columns' is not defined

In [None]:
import pandas as pd
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display

# --- Load and prepare the data ---
emotion_df = pd.read_csv("deepface_emotions_combined.csv")
orientation_df = pd.read_csv("Political_Orientation_Data.csv")
df = pd.merge(emotion_df, orientation_df, on='Person', how='left')

df['Source'] = df['Folder'].apply(lambda x: 'Instagram' if x == 'Instagram' else 'News')
df = df[df['Source'] == 'Instagram'].copy()
df['Date'] = pd.to_datetime(df['Date'])
df['Orientation'] = df['Left/Right'].apply(lambda x: 'Left' if x < 0 else 'Right')
df['Ideology'] = df['Progressive/Conservative'].apply(lambda x: 'Progressive' if x > 0 else 'Conservative')

# --- Widgets ---
bin_freq_options = {
    "Monthly (4 bins max)": "1M",
    "Biweekly": "14D",
    "Weekly (default)": "7D",
    "Every 3 Days": "3D",
    "Daily": "1D"
}
bin_selector = widgets.Dropdown(
    options=list(bin_freq_options.keys()),
    value="Weekly (default)",
    description='Bin Size:',
    layout=widgets.Layout(width='300px')
)

group_selector = widgets.ToggleButtons(
    options=['All', 'Left', 'Right', 'Progressive', 'Conservative'],
    description='Group:',
    layout=widgets.Layout(width='100%')
)

person_mode = widgets.ToggleButtons(
    options=['Combined', 'Individual'],
    value='Combined',
    description='All Group Mode:',
    layout=widgets.Layout(width='400px')
)

person_selector = widgets.Dropdown(
    options=sorted(df['Person'].unique()),
    description='Person:',
    layout=widgets.Layout(width='400px')
)

# Hide/show widgets dynamically
def update_visibility(*args):
    if group_selector.value == 'All':
        person_mode.layout.display = 'block'
        person_selector.layout.display = 'block' if person_mode.value == 'Individual' else 'none'
    else:
        person_mode.layout.display = 'none'
        person_selector.layout.display = 'none'

group_selector.observe(update_visibility, names='value')
person_mode.observe(update_visibility, names='value')
update_visibility()

# --- Histogram update function ---
def update_histogram(group, mode, person, bin_label):
    freq = bin_freq_options[bin_label]

    if group == 'All':
        if mode == 'Combined':
            group_df = df.copy()
            title = "All Politicians Combined"
        else:
            group_df = df[df['Person'] == person].copy()
            title = f"{person}"
    elif group in ['Left', 'Right']:
        group_df = df[df['Orientation'] == group].copy()
        title = f"{group} Politicians Combined"
    else:
        group_df = df[df['Ideology'] == group].copy()
        title = f"{group} Politicians Combined"

    if group_df.empty:
        print("No data available.")
        return

    grouped = (
        group_df
        .set_index('Date')
        .groupby([pd.Grouper(freq=freq), 'Dominant Emotion'])
        .size()
        .reset_index(name='Count')
    )

    # Define emotion color map (consistent)
    emotion_colors = {
        'angry': '#e74c3c',
        'disgust': '#27ae60',
        'fear': '#8e44ad',
        'happy': '#f1c40f',
        'sad': '#3498db',
        'surprise': '#f39c12',
        'neutral': '#95a5a6'
    }

    fig = px.bar(
        grouped,
        x='Date',
        y='Count',
        color='Dominant Emotion',
        title=f"Instagram Emotion Histogram: {title} ({bin_label})",
        labels={'Date': 'Date', 'Count': 'Number of Photos'},
        color_discrete_map=emotion_colors,
        height=500
    )

    fig.update_layout(bargap=0.05)
    fig.show()

# --- Combine everything ---
ui = widgets.VBox([
    group_selector,
    person_mode,
    person_selector,
    bin_selector
])

out = widgets.interactive_output(
    update_histogram,
    {
        'group': group_selector,
        'mode': person_mode,
        'person': person_selector,
        'bin_label': bin_selector
    }
)

display(ui, out)


VBox(children=(ToggleButtons(description='Group:', layout=Layout(width='100%'), options=('All', 'Left', 'Right…

Output()

In [None]:
import pandas as pd
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display

# --- Load and prepare the data ---
emotion_df = pd.read_csv("deepface_emotions_combined.csv")
orientation_df = pd.read_csv("Political_Orientation_Data.csv")
df = pd.merge(emotion_df, orientation_df, on='Person', how='left')

df['Source'] = df['Folder'].apply(lambda x: 'Instagram' if x == 'Instagram' else 'News')
df = df[df['Source'] == 'Instagram'].copy()
df['Date'] = pd.to_datetime(df['Date'])
df['Orientation'] = df['Left/Right'].apply(lambda x: 'Left' if x < 0 else 'Right')
df['Ideology'] = df['Progressive/Conservative'].apply(lambda x: 'Progressive' if x > 0 else 'Conservative')

# --- Fixed color mapping for consistent emotions ---
emotion_colors = {
    'angry': '#e74c3c',
    'disgust': '#27ae60',
    'fear': '#8e44ad',
    'happy': '#f1c40f',
    'sad': '#3498db',
    'surprise': '#f39c12',
    'neutral': '#95a5a6'
}

# --- Widgets ---
bin_freq_options = {
    "Monthly (4 bins max)": "1M",
    "Biweekly": "14D",
    "Weekly (default)": "7D",
    "Every 3 Days": "3D",
    "Daily": "1D"
}
bin_selector = widgets.Dropdown(
    options=list(bin_freq_options.keys()),
    value="Weekly (default)",
    description='Bin Size:',
    layout=widgets.Layout(width='300px')
)

group_selector = widgets.ToggleButtons(
    options=['All', 'Left', 'Right', 'Progressive', 'Conservative'],
    description='Group:',
    layout=widgets.Layout(width='100%')
)

person_mode = widgets.ToggleButtons(
    options=['Combined', 'Individual'],
    value='Combined',
    description='All Group Mode:',
    layout=widgets.Layout(width='400px')
)

person_selector = widgets.Dropdown(
    options=sorted(df['Person'].unique()),
    description='Person:',
    layout=widgets.Layout(width='400px')
)

# --- Dynamic visibility logic ---
def update_visibility(*args):
    if group_selector.value == 'All':
        person_mode.layout.display = 'block'
        person_selector.layout.display = 'block' if person_mode.value == 'Individual' else 'none'
    else:
        person_mode.layout.display = 'none'
        person_selector.layout.display = 'none'

group_selector.observe(update_visibility, names='value')
person_mode.observe(update_visibility, names='value')
update_visibility()

# --- Streamgraph plotting function ---
def update_streamgraph(group, mode, person, bin_label):
    freq = bin_freq_options[bin_label]

    if group == 'All':
        if mode == 'Combined':
            group_df = df.copy()
            title = "All Politicians Combined"
        else:
            group_df = df[df['Person'] == person].copy()
            title = f"{person}"
    elif group in ['Left', 'Right']:
        group_df = df[df['Orientation'] == group].copy()
        title = f"{group} Politicians Combined"
    else:
        group_df = df[df['Ideology'] == group].copy()
        title = f"{group} Politicians Combined"

    if group_df.empty:
        print("No data available.")
        return

    grouped = (
        group_df
        .set_index('Date')
        .groupby([pd.Grouper(freq=freq), 'Dominant Emotion'])
        .size()
        .reset_index(name='Count')
    )

    fig = px.area(
        grouped,
        x='Date',
        y='Count',
        color='Dominant Emotion',
        color_discrete_map=emotion_colors,
        line_group='Dominant Emotion',
        title=f"Instagram Emotion Streamgraph: {title} ({bin_label})",
        labels={'Date': 'Date', 'Count': 'Number of Photos'},
        height=500
    )

    fig.update_layout(
        xaxis=dict(dtick="M1", tickformat="%b\n%Y"),
        yaxis_title="Photo Count",
        legend_title="Emotion",
        hovermode="x unified"
    )

    fig.show()

# --- Interface layout ---
ui = widgets.VBox([
    group_selector,
    person_mode,
    person_selector,
    bin_selector
])

out = widgets.interactive_output(
    update_streamgraph,
    {
        'group': group_selector,
        'mode': person_mode,
        'person': person_selector,
        'bin_label': bin_selector
    }
)

display(ui, out)


VBox(children=(ToggleButtons(description='Group:', layout=Layout(width='100%'), options=('All', 'Left', 'Right…

Output()

In [None]:

df

Unnamed: 0,Image,Folder,Person,Dominant Emotion,Angry,Disgust,Fear,Happy,Sad,Surprise,...,Right Eye X,Right Eye Y,Face Confidence,Date,Party,Progressive/Conservative,Left/Right,Source,Orientation,Ideology
0,2488763_4334.jpg,NOS_t1,Caroline van der Plas,happy,0.021674,2.238934e-03,1.399683e-01,88.268042,2.571152,2.658652e-02,...,66,78,0.99,NaT,BBB,-42.0,9.0,News,Right,Conservative
1,2488814_4291.jpg,NOS_t1,Caroline van der Plas,happy,0.886472,1.115981e-01,2.079511e-02,75.512420,14.332163,4.981646e-05,...,10,14,1.00,NaT,BBB,-42.0,9.0,News,Right,Conservative
2,2488823_4282.jpg,NOS_t1,Caroline van der Plas,happy,0.886472,1.115981e-01,2.079511e-02,75.512420,14.332163,4.981646e-05,...,10,14,1.00,NaT,BBB,-42.0,9.0,News,Right,Conservative
3,2489252_4104.jpg,NOS_t1,Caroline van der Plas,neutral,0.001276,8.970216e-20,5.988629e-09,0.000821,0.000128,5.863522e-12,...,37,34,1.00,NaT,BBB,-42.0,9.0,News,Right,Conservative
4,2489745_3935.jpg,NOS_t1,Caroline van der Plas,fear,0.141854,5.449093e-02,7.251012e+01,0.775452,26.112507,8.078648e-03,...,16,22,1.00,NaT,BBB,-42.0,9.0,News,Right,Conservative
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1311,241023_wybren.png,Instagram,Wybren van Haga,angry,63.909817,4.707043e-10,1.618799e-03,34.575200,0.024561,1.417450e-03,...,60,104,1.00,2023-10-24,BVNL,-89.0,100.0,Instagram,Right,Conservative
1312,241223_wybren.png,Instagram,Wybren van Haga,happy,0.152928,3.349813e-05,2.974866e-03,79.397583,0.146784,7.851073e-03,...,19,44,0.99,2023-12-24,BVNL,-89.0,100.0,Instagram,Right,Conservative
1313,250923_wybren.png,Instagram,Wybren van Haga,happy,0.000430,1.548037e-07,1.392585e-03,54.519176,0.023393,1.038006e-02,...,21,80,1.00,2023-09-25,BVNL,-89.0,100.0,Instagram,Right,Conservative
1314,270923_wybren.png,Instagram,Wybren van Haga,happy,0.000041,3.659996e-08,2.015140e-05,70.926648,0.043823,1.708612e-05,...,28,85,1.00,2023-09-27,BVNL,-89.0,100.0,Instagram,Right,Conservative


In [None]:
import pandas as pd
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display

# Load data
emotion_df = pd.read_csv("deepface_emotions_combined.csv")
orientation_df = pd.read_csv("Political_Orientation_Data.csv")
df = pd.merge(emotion_df, orientation_df, on='Person', how='left')

# Prepare columns
df['Source'] = df['Folder'].apply(lambda x: 'Instagram' if x == 'Instagram' else 'News')
df['Date'] = pd.to_datetime(df['Date'])
df['Orientation'] = df['Left/Right'].apply(lambda x: 'Left' if x < 0 else 'Right')
df['Ideology'] = df['Progressive/Conservative'].apply(lambda x: 'Progressive' if x > 0 else 'Conservative')

# Widgets
group_selector = widgets.ToggleButtons(
    options=['Orientation', 'Ideology'],
    value='Orientation',
    description='Group by:',
    layout=widgets.Layout(width='50%')
)

source_selector = widgets.ToggleButtons(
    options=['All', 'Instagram', 'News'],
    value='All',
    description='Data Source:',
    layout=widgets.Layout(width='50%')
)

def generate_multilevel_sankey(group_by, source_filter):
    # Filter source
    if source_filter != 'All':
        sankey_df = df[df['Source'] == source_filter]
    else:
        sankey_df = df.copy()

    # Aggregate flows: Emotion → Group → Person
    grouped = sankey_df.groupby(['Dominant Emotion', group_by, 'Person']).size().reset_index(name='Count')

    # Build label list
    emotions = grouped['Dominant Emotion'].unique().tolist()
    groups = grouped[group_by].unique().tolist()
    persons = grouped['Person'].unique().tolist()
    all_labels = emotions + groups + persons

    label_to_index = {label: idx for idx, label in enumerate(all_labels)}

    # Define emotion color palette
    emotion_colors = {
        'angry': '#D33F49',
        'disgust': '#7DAF75',
        'fear': '#735D78',
        'happy': '#F4D35E',
        'sad': '#577590',
        'surprise': '#F18F01',
        'neutral': '#B0BEC5'
    }

    node_colors = [emotion_colors.get(label, "#E0E0E0") for label in all_labels]
    links_source = []
    links_target = []
    links_value = []
    links_color = []

    # First level: Emotion → Group
    emotion_group = grouped.groupby(['Dominant Emotion', group_by])['Count'].sum().reset_index()
    for _, row in emotion_group.iterrows():
        source = label_to_index[row['Dominant Emotion']]
        target = label_to_index[row[group_by]]
        value = row['Count']
        links_source.append(source)
        links_target.append(target)
        links_value.append(value)
        links_color.append(emotion_colors.get(row['Dominant Emotion'], "#cccccc"))

    # Second level: Group → Person
    for _, row in grouped.iterrows():
        source = label_to_index[row[group_by]]
        target = label_to_index[row['Person']]
        value = row['Count']
        links_source.append(source)
        links_target.append(target)
        links_value.append(value)
        links_color.append(emotion_colors.get(row['Dominant Emotion'], "#cccccc"))

    # Build the Sankey
    fig = go.Figure(data=[go.Sankey(
        arrangement="snap",
        node=dict(
            pad=20,
            thickness=25,
            line=dict(color="white", width=1.5),
            label=all_labels,
            color=node_colors
        ),
        link=dict(
            source=links_source,
            target=links_target,
            value=links_value,
            color=links_color,
            hovertemplate='From %{source.label} to %{target.label}: %{value} images<extra></extra>'
        )
    )])

    fig.update_layout(
        title_text=f"Emotion → {group_by} → Politician (Source: {source_filter})",
        font_size=11,
        plot_bgcolor='white',
        paper_bgcolor='white',
        margin=dict(l=0, r=0, t=60, b=0)
    )

    return fig


# Hook up interactivity
widgets.interact(generate_multilevel_sankey, group_by=group_selector, source_filter=source_selector)

interactive(children=(ToggleButtons(description='Group by:', layout=Layout(width='50%'), options=('Orientation…

<function __main__.generate_multilevel_sankey(group_by, source_filter)>

In [8]:
import pandas as pd
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display

# Load data
emotion_df = pd.read_csv("deepface_emotions_combined.csv")
orientation_df = pd.read_csv("Political_Orientation_Data.csv")
df = pd.merge(emotion_df, orientation_df, on='Person', how='left')

# Filter for Instagram only
df['Source'] = df['Folder'].apply(lambda x: 'Instagram' if x == 'Instagram' else 'News')
df = df[df['Source'] == 'Instagram'].copy()

# Parse dates and assign election-based periods
df['Date'] = pd.to_datetime(df['Date'])

bins = pd.to_datetime(['2000-01-01', '2023-11-01', '2023-12-01', df['Date'].max()])
labels = ['Before Election', 'Election Month', 'After Election']
df['Time Period'] = pd.cut(df['Date'], bins=bins, labels=labels)


# Political grouping columns
df['Orientation'] = df['Left/Right'].apply(lambda x: 'Left' if x < 0 else 'Right')
df['Ideology'] = df['Progressive/Conservative'].apply(lambda x: 'Progressive' if x > 0 else 'Conservative')

# Emotion color palette
emotion_colors = {
    'angry': '#D33F49',
    'disgust': '#7DAF75',
    'fear': '#735D78',
    'happy': '#F4D35E',
    'sad': '#577590',
    'surprise': '#F18F01',
    'neutral': '#B0BEC5'
}

# Widget to choose grouping
group_selector = widgets.ToggleButtons(
    options=['Orientation', 'Ideology'],
    value='Orientation',
    description='Group by:',
    layout=widgets.Layout(width='50%')
)

def generate_4level_sankey(group_by):
    # Group data: Time Period → Emotion → Group → Person
    grouped = df.groupby(['Time Period', 'Dominant Emotion', group_by, 'Person']).size().reset_index(name='Count')

    # Build all unique labels
    periods = grouped['Time Period'].dropna().unique().tolist()
    emotions = grouped['Dominant Emotion'].unique().tolist()
    groups = grouped[group_by].unique().tolist()
    persons = grouped['Person'].unique().tolist()
    all_labels = periods + emotions + groups + persons

    label_to_index = {label: idx for idx, label in enumerate(all_labels)}

    # Prepare link lists
    sources, targets, values, link_colors = [], [], [], []

    # Time → Emotion
    step1 = grouped.groupby(['Time Period', 'Dominant Emotion'])['Count'].sum().reset_index()
    for _, row in step1.iterrows():
        sources.append(label_to_index[row['Time Period']])
        targets.append(label_to_index[row['Dominant Emotion']])
        values.append(row['Count'])
        link_colors.append(emotion_colors.get(row['Dominant Emotion'], "#cccccc"))

    # Emotion → Group
    step2 = grouped.groupby(['Dominant Emotion', group_by])['Count'].sum().reset_index()
    for _, row in step2.iterrows():
        sources.append(label_to_index[row['Dominant Emotion']])
        targets.append(label_to_index[row[group_by]])
        values.append(row['Count'])
        link_colors.append(emotion_colors.get(row['Dominant Emotion'], "#cccccc"))

    # Group → Person
    for _, row in grouped.iterrows():
        sources.append(label_to_index[row[group_by]])
        targets.append(label_to_index[row['Person']])
        values.append(row['Count'])
        link_colors.append(emotion_colors.get(row['Dominant Emotion'], "#cccccc"))

    # Node colors
    node_colors = [emotion_colors.get(label, "#E0E0E0") for label in all_labels]

    # Build Sankey
    fig = go.Figure(data=[go.Sankey(
        arrangement="snap",
        node=dict(
            pad=20,
            thickness=25,
            line=dict(color="white", width=1.5),
            label=all_labels,
            color=node_colors
        ),
        link=dict(
            source=sources,
            target=targets,
            value=values,
            color=link_colors,
            hovertemplate='From %{source.label} to %{target.label}: %{value} images<extra></extra>'
        )
    )])

    fig.update_layout(
        title_text=f"Instagram Emotion Flow: Time → Emotion → {group_by} → Person",
        font_size=11,
        plot_bgcolor='white',
        paper_bgcolor='white',
        height=600
    )

    fig.show()

# Link widget to plotting
widgets.interact(generate_4level_sankey, group_by=group_selector)


interactive(children=(ToggleButtons(description='Group by:', layout=Layout(width='50%'), options=('Orientation…

<function __main__.generate_4level_sankey(group_by)>