<h2>Library</h2>

In [11]:
import os
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio


# Silence every warning category process-wide so the rendered notebook output
# stays clean.
# NOTE(review): this also hides pandas warnings (e.g. SettingWithCopyWarning,
# FutureWarning) that can point at real problems -- consider narrowing it.
warnings.filterwarnings('ignore')

<h2>Function</h2>

In [33]:
def bu_audiencetype_vehicle_sankey_plot(df, value_column, by_Dell, title):
    """Plot a BU -> Audience Type -> Vehicle Sankey diagram and save it as HTML.

    Links are aggregated per ('BU', 'Audience Type') pair and per
    ('Audience Type', 'Display Dell Vehicle Mapped') pair. When
    ``value_column`` is 'Segment ID' a link's weight is the number of distinct
    IDs in the group; for any other column it is the column sum.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'BU', 'Audience Type', 'Display Dell Vehicle Mapped' and
        ``value_column`` (plus 'Segment ID' when ``by_Dell`` is truthy).
        The frame is not modified.
    value_column : str
        Column that provides the link weights.
    by_Dell : bool
        When truthy, rows whose 'Segment ID' is 0 or '0' are dropped first.
    title : str
        Figure title; also used as the output file name
        ('html/<title>.html').

    Side effects
    ------------
    Displays the figure and writes it to 'html/<title>.html', creating the
    output directory when it does not exist yet.
    """
    if by_Dell:
        # The ID can be loaded either as a number or as text, so test both.
        df = df[(df['Segment ID'] != 0) & (df['Segment ID'] != '0')]

    # Distinct count for IDs, plain sum for every other measure.
    aggregate = 'nunique' if value_column == 'Segment ID' else 'sum'

    tiers = [('BU', 'Audience Type'),
             ('Audience Type', 'Display Dell Vehicle Mapped')]
    links = pd.concat(
        [
            df.groupby([source_col, target_col])[value_column]
            .agg(aggregate)
            .reset_index()
            .rename(columns={source_col: 'Source',
                             target_col: 'Target',
                             value_column: 'Value'})[['Source', 'Target', 'Value']]
            for source_col, target_col in tiers
        ],
        ignore_index=True,
    )

    # Node order: every source in order of first appearance, then the targets
    # not seen yet. A label present in several tiers becomes a single node.
    labels = pd.unique(
        pd.concat([links['Source'], links['Target']], ignore_index=True)
    ).tolist()
    node_index = {label: position for position, label in enumerate(labels)}

    source = links['Source'].map(node_index).tolist()
    target = links['Target'].map(node_index).tolist()
    value = links['Value'].tolist()

    # Link colour is chosen from the index of the link's source node; any
    # source beyond the first four falls back to black.
    palette = {0: "#FBEBFF", 1: '#FFEED2', 2: "#94DCF7", 3: "#F0F0F0"}
    colors = [palette.get(node, "#000000") for node in source]

    link = dict(source=source, target=target, value=value, color=colors)
    node = dict(label=labels, pad=30, thickness=20)

    chart = go.Sankey(link=link, node=node, arrangement="snap")
    fig = go.Figure(chart)
    fig.update_layout(title_text=title)
    fig.show()

    html_file_path = 'html/' + title + ".html"
    # write_html does not create missing folders, so make sure it exists.
    os.makedirs(os.path.dirname(html_file_path), exist_ok=True)
    pio.write_html(fig, file=html_file_path)


In [13]:
def audiencetype_vehicles_sankey_plot(df, value_column, by_Dell, title):
    """Plot an Audience Type -> Vehicle Sankey diagram and save it as HTML.

    One link is produced per ('Audience Type', 'Display Dell Vehicle Mapped')
    pair. When ``value_column`` is 'Segment ID' the link weight is the number
    of distinct IDs in the group (summing identifiers is meaningless); for any
    other column it is the column sum.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Audience Type', 'Display Dell Vehicle Mapped' and
        ``value_column`` (plus 'Segment ID' when ``by_Dell`` is truthy).
        The frame is not modified.
    value_column : str
        Column that provides the link weights.
    by_Dell : bool
        When truthy, rows whose 'Segment ID' is 0 or '0' are dropped first.
    title : str
        Figure title; also used as the output file name
        ('html/<title>.html').

    Side effects
    ------------
    Displays the figure and writes it to 'html/<title>.html', creating the
    output directory when it does not exist yet.
    """
    if by_Dell:
        # The ID can be loaded either as a number or as text, so test both.
        df = df[(df['Segment ID'] != 0) & (df['Segment ID'] != '0')]

    # Distinct count for IDs, plain sum for every other measure.
    aggregate = 'nunique' if value_column == 'Segment ID' else 'sum'

    links = (
        df.groupby(['Audience Type', 'Display Dell Vehicle Mapped'])[value_column]
        .agg(aggregate)
        .reset_index()
        .rename(columns={'Audience Type': 'Source',
                         'Display Dell Vehicle Mapped': 'Target',
                         value_column: 'Value'})
    )

    # Node order: every source in order of first appearance, then the targets
    # not seen yet. A label present on both sides becomes a single node.
    labels = pd.unique(
        pd.concat([links['Source'], links['Target']], ignore_index=True)
    ).tolist()
    node_index = {label: position for position, label in enumerate(labels)}

    source = links['Source'].map(node_index).tolist()
    target = links['Target'].map(node_index).tolist()
    value = links['Value'].tolist()

    # Links leaving the first node are highlighted; all others are grey.
    colors = ["#94DCF7" if node == 0 else "#F0F0F0" for node in source]

    link = dict(source=source, target=target, value=value, color=colors)
    node = dict(label=labels, pad=30, thickness=20)

    chart = go.Sankey(link=link, node=node, arrangement="snap")
    fig = go.Figure(chart)
    fig.update_layout(title_text=title)
    fig.show()

    html_file_path = 'html/' + title + ".html"
    # write_html does not create missing folders, so make sure it exists.
    os.makedirs(os.path.dirname(html_file_path), exist_ok=True)
    pio.write_html(fig, file=html_file_path)

In [14]:
def activation_sankey_plot(df, value_column, by_Dell, title):
    """Plot the activation path as a four-tier Sankey diagram and save it.

    Tiers, left to right: 'Audience Type' -> 'Audience Type Name' ->
    'Audience Source' -> 'Display Dell Vehicle Mapped'. Each tier's links are
    aggregated over the tier's own pair of columns together with every column
    to its left, so the same (source, target) pair can yield several links.
    When ``value_column`` is 'Segment ID' the weight is the number of distinct
    IDs (summing identifiers is meaningless); otherwise it is the column sum.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the four tier columns and ``value_column`` (plus
        'Segment ID' when ``by_Dell`` is truthy). The frame is not modified:
        the label clean-up below works on a copy.
    value_column : str
        Column that provides the link weights.
    by_Dell : bool
        When truthy, rows whose 'Segment ID' is 0 or '0' are dropped first.
    title : str
        Figure title; also used as the output file name
        ('html/<title>.html').

    Side effects
    ------------
    Displays the figure and writes it to 'html/<title>.html', creating the
    output directory when it does not exist yet.
    """
    if by_Dell:
        # The ID can be loaded either as a number or as text, so test both.
        df = df[(df['Segment ID'] != 0) & (df['Segment ID'] != '0')]

    # Normalise the non-breaking space in 'CRM-1PD\xa0CRM' so both spellings
    # share one node. `assign` returns a new frame, leaving the caller's data
    # untouched; regex=False makes the literal match explicit.
    df = df.assign(**{
        'Audience Type Name': df['Audience Type Name'].str.replace(
            'CRM-1PD\xa0CRM', 'CRM-1PD CRM', regex=False)
    })

    # Distinct count for IDs, plain sum for every other measure.
    aggregate = 'nunique' if value_column == 'Segment ID' else 'sum'

    hierarchy = ['Audience Type', 'Audience Type Name',
                 'Audience Source', 'Display Dell Vehicle Mapped']

    # Build one Source/Target/Value frame per adjacent pair of tiers.
    tier_links = []
    for depth in range(1, len(hierarchy)):
        source_col, target_col = hierarchy[depth - 1], hierarchy[depth]
        grouped = (
            df.groupby(hierarchy[:depth + 1])[value_column]
            .agg(aggregate)
            .reset_index()
            .rename(columns={source_col: 'Source',
                             target_col: 'Target',
                             value_column: 'Value'})
        )
        tier_links.append(grouped[['Source', 'Target', 'Value']])

    # Concatenate the tiers into a single link table.
    links = pd.concat(tier_links, ignore_index=True)

    # Node order: every source in order of first appearance, then the targets
    # not seen yet. A label present in several tiers becomes a single node.
    labels = pd.unique(
        pd.concat([links['Source'], links['Target']], ignore_index=True)
    ).tolist()
    node_index = {label: position for position, label in enumerate(labels)}

    activation_source = links['Source'].map(node_index).tolist()
    activation_target = links['Target'].map(node_index).tolist()
    activation_value = links['Value'].tolist()

    # Link colour is chosen from the index of the link's source node; sources
    # not listed here fall back to light grey.
    palette = {0: "#AAA", 1: "#AAA",
               2: "#BBB", 6: "#BBB",
               7: "#CCC", 8: "#CCC"}
    colors = [palette.get(node, "#F0F0F0") for node in activation_source]

    activation_link = dict(source=activation_source, target=activation_target,
                           value=activation_value, color=colors)
    node = dict(label=labels, pad=30, thickness=20)

    chart = go.Sankey(link=activation_link, node=node, arrangement="snap")
    fig = go.Figure(chart)
    fig.update_layout(title_text=title)
    fig.show()

    html_file_path = 'html/' + title + ".html"
    # write_html does not create missing folders, so make sure it exists.
    os.makedirs(os.path.dirname(html_file_path), exist_ok=True)
    pio.write_html(fig, file=html_file_path)


<h2>Main</h2>

In [15]:
# Location of the compiled CSV export. Set the COMPILED_DATA_CSV environment
# variable to run the notebook on another machine; when it is unset the
# original hard-coded location is used.
folder_path = os.environ.get(
    'COMPILED_DATA_CSV',
    r'C:\Users\Rafael_Fagundes\Downloads\compiled_data.csv',
)

# Load the full export; the filters in the following cells narrow it down.
df = pd.read_csv(folder_path, encoding='utf-8')

<h2>DataFrames</h2>

In [16]:
# Keep only the fiscal quarter under analysis.
in_quarter = df['Fiscal Quarter'] == '2024-Q3'
df = df[in_quarter]

# Shared condition for the per-BU subsets: same quarter, United States only.
quarter_and_us = (df['Fiscal Quarter'] == '2024-Q3') & (df['Country_x'] == 'United States')

# One subset per business unit.
csb_df = df[(df['BU'] == 'CSB') & quarter_and_us]
b2b_df = df[(df['BU'] == 'B2B') & quarter_and_us]

In [17]:
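# value_column choices for the plots below: 'Spend' or 'Net Rev'
# ('Segment ID' is also used for the BU-level audience count).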

<h2>Bu | Type | Vehicle (by Dell)</h2>

In [36]:
# One BU -> Type -> Vehicle chart per measure, all with by_Dell enabled.
bu_level_charts = [
    ('Segment ID', 'BU and Type and Vehicles - Audiences'),
    ('Spend', 'BU and Type and Vehicles - Spend'),
    ('Net Rev', 'BU and Type and Vehicles - Revenue'),
]
for chart_value_column, chart_title in bu_level_charts:
    bu_audiencetype_vehicle_sankey_plot(df, chart_value_column, True, chart_title)

<h2>Types | Vehicles</h2>

In [19]:
# Type -> Vehicle charts for each business unit and measure (by_Dell disabled).
type_vehicle_charts = [
    (csb_df, 'Spend', 'CSB Type and Vehicles - Spend'),
    (csb_df, 'Net Rev', 'CSB Type and Vehicles - Revenue'),
    (b2b_df, 'Spend', 'B2B Type and Vehicles - Spend'),
    (b2b_df, 'Net Rev', 'B2B Type and Vehicles - Revenue'),
]
for chart_frame, chart_value_column, chart_title in type_vehicle_charts:
    audiencetype_vehicles_sankey_plot(chart_frame, chart_value_column, False, chart_title)

<h2>Activation Paths (Type | Type Name | Source | Vehicle)</h2>

In [20]:
# Activation-path charts for each business unit and measure (by_Dell enabled).
activation_charts = [
    (csb_df, 'Spend', 'CSB Activation Path - Spend'),
    (csb_df, 'Net Rev', 'CSB Activation Path - Revenue'),
    (b2b_df, 'Spend', 'B2B Activation Path - Spend'),
    (b2b_df, 'Net Rev', 'B2B Activation Path - Revenue'),
]
for chart_frame, chart_value_column, chart_title in activation_charts:
    activation_sankey_plot(chart_frame, chart_value_column, True, chart_title)