<h2>Library</h2>

In [37]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import warnings

warnings.filterwarnings('ignore')

<h2>Main</h2>

In [38]:
folder_path = r'C:\Users\Rafael_Fagundes\Downloads\compiled_data.csv'

df = pd.read_csv(folder_path, encoding='utf-8')

In [39]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1711983 entries, 0 to 1711982
Data columns (total 48 columns):
 #   Column                                       Dtype  
---  ------                                       -----  
 0   Unnamed: 0                                   int64  
 1   Audience Type                                object 
 2   Tactic                                       object 
 3   Media Buy Name                               object 
 4   Campaign Name                                object 
 5   Fiscal Quarter                               object 
 6   Fiscal Week                                  object 
 7   Country_x                                    object 
 8   Display Funnel Mapped                        object 
 9   Display Dell Vehicle Mapped                  object 
 10  Display Business Unit Mapped                 object 
 11  Display Site Name Mapped                     object 
 12  Audience Type Name                           object 
 13  Impressions 

<h2>Audiences created by Dell (w/ Claravine ID)</h2>

In [40]:
# Dataframe filtered only by Claravine Ids and from FY24Q3
filtered_df = df[(df['Segment ID'] != 0) & (df['Segment ID'] != '0') & (df['Fiscal Quarter'] == '2024-Q3')]

In [41]:
# Checking the number of Unique Audiences in FY24Q3
filtered_df['Segment ID'].nunique()

138

In [42]:
agg_df_lvl1 = filtered_df.groupby(['BU','Audience Type'])['Segment ID'].nunique().reset_index()

agg_df_lvl2 = filtered_df.groupby(['BU', 'Audience Type', 'Display Dell Vehicle Mapped'])['Segment ID'].nunique().reset_index()

In [43]:
# Rename columens to Source, Target & Value
agg_df_lvl1.rename(columns={'BU':'Source', 'Audience Type':'Target', 'Segment ID':'Value'}, inplace=True)
agg_df_lvl2.rename(columns={'Audience Type':'Source', 'Display Dell Vehicle Mapped':'Target', 'Segment ID':'Value'}, inplace=True)

In [44]:
# Contact the dataframes in just one
concat_df = pd.concat([agg_df_lvl1[['Source', 'Target', 'Value']]
                       , agg_df_lvl2[['Source','Target','Value']]
                      ], ignore_index=True)

In [45]:
unique_sources_and_targets = pd.unique(concat_df[['Source', 'Target']].values.ravel('K'))
unique_sources_and_targets

array(['B2B', 'CSB', '1PD', '3PD', 'Display', 'Social', 'Display Dco',
       'Display Mobile', 'Lead Generation', 'Video', 'Audio'],
      dtype=object)

<table>
    <tr>
        <td>B2B</td><td>CSB</td><td>1PD</td><td>3PD</td><td>Display</td><td>Social</td><td>Video</td><td>Audio</td><td>Display Dco</td><td>Display Mobile</td><td>Lead Generation</td><td>CTV</td>
    </tr>
    <tr>
        <td>0</td><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td><td>9</td><td>10</td><td>11</td>
    </tr>
</table>

In [46]:
mapping_df = pd.DataFrame({'Label': ['B2B', 'CSB', '1PD', '3PD', 'Display', 'Social', 'Video', 'Audio', 'Display Dco', 'Display Mobile', 'Lead Generation', 'CTV'],
              'Value': [ 0, 1, 2, 3,4,5,6,7,8,9,10,11]})

In [47]:
# Replace values based on the mappings
concat_df['Source'] = concat_df['Source'].replace(mapping_df.set_index('Label')['Value'])
concat_df['Target'] = concat_df['Target'].replace(mapping_df.set_index('Label')['Value'])

In [48]:
# Your existing data
source = concat_df['Source'].values.tolist()
target = concat_df['Target'].values.tolist()
value = concat_df['Value'].values.tolist()
labels = mapping_df['Label'].values.tolist()

# list of colors for each link based on some condition or data
colors = []
for tgts in source:
    if tgts == 0:
        colors.append("#FBEBFF")
    elif tgts == 1:
        colors.append('#FFEED2')
    elif tgts == 2:
        colors.append("#94DCF7")
    elif tgts == 3:
        colors.append("#F0F0F0")
    else:
        colors.append("#000000")  # Default color for other cases

# Create links with specified colors
link = dict(source=source, target=target, value=value, color=colors)

# Create nodes
node = dict(label=labels, pad=30, thickness=20)

# Create a Sankey object
chart = go.Sankey(link=link, node=node, arrangement="snap")

# Build a figure
fig = go.Figure(chart)

# Show the figure
fig.show()


<h2>CSB Audiences in FY24Q3</h2>

In [49]:
csb_df = df[(df['BU'] == 'B2B') & (df['Fiscal Quarter'] == '2024-Q3') & (df['Country_x'] == 'United States')]

csb_agg_df_lvl1 = csb_df.groupby(['Audience Type', 'Display Dell Vehicle Mapped'])['Spend'].sum().reset_index()
csb_agg_df_lvl1.head()

Unnamed: 0,Audience Type,Display Dell Vehicle Mapped,Spend
0,1PD,Display,526446.2
1,1PD,Social,237120.8
2,3PD,Display,1312468.0
3,3PD,Lead Generation,30624.72
4,3PD,Other,0.0


In [50]:
csb_agg_df_lvl1.rename(columns={'Audience Type':'Source', 'Display Dell Vehicle Mapped':'Target', 'Spend':'Value'}, inplace=True)

In [51]:
csb_unique_values = pd.unique(csb_agg_df_lvl1[['Source', 'Target']].values.ravel('K'))
print(csb_unique_values)
print(len(csb_unique_values))

['1PD' '3PD' 'Display' 'Social' 'Lead Generation' 'Other']
6


In [52]:
csb_mapping_df = pd.DataFrame({'Label': ['1PD','3PD','Direct','Display','Display Dco','Display Mobile','Email', 'Lead Generation','Other','Search','Social','Video','AFF','Affiliates', 'Audio','CTV'],
              'Value': [ 0, 1, 2, 3,4,5,6,7,8,9,10,11,12,13,14,15]})

In [53]:
# Replace values based on the mappings
csb_agg_df_lvl1['Source'] = csb_agg_df_lvl1['Source'].replace(csb_mapping_df.set_index('Label')['Value'])
csb_agg_df_lvl1['Target'] = csb_agg_df_lvl1['Target'].replace(csb_mapping_df.set_index('Label')['Value'])

In [54]:
# Sample data
csb_source = csb_agg_df_lvl1['Source'].values.tolist()
csb_target = csb_agg_df_lvl1['Target'].values.tolist()
csb_value = csb_agg_df_lvl1['Value'].values.tolist()
csb_labels = csb_mapping_df['Label'].values.tolist()

# list of colors for each link based on some condition or data
colors = []
for tgts in csb_source:
    if tgts == 0:
        colors.append("#94DCF7")
    else:
        colors.append("#F0F0F0")  # Default color for other cases

# Create links with specified colors
link = dict(source=source, target=target, value=value, color=colors)

# Create links
csb_link = dict(source=csb_source, target=csb_target, value=csb_value, color=colors)

# Create nodes
node = dict(label=csb_labels, pad=30, thickness=20)

# Create a Sankey object
chart = go.Sankey(link=csb_link, node=node, arrangement="snap", legend='legend3')

# Build a figure
fig = go.Figure(chart)

fig.show()


<h2>CSB Audiences in FY24Q3</h2>

In [55]:
b2b_df = df[(df['BU'] == 'B2B') & (df['Fiscal Quarter'] == '2024-Q3') & (df['Country_x'] == 'United States')]

b2b_agg_df_lvl1 = b2b_df.groupby(['Audience Type', 'Display Dell Vehicle Mapped'])['Spend'].sum().reset_index()

b2b_agg_df_lvl1.rename(columns={'Audience Type':'Source', 'Display Dell Vehicle Mapped':'Target', 'Spend':'Value'}, inplace=True)

In [56]:
b2b_unique_values = pd.unique(b2b_agg_df_lvl1[['Source', 'Target']].values.ravel('K'))
print(b2b_unique_values)
print(len(b2b_unique_values))

['1PD' '3PD' 'Display' 'Social' 'Lead Generation' 'Other']
6


In [57]:
b2b_mapping_df = pd.DataFrame({'Label': ['1PD','3PD','Display','Display Dco','Social','Video','Display Mobile','Lead Generation','Other'],
              'Value': [ 0, 1, 2, 3,4,5,6,7,8]})

In [58]:
# Replace values based on the mappings
b2b_agg_df_lvl1['Source'] = b2b_agg_df_lvl1['Source'].replace(b2b_mapping_df.set_index('Label')['Value'])
b2b_agg_df_lvl1['Target'] = b2b_agg_df_lvl1['Target'].replace(b2b_mapping_df.set_index('Label')['Value'])

In [59]:
# Sample data
b2b_source = b2b_agg_df_lvl1['Source'].values.tolist()
b2b_target = b2b_agg_df_lvl1['Target'].values.tolist()
b2b_value = b2b_agg_df_lvl1['Value'].values.tolist()
b2b_labels = b2b_mapping_df['Label'].values.tolist()

# list of colors for each link based on some condition or data
colors = []
for tgts in b2b_source:
    if tgts == 0:
        colors.append("#94DCF7")
    else:
        colors.append("#F0F0F0")  # Default color for other cases

# Create links
b2b_link = dict(source=b2b_source, target=b2b_target, value=b2b_value, color=colors)

# Create nodes
node = dict(label=b2b_labels, pad=30, thickness=20)

# Create a Sankey object
chart = go.Sankey(link=b2b_link, node=node, arrangement="snap")

# Build a figure
fig = go.Figure(chart)

fig.show()


<h2>Activation Path - CSB</h2>

In [60]:
csb_activation_df = df[(df['BU'] == 'CSB') 
                   & (df['Fiscal Quarter'] == '2024-Q3') 
                   & (df['Country_x'] == 'United States')
                   & (df['Segment ID'] != 0) & (df['Segment ID'] != '0')
                  ]

In [61]:
# Fix issue with 'Audience Type Name': 'CRM-1PD\xa0CRM'
csb_activation_df['Audience Type Name'] = csb_activation_df['Audience Type Name'].str.replace('CRM-1PD\xa0CRM', 'CRM-1PD CRM')

csb_activation_df['Audience Type Name'].value_counts()

Audience Type Name
CRM-1PD CRM                1701
DMP-3PD AUDIENCE            568
CATEGORY-3PD CONTEXTUAL       4
IN MARKET-3PD BEHAVIOR        1
Name: count, dtype: int64

In [62]:
# Group dimensions

csb_activation_df_lvl1 = csb_activation_df.groupby(['Audience Type','Audience Type Name'])['Spend'].sum().reset_index()

csb_activation_df_lvl2 = csb_activation_df.groupby(['Audience Type','Audience Type Name','Audience Source'])['Spend'].sum().reset_index()

csb_activation_df_lvl3 = csb_activation_df.groupby(['Audience Type','Audience Type Name','Audience Source','Display Dell Vehicle Mapped'])['Spend'].sum().reset_index()

In [63]:
# Rename columns to Source, Target & Value
csb_activation_df_lvl1.rename(columns={'Audience Type':'Source', 'Audience Type Name':'Target', 'Spend':'Value'}, inplace=True)

csb_activation_df_lvl2.rename(columns={'Audience Type Name':'Source', 'Audience Source':'Target', 'Spend':'Value'}, inplace=True)

csb_activation_df_lvl3.rename(columns={'Audience Source':'Source', 'Display Dell Vehicle Mapped':'Target', 'Spend':'Value'}, inplace=True)

In [64]:
# Contact the dataframes in just one
csb_concat_activation_df = pd.concat([
                       csb_activation_df_lvl1[['Source','Target','Value']]
                       , csb_activation_df_lvl2[['Source','Target','Value']]
                       , csb_activation_df_lvl3[['Source','Target','Value']]
                    ], ignore_index=True)

In [65]:
csb_activation_unique_values = pd.unique(csb_concat_activation_df[['Source', 'Target']].values.ravel('K'))
print(csb_activation_unique_values)
print(len(csb_activation_unique_values))

['1PD' '3PD' 'CRM-1PD CRM' 'CATEGORY-3PD CONTEXTUAL' 'DMP-3PD AUDIENCE'
 'IN MARKET-3PD BEHAVIOR' 'AIQ CRM' 'Liveramp' 'Neustar' 'The Trade Desk'
 'Lead Generation' 'Display' 'Display Mobile' 'Video' 'Audio']
15


In [66]:
csb_activation_mapping_df = pd.DataFrame({'Label': [
        '1PD', '3PD', 'CRM-1PD CRM', 'CRM-1PD-AIQ-NS', 'CRM-1PD\xa0CRM',
        'ABM-3PD AUDIENCE', 'CATEGORY-3PD CONTEXTUAL', 'DMP-3PD AUDIENCE',
        'IN MARKET-3PD BEHAVIOR', 'AIQ CRM', 'Liveramp', 'Lotame', 'Oracle', 'Neustar',
        'The Trade Desk', 'No ID', 'Display', 'Display Dco', 'Lead Generation',
        'Social', 'Display Mobile', 'Video', 'Audio'
    ], 'Value': [ 0, 1, 2, 3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22]})

In [67]:
# Replace values based on the mappings
csb_concat_activation_df['Source'] = csb_concat_activation_df['Source'].replace(csb_activation_mapping_df.set_index('Label')['Value'])
csb_concat_activation_df['Target'] = csb_concat_activation_df['Target'].replace(csb_activation_mapping_df.set_index('Label')['Value'])

In [68]:
# Sample data
csb_activation_source = csb_concat_activation_df['Source'].values.tolist()
csb_activation_target = csb_concat_activation_df['Target'].values.tolist()
csb_activation_value = csb_concat_activation_df['Value'].values.tolist()
csb_activation_labels = csb_activation_mapping_df['Label'].values.tolist()

# list of colors for each link based on some condition or data
colors = []
for tgts in csb_activation_source:
    if tgts == 0:
        colors.append("#AAA")
    elif tgts == 1:
        colors.append("#AAA")
    elif tgts == 2:
        colors.append("#BBB")
    elif tgts == 6:
        colors.append("#BBB")
    elif tgts == 7:
        colors.append("#CCC")
    elif tgts == 8:
        colors.append("#CCC")
    else:
        colors.append("#F0F0F0")  # Default color for other cases

# Create links
activation_link = dict(source=csb_activation_source, target=csb_activation_target, value=csb_activation_value, color=colors)

# Create nodes
node = dict(label=csb_activation_labels, pad=30, thickness=20)

# Create a Sankey object
chart = go.Sankey(link=activation_link, node=node, arrangement="snap")

# Build a figure
fig = go.Figure(chart)

fig.show()


In [69]:
b2b_activation_df = df[(df['BU'] == 'B2B') 
                   & (df['Fiscal Quarter'] == '2024-Q3') 
                   & (df['Country_x'] == 'United States')
                   & (df['Segment ID'] != 0) & (df['Segment ID'] != '0')
                  ]

# Fix issue with 'Audience Type Name': 'CRM-1PD\xa0CRM'
b2b_activation_df['Audience Type Name'] = b2b_activation_df['Audience Type Name'].str.replace('CRM-1PD\xa0CRM', 'CRM-1PD CRM')

b2b_activation_df['Audience Type Name'].value_counts()

# Group dimensions

b2b_activation_df_lvl1 = b2b_activation_df.groupby(['Audience Type','Audience Type Name'])['Spend'].sum().reset_index()

b2b_activation_df_lvl2 = b2b_activation_df.groupby(['Audience Type','Audience Type Name','Audience Source'])['Spend'].sum().reset_index()

b2b_activation_df_lvl3 = b2b_activation_df.groupby(['Audience Type','Audience Type Name','Audience Source','Display Dell Vehicle Mapped'])['Spend'].sum().reset_index()

# Rename columns to Source, Target & Value
b2b_activation_df_lvl1.rename(columns={'Audience Type':'Source', 'Audience Type Name':'Target', 'Spend':'Value'}, inplace=True)

b2b_activation_df_lvl2.rename(columns={'Audience Type Name':'Source', 'Audience Source':'Target', 'Spend':'Value'}, inplace=True)

b2b_activation_df_lvl3.rename(columns={'Audience Source':'Source', 'Display Dell Vehicle Mapped':'Target', 'Spend':'Value'}, inplace=True)

# Contact the dataframes in just one
b2b_concat_activation_df = pd.concat([
                       b2b_activation_df_lvl1[['Source','Target','Value']]
                       , b2b_activation_df_lvl2[['Source','Target','Value']]
                       , b2b_activation_df_lvl3[['Source','Target','Value']]
                    ], ignore_index=True)

In [70]:
b2b_activation_unique_values = pd.unique(b2b_concat_activation_df[['Source', 'Target']].values.ravel('K'))
print(b2b_activation_unique_values)
print(len(b2b_activation_unique_values))

['1PD' '3PD' 'CRM-1PD CRM' 'CRM-1PD-AIQ-NS' 'ABM-3PD AUDIENCE'
 'CATEGORY-3PD CONTEXTUAL' 'DMP-3PD AUDIENCE' 'AIQ CRM' 'Liveramp'
 'Oracle' 'Display' 'Social']
12


In [71]:
b2b_activation_mapping_df = pd.DataFrame({'Label': [
        '1PD', '3PD', 'CRM-1PD CRM', 'CRM-1PD-AIQ-NS', 'ABM-3PD AUDIENCE','CATEGORY-3PD CONTEXTUAL', 'DMP-3PD AUDIENCE', 'AIQ CRM', 'Liveramp','Oracle', 'Display', 'Social'
    ], 'Value': [ 0, 1, 2, 3,4,5,6,7,8,9,10,11]})

In [72]:
# Replace values based on the mappings
b2b_concat_activation_df['Source'] = b2b_concat_activation_df['Source'].replace(b2b_activation_mapping_df.set_index('Label')['Value'])
b2b_concat_activation_df['Target'] = b2b_concat_activation_df['Target'].replace(b2b_activation_mapping_df.set_index('Label')['Value'])

# Sample data
b2b_activation_source = b2b_concat_activation_df['Source'].values.tolist()
b2b_activation_target = b2b_concat_activation_df['Target'].values.tolist()
b2b_activation_value = b2b_concat_activation_df['Value'].values.tolist()
b2b_activation_labels = b2b_activation_mapping_df['Label'].values.tolist()

# list of colors for each link based on some condition or data
colors = []
for tgts in b2b_activation_source:
    if tgts == 0:
        colors.append("#AAA")
    elif tgts == 1:
        colors.append("#AAA")
    elif tgts == 2:
        colors.append("#BBB")
    elif tgts == 6:
        colors.append("#BBB")
    elif tgts == 7:
        colors.append("#CCC")
    elif tgts == 8:
        colors.append("#CCC")
    else:
        colors.append("#F0F0F0")  # Default color for other cases

# Create links
activation_link = dict(source=b2b_activation_source, target=b2b_activation_target, value=b2b_activation_value, color=colors)

# Create nodes
node = dict(label=b2b_activation_labels, pad=30, thickness=20)

# Create a Sankey object
chart = go.Sankey(link=activation_link, node=node, arrangement="snap")

# Build a figure
fig = go.Figure(chart)

fig.show()
