In [290]:
import pandas as pd
import numpy as np
import warnings

import plotly.graph_objects as go
import plotly.io as pio


# Suppress every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation notices raised by the
# libraries used below are hidden as well.
warnings.filterwarnings('ignore')

In [291]:
def format_value(value):
    """Render a number as a short human-readable string.

    Magnitudes that round to one million or more are shown in millions with
    one decimal and an ``M`` suffix, magnitudes that round to one thousand or
    more in thousands with a ``K`` suffix, and anything smaller as a whole
    number.

    Parameters
    ----------
    value : int or float
        The number to format. Negative numbers are abbreviated by magnitude
        and keep their sign. NaN is rendered as ``'nan'``.

    Returns
    -------
    str
        For example ``'87.3K'``, ``'1.5M'``, ``'27'`` or ``'-1.5K'``.
    """
    magnitude = abs(value)
    sign = '-' if value < 0 else ''

    # The cut-offs sit where the *rounded* text would overflow its unit:
    # 999_950 would otherwise print as '1000.0K' and 999.5 as '1000'.
    if magnitude >= 999_950:
        return f'{sign}{magnitude / 1_000_000:.1f}M'
    if magnitude >= 999.5:
        return f'{sign}{magnitude / 1_000:.1f}K'
    # Small values (and NaN, for which every comparison above is False)
    # are formatted directly so the sign handling stays with Python.
    return f'{value:.0f}'

In [292]:
# NOTE(review): absolute, user-specific location; the notebook only runs on a
# machine where this exact file exists.
folder_path = r'C:\Users\Rafael_Fagundes\Downloads\compiled_data.csv'

df = pd.read_csv(folder_path, encoding='utf-8')

# Restrict the data to one business unit, one fiscal quarter and one country.
is_csb = df['BU'] == 'CSB'
is_2024_q3 = df['Fiscal Quarter'] == '2024-Q3'
is_united_states = df['Country_x'] == 'United States'
df = df[is_csb & is_2024_q3 & is_united_states]

# Discard rows whose 'Segment ID' is zero, whether it is stored as the number 0
# or as the text '0'.
has_segment_id = (df['Segment ID'] != 0) & (df['Segment ID'] != '0')
df = df[has_segment_id]

In [293]:
# Number of distinct 'Segment ID' values per 'Audience Type'.
df_lvl1_seg = (
    df.groupby(['Audience Type'])['Segment ID']
    .nunique()
    .reset_index()
    .rename(columns={'Audience Type': 'Label', 'Segment ID': 'Segments'})
)

# Number of distinct 'Segment ID' values per 'Display Dell Vehicle Mapped'.
df_lvl2_seg = (
    df.groupby(['Display Dell Vehicle Mapped'])['Segment ID']
    .nunique()
    .reset_index()
    .rename(columns={'Display Dell Vehicle Mapped': 'Label', 'Segment ID': 'Segments'})
)

# Both levels stacked into a single Label -> Segments lookup table.
segment_df = pd.concat([df_lvl1_seg, df_lvl2_seg])


# Total 'Spend' for every ('Audience Type', 'Display Dell Vehicle Mapped')
# pair. Each row is one link: Source label, Target label, summed Value.
df_lvl1 = (
    df.groupby(['Audience Type', 'Display Dell Vehicle Mapped'])['Spend']
    .sum()
    .reset_index()
    .rename(columns={'Audience Type': 'Source',
                     'Display Dell Vehicle Mapped': 'Target',
                     'Spend': 'Value'})
)

# Distinct node labels across both columns. ravel('K') walks the array in
# memory order, so the resulting label order follows that layout.
unique_values = pd.unique(df_lvl1[['Source', 'Target']].values.ravel('K'))

# Node table: 'Value' is the integer index assigned to each label.
mapping_df = pd.DataFrame({'Label': unique_values.tolist(),
                           'Value': list(range(len(unique_values)))})

# Replace labels with their node index. `map` is a direct lookup that yields
# an integer column, instead of relying on `replace` to rewrite an object
# column and downcast it afterwards. Every label is present in the lookup
# because the lookup was built from these same two columns.
label_to_index = mapping_df.set_index('Label')['Value']
df_lvl1['Source'] = df_lvl1['Source'].map(label_to_index)
df_lvl1['Target'] = df_lvl1['Target'].map(label_to_index)

# Concatenate the link dataframes into a single one.
# NOTE(review): only one level exists today; the list form presumably leaves
# room for further levels. Confirm before simplifying.
concat_df = pd.concat([
    df_lvl1[['Source', 'Target', 'Value']]
], ignore_index=True)

##########
# Total link value per source node and its share of all source value.
# After the rename, 'Value' holds the node index and 'Total' the summed amount.
df_source = concat_df.groupby('Source')['Value'].sum().reset_index()
total_sum_source = df_source['Value'].sum()
df_source = (
    df_source
    .assign(Percentage=(df_source['Value'] / total_sum_source) * 100)
    .rename(columns={'Value': 'Total', 'Source': 'Value'})
)

# Same computation for the target nodes.
df_target = concat_df.groupby('Target')['Value'].sum().reset_index()
total_sum_target = df_target['Value'].sum()
df_target = (
    df_target
    .assign(Percentage=(df_target['Value'] / total_sum_target) * 100)
    .rename(columns={'Value': 'Total', 'Target': 'Value'})
)

totals_df = pd.concat([df_source, df_target])

# One row per node, in node-index order: the totals are joined on the node
# index, the segment counts on the label.
merged_df = (
    mapping_df
    .merge(totals_df, on='Value', how='left')
    .merge(segment_df, on='Label', how='left')
)

# Node caption: "<label> (<segments>) <br><share>% (<total>)".
segments_text = merged_df['Segments'].apply(format_value)
share_text = merged_df['Percentage'].apply(lambda x: f'{x:.1f}%')
total_text = merged_df['Total'].apply(format_value)
merged_df['Label_x'] = (
    merged_df['Label'] + " (" + segments_text + ") " + "<br>"
    + share_text + " (" + total_text + ")"
)
############



# Link endpoints and weights as plain lists; node captions in node-index order.
source = df_lvl1['Source'].tolist()
target = df_lvl1['Target'].tolist()
value = df_lvl1['Value'].tolist()
labels = merged_df['Label_x'].tolist()

# One colour per link. generate_gray_tones is not defined in this cell and
# must already exist in the kernel.
colors = generate_gray_tones(len(source))

# Link and node specifications for the Sankey trace.
link = {'source': source, 'target': target, 'value': value, 'color': colors}
node = {'label': labels, 'pad': 30, 'thickness': 20}

# Build the trace, wrap it in a figure, set title and width, then render.
chart = go.Sankey(link=link, node=node, arrangement="snap")
fig = go.Figure(chart)
fig.update_layout(title_text='title', width=800)

fig.show()

# HTML export, currently disabled. It refers to a `title` variable that this
# cell does not define.
#html_file_path = 'html/' + title + ".html"
#pio.write_html(fig, file=html_file_path)

In [294]:
# Show the node table: label, node index, total, percentage, segment count and caption.
merged_df

Unnamed: 0,Label,Value,Total,Percentage,Segments,Label_x
0,1PD,0,87258.86829,51.027808,27,1PD (27) <br>51.0% (87.3K)
1,3PD,1,83743.712169,48.972192,23,3PD (23) <br>49.0% (83.7K)
2,Display,2,65538.547136,38.326057,29,Display (29) <br>38.3% (65.5K)
3,Display Mobile,3,2344.488264,1.371025,3,Display Mobile (3) <br>1.4% (2.3K)
4,Lead Generation,4,0.0,0.0,4,Lead Generation (4) <br>0.0% (0)
5,Video,5,103119.545059,60.302918,22,Video (22) <br>60.3% (103.1K)
6,Audio,6,0.0,0.0,10,Audio (10) <br>0.0% (0)


In [295]:
# Show the distinct 'Segment ID' count per 'Audience Type'.
df_lvl1_seg

Unnamed: 0,Label,Segments
0,1PD,27
1,3PD,23


In [296]:
# Show the distinct 'Segment ID' count per 'Display Dell Vehicle Mapped'.
df_lvl2_seg

Unnamed: 0,Label,Segments
0,Audio,10
1,Display,29
2,Display Mobile,3
3,Lead Generation,4
4,Video,22
