In [38]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt


In [32]:
# Load the user table. The funnel flags below are attached to this frame.
users_ = pd.read_csv('00_user_table.csv')

# Load one table per funnel page.
home_pg = pd.read_csv('01_home_page_table.csv')
search_pg = pd.read_csv('02_search_page_table.csv')
payment_pg = pd.read_csv('03_payment_page_table.csv')
confirmation_pg = pd.read_csv('04_payment_confirmation_table.csv')

# Funnel step name -> page table for that step (insertion order = funnel order).
pgs = dict(
    home=home_pg,
    search=search_pg,
    payment=payment_pg,
    confirmation=confirmation_pg,
)

# Add one 0/1 column "<step>_pg" per funnel step: 1 when the user's id is
# present in that step's page table, 0 otherwise.
for step, page_table in pgs.items():
    reached_step = users_['user_id'].isin(page_table['user_id'])
    users_[f'{step}_pg'] = reached_step.astype(int)

# Rename the "date" column to "user_created_at".
users_ = users_.rename(columns={'date': 'user_created_at'})

# An ad-hoc check for fully duplicated rows in the user table
# (users_.duplicated(keep=False)) was run earlier and found none.

# df is the working frame used by the plotting cells: users who have a
# confirmation flag come first, and within each group rows are ordered by
# ascending user_created_at.
df = users_.sort_values(
    by=['confirmation_pg', 'user_created_at'],
    ascending=[False, True],
)



In [45]:


# Funnel by sex.
# Expects `df` to carry the 0/1 step flags (home_pg, search_pg, payment_pg,
# confirmation_pg) and a 'sex' column.
#
# Summing each flag within a sex group gives the number of flagged users per
# step; the result is then reshaped to long form (one row per sex and step),
# which is the layout px.funnel consumes.
funnel_df = (
    df.groupby('sex')[['home_pg', 'search_pg', 'payment_pg', 'confirmation_pg']]
    .sum()
    .reset_index()
    .melt(id_vars='sex', var_name='funnel_step', value_name='count')
)

# One funnel segment per sex at every step.
fig = px.funnel(
    funnel_df,
    x='count',
    y='funnel_step',
    color='sex',
    title="Conversion Funnel by Sex",
    labels={'funnel_step': 'Funnel Step', 'count': 'Number of Users'},
)

fig.show()


In [46]:
import pandas as pd
import plotly.express as px

# Funnel by device.
# Expects `df` to carry the 0/1 step flags (home_pg, search_pg, payment_pg,
# confirmation_pg) and a 'device' column.
#
# Summing each flag within a device group gives the number of flagged users
# per step; the result is then reshaped to long form (one row per device and
# step), which is the layout px.funnel consumes.
funnel_df = (
    df.groupby('device')[['home_pg', 'search_pg', 'payment_pg', 'confirmation_pg']]
    .sum()
    .reset_index()
    .melt(id_vars='device', var_name='funnel_step', value_name='count')
)

# One funnel segment per device at every step.
fig = px.funnel(
    funnel_df,
    x='count',
    y='funnel_step',
    color='device',
    title="Conversion Funnel by Device",
    labels={'funnel_step': 'Funnel Step', 'count': 'Number of Users'},
)

fig.show()


In [47]:
# Funnel by device, faceted by sex.
# Sum each 0/1 step flag within every (sex, device) pair, then reshape to long
# form so each row is one (sex, device, funnel_step) count.
funnel_df = (
    df.groupby(['sex', 'device'])[['home_pg', 'search_pg', 'payment_pg', 'confirmation_pg']]
    .sum()
    .reset_index()
    .melt(id_vars=['sex', 'device'], var_name='funnel_step', value_name='count')
)

# One facet column per sex, devices distinguished by color inside each facet.
fig = px.funnel(
    funnel_df,
    x='count',
    y='funnel_step',
    color='device',
    facet_col='sex',
    title="Conversion Funnel by Device and Sex",
    labels={'funnel_step': 'Funnel Step', 'count': 'Number of Users'},
    facet_col_wrap=2,  # facets per row; adjust to the number of distinct 'sex' values
)

fig.show()

In [62]:
import pandas as pd
import plotly.express as px

# Stacked funnel by combined sex/device category, labelled with
# "count (share of all users %)".
# Expects `df` to carry the 0/1 step flags (home_pg, search_pg, payment_pg,
# confirmation_pg) plus 'sex' and 'device' columns.

# Combined "<sex>-<device>" category so each pair becomes one funnel segment.
# This adds the column to df itself, so it stays available to later cells.
df['sex_device'] = df['sex'] + '-' + df['device']

# Denominator for the percentage labels: the number of rows in df.
# Assumes df holds one row per user -- TODO confirm.
# The earlier version divided by the sum of all four step totals, which counts
# a user once for every step they reached (up to four times) and therefore
# understated every percentage.
total_users = len(df)

# Flagged users per step for each sex/device category, in long form
# (one row per category and funnel step) for Plotly.
funnel_df = (
    df.groupby('sex_device')[['home_pg', 'search_pg', 'payment_pg', 'confirmation_pg']]
    .sum()
    .reset_index()
    .melt(id_vars='sex_device', var_name='funnel_step', value_name='count')
)

# Share of all users represented by each segment, in percent.
funnel_df['percentage'] = (funnel_df['count'] / total_users) * 100

# Text shown inside each segment, e.g. "1234 (5.6%)". Built column-wise rather
# than with a row-by-row apply.
funnel_df['label'] = (
    funnel_df['count'].astype(str)
    + ' ('
    + funnel_df['percentage'].map('{:.1f}'.format)
    + '%)'
)

# Custom colors per category.
# NOTE(review): keys must match the sex_device strings exactly, including
# case (e.g. 'Male-Desktop' would not match 'male-desktop'); categories
# without a matching key presumably fall back to Plotly's default color
# sequence -- confirm against the actual values in df.
color_map = {
    'male-desktop': '#001f3f',  # Navy blue for male desktop
    'male-mobile': '#add8e6',   # Light blue for male mobile
    'female-desktop': '#ff4d4d',  # Red-pink for female desktop
    'female-mobile': '#ffb3b3'   # Light pink for female mobile
}

# Plot the funnel chart
fig = px.funnel(
    funnel_df,
    x='count',
    y='funnel_step',
    color='sex_device',
    color_discrete_map=color_map,  # Use custom colors
    title="Conversion Funnel by Sex and Device",
    labels={'funnel_step': 'Funnel Step', 'count': 'Number of Users'},
    text='label',  # Show data labels
)

# Render the prepared label text inside each segment.
fig.update_traces(textposition='inside', texttemplate='%{text}')

# Fixed-size stacked layout with margins trimmed on all sides but the title.
fig.update_layout(
    funnelmode='stack',
    autosize=False,
    width=800,  # Adjust width as needed
    height=1100,  # Adjust height as needed
    margin=dict(l=0, r=0, t=50, b=0)
)

fig.show()
