## Analysing responses from the market research survey

In [None]:
# import libraries
import pandas as pd
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

pio.renderers.default = 'notebook'

### User A = Don't use Amenity Data


In [None]:
# load the User A responses from the csv export into a dataframe
userA_path = "userA_responses.csv"
userA_data = pd.read_csv(userA_path, sep=",")
userA_data.head(5)

#### Basic demographic data
0. Number of respondents in user A
1. County percentages
2. Sector percentages

In [None]:
# row count of the User A dataframe, reported as the number of respondents
userA_count = userA_data.shape[0]
print("Number of respondents in User A:",userA_count)

In [None]:
# NOTE: multi-answer columns are not exploded during preprocessing any more;
# that step was removed there and is done further down in this notebook.

# answers per county and per sector
countyA_count = userA_data["county"].value_counts()
sectorA_count = userA_data["sector"].value_counts()

# colour palettes for the county & sector charts
county_colors = px.colors.qualitative.Set1
sector_colors = px.colors.qualitative.Set2

# ---- pie chart 1: county ----
pie_data_countyA = countyA_count.rename_axis('County').reset_index(name='Count')
fig_countyA = px.pie(
    pie_data_countyA,
    names='County',
    values='Count',
    title="User A County Distribution",
    color_discrete_sequence=county_colors,
)

# label the slices and pull out every county with more than 5% of the answers
fig_countyA.update_traces(
    textinfo='label+percent',
    textposition='inside',
    pull=[0.07 if n > 0.05 * countyA_count.sum() else 0 for n in countyA_count.values],
)

# legend placement
fig_countyA.update_layout(
    legend={
        "title": "Counties",
        "orientation": "v",
        "x": 1.1,
        "y": 1,
        "xanchor": "left",
        "yanchor": "top",
        "font": {"size": 10},
        "bgcolor": "rgba(255, 255, 255, 0.8)",
    }
)

fig_countyA.show()

# ---- pie chart 2: sector ----
pie_data_sectorA = sectorA_count.rename_axis('Sector').reset_index(name='Count')
fig_sectorA = px.pie(
    pie_data_sectorA,
    names='Sector',
    values='Count',
    title="User A Sector Distribution",
    color_discrete_sequence=sector_colors,
)

# label the slices and pull out every sector with more than 10% of the answers
fig_sectorA.update_traces(
    textinfo='label+percent',
    textposition='inside',
    pull=[0.07 if n > 0.10 * sectorA_count.sum() else 0 for n in sectorA_count.values],
)

# legend placement
fig_sectorA.update_layout(
    legend={
        "title": "Sectors",
        "orientation": "v",
        "x": 1.1,
        "y": 1,
        "xanchor": "left",
        "yanchor": "top",
        "font": {"size": 10},
        "bgcolor": "rgba(255, 255, 255, 0.8)",
    }
)

fig_sectorA.show()

#### Technology usage
1. Which device is mostly used day-to-day?
2. How often do you use digital tools for navigation?

In [None]:
# function to explode multiple answer cols

def explode_multiple_answers(data, column_names, delimiter=';'):
    """
    Split multi-answer survey columns and explode them into one row per answer.

    For every name in ``column_names`` that exists in ``data`` a new column
    ``<name>_exploded`` is added to a copy of ``data``. If at least one cell of
    the column contains ``delimiter``, each string cell is split on it, the
    pieces are stripped of surrounding whitespace and the frame is exploded so
    every piece gets its own row. Otherwise the new column simply mirrors the
    original values.

    Parameters:
    - data: DataFrame containing the survey data (it is not modified)
    - column_names: iterable of column names to process; names that are not
      columns of ``data`` are skipped
    - delimiter: literal string separating multiple answers (default is ';');
      it is never interpreted as a regular expression

    Returns:
    - A new DataFrame with the additional ``*_exploded`` columns. Exploding
      several columns multiplies the rows of a respondent, so callers should
      drop duplicates on the respondent id plus the exploded column of
      interest before counting.
    """
    # Work on a copy so the caller's frame is left untouched
    data_exploded = data.copy()

    def _contains_delimiter(value):
        # missing answers (NaN/None) and non-string cells never contain it
        return isinstance(value, str) and delimiter in value

    def _split_answers(value):
        # only strings can be split; anything else (e.g. NaN) passes through
        # and is kept as a single row by ``explode``
        if isinstance(value, str):
            return [item.strip() for item in value.split(delimiter)]
        return value

    for column_name in column_names:
        if column_name not in data_exploded.columns:
            continue

        exploded_name = column_name + '_exploded'
        column = data_exploded[column_name]

        if column.map(_contains_delimiter).any():
            # one list of stripped answers per row, then one row per answer
            data_exploded[exploded_name] = column.map(_split_answers)
            data_exploded = data_exploded.explode(exploded_name)
        else:
            # nothing to split: keep the original values in the new column
            data_exploded[exploded_name] = column

    return data_exploded

# multi-answer columns of the User A survey that get exploded
userAcols_to_explode = [
    "device_personal",
    "why_impractical_demo_personal",
    "other_amenity_personal",
    "other_feature_personal",
]
users_A_expl = explode_multiple_answers(userA_data, userAcols_to_explode)

# one de-duplicated copy per exploded column: keeps a single row for each
# (id, answer) pair of that column
users_A_expl1 = users_A_expl.drop_duplicates(
    subset=["id", "device_personal_exploded"]
).copy()
users_A_expl2 = users_A_expl.drop_duplicates(
    subset=["id", "why_impractical_demo_personal_exploded"]
).copy()
users_A_expl3 = users_A_expl.drop_duplicates(
    subset=["id", "other_amenity_personal_exploded"]
).copy()
users_A_expl4 = users_A_expl.drop_duplicates(
    subset=["id", "other_feature_personal_exploded"]
).copy()



In [None]:
# every device answer that is not in the standard list is relabelled "Other"
device_list = ["Laptop","Smartphone","Desktop computer","Tablet"]
users_A_expl1["device_personal_exploded"] = users_A_expl1["device_personal_exploded"].where(
    users_A_expl1["device_personal_exploded"].isin(device_list), "Other"
)

users_A_expl1["device_personal_exploded"].value_counts()

In [None]:
# Q1: number of ids per device answer
deviceA_count = users_A_expl1.groupby("device_personal_exploded")['id'].count()

# plotting frame, sorted by count in descending order
bar_data_deviceA = (
    deviceA_count.rename_axis('Device')
    .reset_index(name='Count')
    .sort_values(by='Count', ascending=False)
)

# bar chart (the palette below is defined but not passed to the chart)
device_colors = px.colors.qualitative.Set3
fig_deviceA = px.bar(
    bar_data_deviceA,
    x='Device',
    y='Count',
    title="User A Preferred Device",
)

# figure size and legend settings
fig_deviceA.update_layout(
    width=800,
    height=500,
    legend={
        "title": "Devices",
        "orientation": "h",
        "x": 1.1,
        "y": 1,
        "xanchor": "left",
        "yanchor": "top",
        "font": {"size": 6},
        "bgcolor": "rgba(255, 255, 255, 0.8)",
    },
)

fig_deviceA.show()

In [None]:
# answers per navigation-tool usage frequency, plus the palette for the chart
toolfrequencyA_count = userA_data["freq_tool_personal"].value_counts()
toolfrequency_colors = px.colors.qualitative.Set3

# plotting frame and pie chart
pie_data_toolfreqA = toolfrequencyA_count.rename_axis('Tool Frequency').reset_index(name='Count')
fig_toolfreqA = px.pie(
    pie_data_toolfreqA,
    names='Tool Frequency',
    values='Count',
    title="User A - Digital Tool Usage Frequency for Navigation",
    color_discrete_sequence=toolfrequency_colors,
)

# pull out the slices holding more than 20% of the answers and label all slices
fig_toolfreqA.update_traces(
    pull=[0.05 if n > 0.2 * toolfrequencyA_count.sum() else 0 for n in toolfrequencyA_count.values],
    textinfo='label+percent',
    textposition='inside',
)

# legend placement
fig_toolfreqA.update_layout(
    legend={
        "title": "Frequency",
        "orientation": "v",
        "x": 1.1,
        "y": 1,
        "xanchor": "left",
        "yanchor": "top",
        "font": {"size": 10},
        "bgcolor": "rgba(255, 255, 255, 0.8)",
    }
)

fig_toolfreqA.show()

In [None]:
# Q1: number of ids per device answer
deviceA_count = users_A_expl1.groupby("device_personal_exploded")['id'].count().reset_index()
deviceA_count.columns = ["Device","Preferred_Count"]

# Q2: number of rows per (device, navigation-tool usage frequency) pair
toolfrequencyA_count = users_A_expl1.groupby(["device_personal_exploded","freq_tool_personal"]).size().reset_index(name="Frequency_Count")
toolfrequency_colors = px.colors.qualitative.Set3
toolfrequencyA_count.columns = ['Device', 'Frequency', 'Frequency_Count']

# merge the datasets on 'Device' and sort so the most popular device comes first
stackedA_data = pd.merge(toolfrequencyA_count, deviceA_count, on="Device", how="left")
stackedA_data = stackedA_data.sort_values(by="Preferred_Count", ascending=False)

# device order for the x axis (descending Preferred_Count); stored on the
# 'Device' column itself, which is the column that is actually plotted
device_orderA = stackedA_data['Device'].unique().tolist()
stackedA_data['Device'] = pd.Categorical(stackedA_data['Device'], categories=device_orderA, ordered=True)

# plot stacked bar chart & adjust layout
# category_orders is passed explicitly: plotly express orders axis categories
# by first appearance in the data unless told otherwise
fig_stackedA = px.bar(
    stackedA_data,
    x="Device",
    y="Frequency_Count",
    color="Frequency",
    title="User A - Preferred Device & Digital Tool Usage for Navigation",
    color_discrete_sequence=toolfrequency_colors,
    labels={"Frequency_Count": "Count"},
    category_orders={"Device": device_orderA}
)
fig_stackedA.update_layout(
    width=800,
    height=500,
    barmode="stack",
    legend=dict(
        title="Frequency",
        orientation="v",
        x=1.1,
        y=1,
        xanchor="left",
        yanchor="top",
        font=dict(size=10),
        bgcolor="rgba(255, 255, 255, 0.8)"
    )
)

# display stacked bar chart
fig_stackedA.show()

#### How useful our product is
1. Usefulness distribution
2. Reasons why impractical

In [None]:
# Q1 & 2
# usefulness pie chart next to a table with the reasons given for "impractical" ratings
product_usefulA_count = userA_data["demo_useful_personal"].value_counts()
product_useful_colors = px.colors.qualitative.Set3

# plotting frame and stand-alone pie chart
pie_data_productusefulA = product_usefulA_count.rename_axis('Product Usefulness').reset_index(name='Count')
fig_productusefulA = px.pie(
    pie_data_productusefulA,
    names='Product Usefulness',
    values='Count',
    title="User A Product Usefulness for accessing Amenity data",
    color_discrete_sequence=product_useful_colors,
)

# parent = usefulness rating; child = the stated reason when the rating is one
# of the two "impractical" options, otherwise the rating itself
userA_data['parent'] = userA_data['demo_useful_personal']
userA_data['child'] = userA_data['why_impractical_demo_personal'].where(
    userA_data['demo_useful_personal'].isin(["Somewhat impractical", "Extremely impractical"]),
    userA_data['demo_useful_personal'],
)

# distinct ids per (parent, child) pair, restricted to the "impractical" ratings
userA_impractical_product = (
    userA_data.groupby(['parent', 'child'])['id'].nunique().reset_index(name='count')
)
userA_impractical_product = userA_impractical_product[
    userA_impractical_product['parent'].isin(["Somewhat impractical", "Extremely impractical"])
]

# 1x2 figure: pie (domain type) on the left, table on the right
fig_productcombinedA = make_subplots(
    rows=1,
    cols=2,
    column_widths=[0.4, 0.6],
    specs=[[{"type": "domain"}, {"type": "table"}]],
)

# column 1: pie, slices above 25% of the answers are pulled out
fig_productcombinedA.add_trace(
    go.Pie(
        labels=pie_data_productusefulA['Product Usefulness'],
        values=pie_data_productusefulA['Count'],
        pull=[0.05 if n > 0.25 * product_usefulA_count.sum() else 0 for n in product_usefulA_count.values],
        textinfo='label+percent',
        textposition='inside',
        marker={"colors": product_useful_colors},
    ),
    row=1,
    col=1,
)

# column 2: rating / reason table
fig_productcombinedA.add_trace(
    go.Table(
        header={"values": ['Product Usefulness', 'Why']},
        cells={
            "values": [
                userA_impractical_product['parent'],
                userA_impractical_product['child'],
            ]
        },
    ),
    row=1,
    col=2,
)

# title, size and legend of the combined figure
fig_productcombinedA.update_layout(
    title="User A - Magpie Usefulness for accessing Amenity data and Reasons for Impracticality",
    title_x=0.5,
    width=1300,
    height=600,
    showlegend=False,
)

fig_productcombinedA.show()


#### User emails

In [None]:
# retrieve emails from contact column
def retrieve_emails(dataframe,col_name):
    """
    Collect the values of a contact column that look like e-mail addresses.

    Parameters:
    - dataframe: DataFrame holding the survey responses
    - col_name: name of the column to scan

    Returns:
    - A list with every string value of the column that contains "@", in row
      order. Missing values (NaN/None) and non-string cells are skipped.
    """
    # the isinstance check keeps empty (NaN) contact cells from raising a TypeError
    return [val for val in dataframe[col_name] if isinstance(val, str) and "@" in val]

userA_contact = retrieve_emails(userA_data,"contact_personal")
print(userA_contact)

# save to txt file, one address per line
# mode 'w' (rather than 'x') so re-running this cell regenerates the file
# instead of failing with FileExistsError; the with-block closes the file
with open('userA_emails.txt', 'w', encoding='utf-8') as f:
    f.writelines(f"{address}\n" for address in userA_contact)
print("File written successfully")

### User B = Use Amenity Data

In [None]:
# load the User B responses from the csv export into a dataframe
userB_path = "userB_responses.csv"
userB_data = pd.read_csv(userB_path, sep=",")
userB_data.head(5)

#### Basic demographic data
0. Number of respondents
1. County percentages
2. Sector percentages

In [None]:
# row count of the User B dataframe, reported as the number of respondents
userB_count = userB_data.shape[0]
print("Number of respondents in User B:",userB_count)

In [None]:
# answers per county and per sector
countyB_count = userB_data["county"].value_counts()
sectorB_count = userB_data["sector"].value_counts()

# the county_colors / sector_colors palettes defined for the User A charts are reused

# ---- pie chart 1: county ----
pie_data_countyB = countyB_count.rename_axis('County').reset_index(name='Count')
fig_countyB = px.pie(
    pie_data_countyB,
    names='County',
    values='Count',
    title="User B County Distribution",
    color_discrete_sequence=county_colors,
)

# label the slices and pull out every county with more than 5% of the answers
fig_countyB.update_traces(
    textinfo='label+percent',
    textposition='inside',
    pull=[0.07 if n > 0.05 * countyB_count.sum() else 0 for n in countyB_count.values],
)
fig_countyB.update_layout(
    legend={
        "title": "Counties",
        "orientation": "v",
        "x": 1.1,
        "y": 1,
        "xanchor": "left",
        "yanchor": "top",
        "font": {"size": 10},
        "bgcolor": "rgba(255, 255, 255, 0.8)",
    }
)

fig_countyB.show()

# ---- pie chart 2: sector ----
pie_data_sectorB = sectorB_count.rename_axis('Sector').reset_index(name='Count')
fig_sectorB = px.pie(
    pie_data_sectorB,
    names='Sector',
    values='Count',
    title="User B Sector Distribution",
    color_discrete_sequence=sector_colors,
)

# label the slices and pull out every sector with more than 10% of the answers
fig_sectorB.update_traces(
    textinfo='label+percent',
    textposition='inside',
    pull=[0.07 if n > 0.10 * sectorB_count.sum() else 0 for n in sectorB_count.values],
)
fig_sectorB.update_layout(
    legend={
        "title": "Sectors",
        "orientation": "v",
        "x": 1.1,
        "y": 1,
        "xanchor": "left",
        "yanchor": "top",
        "font": {"size": 10},
        "bgcolor": "rgba(255, 255, 255, 0.8)",
    }
)

fig_sectorB.show()

#### Technology usage
1. Which device is mostly used day-to-day?
2. What tool do you use to access data on amenities?
3. How often do you use that tool?
4. Are you satisfied with it? If not, why?

In [None]:
# multi-answer columns of the User B survey that get exploded
userBcols_to_explode = [
    "device_work",
    "type_amenity_data_work",
    "type_tool_work",
    "satisfaction_tool_work",
    "why_impractical_demo_work",
    "other_amenity_work",
]

users_B_expl = explode_multiple_answers(userB_data, userBcols_to_explode)

# one de-duplicated copy per exploded column: keeps a single row for each
# (id, answer) pair of that column
users_B_expl1 = users_B_expl.drop_duplicates(
    subset=["id", "device_work_exploded"]
).copy()
users_B_expl2 = users_B_expl.drop_duplicates(
    subset=["id", "type_amenity_data_work_exploded"]
).copy()
users_B_expl3 = users_B_expl.drop_duplicates(
    subset=["id", "type_tool_work_exploded"]
).copy()
users_B_expl4 = users_B_expl.drop_duplicates(
    subset=["id", "satisfaction_tool_work_exploded"]
).copy()
users_B_expl5 = users_B_expl.drop_duplicates(
    subset=["id", "why_impractical_demo_work_exploded"]
).copy()
users_B_expl6 = users_B_expl.drop_duplicates(
    subset=["id", "other_amenity_work_exploded"]
).copy()


In [None]:
# device answers outside device_list (defined in the User A section) become "Other"
users_B_expl1["device_work_exploded"] = users_B_expl1["device_work_exploded"].where(
    users_B_expl1["device_work_exploded"].isin(device_list), "Other"
)
# sanity check: ids per device label
print(users_B_expl1.groupby("device_work_exploded")["id"].count())

In [None]:
## Q1
# number of ids per device answer
deviceB_count = users_B_expl1.groupby("device_work_exploded")['id'].count()

# plotting frame, sorted by count in descending order
bar_data_deviceB = (
    deviceB_count.rename_axis('Device')
    .reset_index(name='Count')
    .sort_values(by='Count', ascending=False)
)

# bar chart
fig_deviceB = px.bar(
    bar_data_deviceB,
    x='Device',
    y='Count',
    title="User B Preferred Device",
)

# figure size and legend settings
fig_deviceB.update_layout(
    width=800,
    height=500,
    legend={
        "title": "Devices",
        "orientation": "h",
        "x": 1.1,
        "y": 1,
        "xanchor": "left",
        "yanchor": "top",
        "font": {"size": 6},
        "bgcolor": "rgba(255, 255, 255, 0.8)",
    },
)

fig_deviceB.show()

In [None]:
# tool answers outside the standard list become "Other"
tool_list = ["Navigation app", "Government database", "City software"]
users_B_expl3["type_tool_work_exploded"] = users_B_expl3["type_tool_work_exploded"].where(
    users_B_expl3["type_tool_work_exploded"].isin(tool_list), "Other"
)
# sanity check: ids per tool label
print(users_B_expl3.groupby("type_tool_work_exploded")["id"].count())

In [None]:
# Q2: number of ids per tool answer
toolB_count = users_B_expl3.groupby("type_tool_work_exploded")['id'].count().reset_index()
toolB_count.columns = ['Tool', 'Preferred_Count']

# Q3: number of rows per (tool, usage frequency) pair
toolfrequencyB_count = users_B_expl3.groupby(['type_tool_work_exploded', 'freq_tool_work']).size().reset_index(name='Frequency_Count')
toolfrequencyB_count.columns = ['Tool', 'Frequency', 'Frequency_Count']

# merge the datasets on 'Tool' and sort so the most popular tool comes first
stacked_data = pd.merge(toolfrequencyB_count, toolB_count, on="Tool", how="left")
stacked_data = stacked_data.sort_values(by="Preferred_Count", ascending=False)

# tool order for the x axis (descending Preferred_Count)
tool_order = stacked_data['Tool'].unique().tolist()
stacked_data['Tool'] = pd.Categorical(stacked_data['Tool'], categories=tool_order, ordered=True)

# plot stacked bar chart & adjust layout
# category_orders is passed explicitly: plotly express orders axis categories
# by first appearance in the data unless told otherwise
fig_stacked = px.bar(
    stacked_data,
    x="Tool",
    y="Frequency_Count",
    color="Frequency",
    title="User B Tool Distribution by Frequency of Use",
    color_discrete_sequence=toolfrequency_colors,
    labels={"Frequency_Count": "Count"},
    category_orders={"Tool": tool_order}
)
fig_stacked.update_layout(
    width=800,
    height=500,
    barmode="stack",
    legend=dict(
        title="Tool Usage Frequency",
        orientation="v",
        x=1.1,
        y=1,
        xanchor="left",
        yanchor="top",
        font=dict(size=10),
        bgcolor="rgba(255, 255, 255, 0.8)"
    )
)

# display stacked bar chart
fig_stacked.show()


In [None]:
# Q4
# satisfaction pie chart next to a table with the reasons given by "Somewhat" / "No" answers
tool_satisfactionB_count = userB_data["satisfaction_tool_work"].value_counts()

# plotting frame and stand-alone pie chart
pie_data_toolsatisB = tool_satisfactionB_count.rename_axis('Tool Satisfaction').reset_index(name='Count')
fig_toolsatisB = px.pie(
    pie_data_toolsatisB,
    names='Tool Satisfaction',
    values='Count',
    title="User B Tool Satisfaction for accessing Amenity data",
    color_discrete_sequence=product_useful_colors,
)

# parent = satisfaction answer; child = the stated reason when the answer is
# "Somewhat" or "No", otherwise the answer itself
userB_data['parent'] = userB_data['satisfaction_tool_work']
userB_data['child'] = userB_data['why_unsatisfied_tool_work'].where(
    userB_data['satisfaction_tool_work'].isin(["Somewhat", "No"]),
    userB_data['satisfaction_tool_work'],
)

# distinct ids per (parent, child) pair, restricted to "Somewhat" / "No"
userB_impractical_tool = (
    userB_data.groupby(['parent', 'child'])['id'].nunique().reset_index(name='count')
)
userB_impractical_tool = userB_impractical_tool[
    userB_impractical_tool['parent'].isin(["Somewhat", "No"])
]

# 1x2 figure: pie (domain type) on the left, table on the right
fig_toolcombinedB = make_subplots(
    rows=1,
    cols=2,
    column_widths=[0.4, 0.6],
    specs=[[{"type": "domain"}, {"type": "table"}]],
)

# column 1: pie, slices above 25% of the answers are pulled out
fig_toolcombinedB.add_trace(
    go.Pie(
        labels=pie_data_toolsatisB['Tool Satisfaction'],
        values=pie_data_toolsatisB['Count'],
        pull=[0.05 if n > 0.25 * tool_satisfactionB_count.sum() else 0 for n in tool_satisfactionB_count.values],
        textinfo='label+percent',
        textposition='inside',
        marker={"colors": product_useful_colors},
    ),
    row=1,
    col=1,
)

# column 2: answer / reason table
fig_toolcombinedB.add_trace(
    go.Table(
        header={"values": ['Tool Satisfaction', 'Why']},
        cells={
            "values": [
                userB_impractical_tool['parent'],
                userB_impractical_tool['child'],
            ]
        },
    ),
    row=1,
    col=2,
)

# title, size and legend of the combined figure
fig_toolcombinedB.update_layout(
    title="User B Tool Satisfaction for accessing Amenity data and Reasons for Unsatisfaction",
    title_x=0.5,
    width=1300,
    height=600,
    showlegend=False,
)

fig_toolcombinedB.show()


#### Type of amenity accessed

In [None]:
# distinct amenity-type answers present after exploding
pd.unique(users_B_expl["type_amenity_data_work_exploded"])

In [None]:
# amenity-type answers outside the standard list become "Other"
amenity_list = ["Recreational","Transport & Mobility","Healthcare & Safety", "Technological","Mechanical","Accessibility"]

users_B_expl2["type_amenity_data_work_exploded"] = users_B_expl2["type_amenity_data_work_exploded"].where(
    users_B_expl2["type_amenity_data_work_exploded"].isin(amenity_list), "Other"
)
# sanity check: ids per amenity label
users_B_expl2.groupby("type_amenity_data_work_exploded")["id"].count()

In [None]:
# answers per amenity type, taken from the de-duplicated exploded frame
amenity_count = (
    users_B_expl2['type_amenity_data_work_exploded']
    .value_counts()
    .rename_axis('Type of Amenity')
    .reset_index(name='Count')
)

# bar chart of the amenity types
fig_typeamenB = px.bar(
    amenity_count,
    x='Type of Amenity',
    y='Count',
    title='Type of Amenity Data Accessed',
    labels={'Type of Amenity': 'Type of Amenity', 'Count': 'Count'},
)

fig_typeamenB.show()

#### How useful our product is
1. Usefulness distribution
2. Reasons why impractical

In [None]:
# usefulness pie chart next to a table with the reasons given for "impractical" ratings
product_usefulB_count = userB_data["demo_useful_work"].value_counts()

# plotting frame and stand-alone pie chart
pie_data_produseB = product_usefulB_count.rename_axis('Product Usefulness').reset_index(name='Count')
fig_produseB = px.pie(
    pie_data_produseB,
    names='Product Usefulness',
    values='Count',
    title="User B Thoughts on Product Usefulness",
    color_discrete_sequence=product_useful_colors,
)

# parent = usefulness rating; child = the stated reason when the rating is one
# of the two "impractical" options, otherwise the rating itself
# (this overwrites the parent/child columns written by the satisfaction cell)
userB_data['parent'] = userB_data['demo_useful_work']
userB_data['child'] = userB_data['why_impractical_demo_work'].where(
    userB_data['demo_useful_work'].isin(["Somewhat impractical", "Extremely impractical"]),
    userB_data['demo_useful_work'],
)

# distinct ids per (parent, child) pair, restricted to the "impractical" ratings
userB_impractical_product = (
    userB_data.groupby(['parent', 'child'])['id'].nunique().reset_index(name='count')
)
userB_impractical_product = userB_impractical_product[
    userB_impractical_product['parent'].isin(["Somewhat impractical", "Extremely impractical"])
]

# 1x2 figure: pie (domain type) on the left, table on the right
fig_productcombinedB = make_subplots(
    rows=1,
    cols=2,
    column_widths=[0.4, 0.6],
    specs=[[{"type": "domain"}, {"type": "table"}]],
)

# column 1: pie, slices above 25% of the answers are pulled out
fig_productcombinedB.add_trace(
    go.Pie(
        labels=pie_data_produseB['Product Usefulness'],
        values=pie_data_produseB['Count'],
        pull=[0.05 if n > 0.25 * product_usefulB_count.sum() else 0 for n in product_usefulB_count.values],
        textinfo='label+percent',
        textposition='inside',
        marker={"colors": product_useful_colors},
    ),
    row=1,
    col=1,
)

# column 2: rating / reason table
fig_productcombinedB.add_trace(
    go.Table(
        header={"values": ['Product Usefulness', 'Why']},
        cells={
            "values": [
                userB_impractical_product['parent'],
                userB_impractical_product['child'],
            ]
        },
    ),
    row=1,
    col=2,
)

# title, size and legend of the combined figure
fig_productcombinedB.update_layout(
    title="User B Product Usefulness for accessing Amenity data and Reasons for Impracticality",
    title_x=0.5,
    width=1300,
    height=600,
    showlegend=False,
)

fig_productcombinedB.show()


#### User emails
For this weekend, retrieve the county they live in, the sector they work in and the type of amenity data they access.

In [None]:
# retrieve emails from contact column
userB_contact = retrieve_emails(userB_data,"contact_work")
print(userB_contact)

# save to txt file, one address per line
# mode 'w' (rather than 'x') so re-running this cell regenerates the file
# instead of failing with FileExistsError; the with-block closes the file
with open('userB_emails.txt', 'w', encoding='utf-8') as g:
    g.writelines(f"{address}\n" for address in userB_contact)
print("File written successfully")

### Both users

#### Demographic data
1. Use Amenity or not
2. Sector distribution

In [None]:
# group sizes: User B answers count as "Yes" (uses amenity data), User A as "No"
userA_count = userA_data.shape[0]
userB_count = userB_data.shape[0]
list_opt = ["Yes","No"]
list_count = [userB_count, userA_count]

# plotting frame and pie chart
pie_user_count = pd.DataFrame({'Use amenity': list_opt, 'Count': list_count})
fig_user_count = px.pie(
    pie_user_count,
    names='Use amenity',
    values='Count',
    title="Amenity data usage",
)

# label + percentage inside each slice
fig_user_count.update_traces(textinfo='label+percent', textposition='inside')

fig_user_count.show()


In [None]:
# sector distribution across both user groups
# stack the sector columns of User A and User B
all_sectors = pd.concat([userA_data["sector"], userB_data["sector"]])

# answers per sector as a two-column frame
sector_counts = all_sectors.value_counts().rename_axis('sector').reset_index(name='count')

# pie chart
fig_allsector = px.pie(
    sector_counts,
    names='sector',
    values='count',
    title="Sector Distribution for All Users",
    color_discrete_sequence=sector_colors,
)

# label the slices and pull out the sectors whose count is above the mean count
fig_allsector.update_traces(
    textinfo='percent+label',
    pull=[0.05 if n > sector_counts['count'].mean() else 0 for n in sector_counts['count']],
)

fig_allsector.show()
