# Exploratory Data Analysis

##### Importing Libraries and Loading Data

In [1]:
#### Importing libraries
import pandas as pd
import numpy as np
import plotly.express as px
import nbformat
import plotly.graph_objects as go

#### Loading Data
# Read both profile tables from CSV files in the notebook's working directory.
product_profile, user_profile = (
    pd.read_csv(csv_name)
    for csv_name in ("product_profile.csv", "user_profile.csv")
)

## Exploring User Profile Table

In [2]:
# Drop the leftover CSV index column. errors="ignore" keeps this cell idempotent:
# re-running it after the column is already gone no longer raises a KeyError.
user_profile = user_profile.drop(columns="Unnamed: 0", errors="ignore")

# Total spend per user across the four product categories. The `+` chain
# deliberately propagates NaN, so a user with missing per-category spend gets a
# missing total rather than a misleading 0.
user_profile['total_spent'] = (user_profile['total_spent_Doohickey'] +
                               user_profile['total_spent_Gadget'] +
                               user_profile['total_spent_Gizmo'] +
                               user_profile['total_spent_Widget'])
user_profile.head()



Unnamed: 0,id,name,state,latitude,longitude,source,dayduration,age,total_spent_Doohickey,total_spent_Gadget,total_spent_Gizmo,total_spent_Widget,total_orders,discount_usage_proportion,email_provider,total_spent
0,1,Hudson Borer,NE,40.7132,-98.526,Twitter,2684,38,189.5193,389.5355,221.8629,1719.2326,11.0,0.272727,yahoo,2520.1503
1,2,Domenica Williamson,IA,41.5813,-92.6991,Affiliate,2500,57,,,,,,,yahoo,
2,3,Lina Heaney,MN,46.1197,-92.8416,Facebook,2786,63,896.4755,126.91,695.0698,510.8554,10.0,0.2,yahoo,2229.3107
3,4,Arnold Adams,CO,37.9203,-104.973,Google,2182,32,149.891,0.0,214.7897,150.5928,4.0,0.25,gmail,515.2735
4,5,Dominique Leffler,NY,42.349,-77.0567,Twitter,2716,50,0.0,0.0,332.208,0.0,1.0,0.0,hotmail,332.208


#### Checking Missing Values

In [3]:
# Number of missing values in each column (isnull is an alias of isna).
print(user_profile.isnull().sum())

id                             0
name                           0
state                          0
latitude                       0
longitude                      0
source                         0
dayduration                    0
age                            0
total_spent_Doohickey        754
total_spent_Gadget           754
total_spent_Gizmo            754
total_spent_Widget           754
total_orders                 754
discount_usage_proportion    754
email_provider                 0
total_spent                  754
dtype: int64


In [4]:
# Scatter of discount usage proportion against day duration.
# The title and the `labels` keys must name the columns actually plotted:
# plotly express only applies a label whose key matches a column used in the figure.
fig = px.scatter(user_profile, x='dayduration', y='discount_usage_proportion',
                 title='Scatter Plot between Day Duration and Discount Usage Proportion',
                 labels={'dayduration': 'Day Duration',
                         'discount_usage_proportion': 'Discount Usage Proportion'})
fig.show()

In [5]:
# Scatter of Doohickey spend against day duration.
# The title and the `labels` keys must name the columns actually plotted:
# plotly express only applies a label whose key matches a column used in the figure.
fig = px.scatter(user_profile, x='dayduration', y='total_spent_Doohickey',
                 title='Scatter Plot between Day Duration and Total Spent on Doohickey',
                 labels={'dayduration': 'Day Duration',
                         'total_spent_Doohickey': 'Total Spent on Doohickey'})
fig.show()

In [6]:
# Scatter of Gadget spend against day duration.
# The title and the `labels` keys must name the columns actually plotted:
# plotly express only applies a label whose key matches a column used in the figure.
fig = px.scatter(user_profile, x='dayduration', y='total_spent_Gadget',
                 title='Scatter Plot between Day Duration and Total Spent on Gadget',
                 labels={'dayduration': 'Day Duration',
                         'total_spent_Gadget': 'Total Spent on Gadget'})
fig.show()

In [8]:
import plotly.graph_objects as go

# Data
# NOTE(review): these values are hard-coded rather than computed in this notebook;
# presumably they come from a two-group customer segmentation — confirm the source.
labels = ["Low Spending", "High Spending"]  # Assuming this is the order
discount_usage = [0.062, 0.102]  # Corresponding proportions

# Create a bar chart (one color per bar)
fig = go.Figure(data=[go.Bar(x=labels, y=discount_usage, marker_color=['skyblue', 'royalblue'])])

# Customize the layout. Axis title fonts are set through `title.font`; the older
# `titlefont` shorthand is deprecated and rejected by recent plotly releases.
fig.update_layout(
    title="Discount Usage Proportion by Customer Spending Level",
    plot_bgcolor='white',  # Set plot background to white
    xaxis=dict(
        title=dict(text="Customer Spending Level", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    yaxis=dict(
        title=dict(text="Proportion of Discount Usage", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
)

# Show the plot
fig.show()

In [9]:
# Data
# NOTE(review): these values are hard-coded rather than computed in this notebook;
# presumably they come from a three-group customer segmentation — confirm the source.
spending_levels = ["Low", "Medium", "High"]
discount_usage = [0.045, 0.102, 0.101]

# Create bar chart (one color per bar)
fig = go.Figure(data=[go.Bar(x=spending_levels, y=discount_usage, marker_color=['skyblue', 'royalblue', 'midnightblue'])])

# Customize layout. Axis title fonts are set through `title.font`; the older
# `titlefont` shorthand is deprecated and rejected by recent plotly releases.
fig.update_layout(
    title="Discount Usage Proportion by Spending Level",
    plot_bgcolor='white',  # Set plot background to white
    xaxis=dict(
        title=dict(text="Spending Level", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    yaxis=dict(
        title=dict(text="Discount Usage Proportion", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
)

# Show plot
fig.show()

In [44]:
# Data
# NOTE(review): hard-coded summary values, presumably per spending-level group — confirm the source.
total_orders = [1.64, 10.886, 20.914]  # defined for reference; not plotted in this figure
discount_usage = [0.045, 0.102, 0.101]
group_sizes = [1230, 979, 291]
spending_levels = ["Low Spending", "Medium Spending", "High Spending"]

# Bubble chart: y and color show discount usage, marker size shows group size
fig = go.Figure(data=[go.Scatter(
    x=spending_levels,
    y=discount_usage,
    mode='markers',
    marker=dict(
        size=[size / 10 for size in group_sizes],  # Scale size for visibility
        color=discount_usage,  # Color based on discount usage
        colorscale='Greens',
        colorbar=dict(title='Discount Usage Proportion'),
        line=dict(  # Add outline
            color='black',  # Outline color
            width=1         # Outline width
        )
    )
)])

# Customize layout. The title describes what is actually drawn. Axis title fonts
# are set through `title.font`; the older `titlefont` shorthand is deprecated and
# rejected by recent plotly releases.
fig.update_layout(
    title="Discount Usage by Customer Spending Level (marker size = group size)",
    xaxis=dict(
        title=dict(text="Customer Spending Level", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    yaxis=dict(
        title=dict(text="Discount Usage Proportion", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
)

# Show plot
fig.show()

In [47]:
import plotly.graph_objects as go

# Data
# NOTE(review): hard-coded summary values, presumably per spending-level group — confirm the source.
total_orders = [1.64, 10.886, 20.914]  # defined for reference; not plotted in this figure
discount_usage = [0.045, 0.102, 0.101]
spending_levels = ["Low Spending", "Medium Spending", "High Spending"]
group_sizes = [1230, 979, 291]

# Bubble chart: y and color show discount usage, marker size shows group size
fig = go.Figure(data=[go.Scatter(
    x=spending_levels,
    y=discount_usage,
    mode='markers',
    marker=dict(
        size=[size / 10 for size in group_sizes],  # Scale size appropriately
        color=discount_usage,  # Color based on discount usage
        colorscale='BuGn',  # Use a color scale for better understanding
        colorbar=dict(title='Discount Usage Proportion'),
        line=dict(  # Add outline
            color='black',  # Outline color
            width=1         # Outline width
        )
    )
)])

# Customize layout in a single call. The title describes what is actually drawn.
# Axis title fonts are set through `title.font`; the older `titlefont` shorthand
# is deprecated and rejected by recent plotly releases.
fig.update_layout(
    title="Discount Usage by Customer Spending Level (marker size = group size)",
    xaxis=dict(
        title=dict(text="Customer Spending Level", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    yaxis=dict(
        title=dict(text="Discount Usage Proportion", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    width=800,   # Width in pixels
    height=800,  # Height in pixels
)

# Show plot
fig.show()

In [18]:
# Data
# NOTE(review): hard-coded summary values, presumably per spending-level group — confirm the source.
total_orders = [1.64, 10.886, 20.914]
discount_usage = [0.045, 0.102, 0.101]
spending_levels = ["Low", "Medium", "High"]  # Simplified for color mapping

# Color mapping for spending levels
color_map = {"Low": "red", "Medium": "blue", "High": "green"}  # Customize colors

# Create scatter plot: one fixed-size marker per spending level
fig = go.Figure(data=[go.Scatter(
    x=discount_usage,
    y=total_orders,
    mode='markers',
    marker=dict(
        size=20,  # Adjust size as needed
        color=[color_map[level] for level in spending_levels],  # Color by spending level
    )
)])

# Customize layout in a single call. Axis title fonts are set through
# `title.font`; the older `titlefont` shorthand is deprecated and rejected by
# recent plotly releases.
fig.update_layout(
    title="Relationship between Total Orders, Discount Usage, and Spending Level",
    xaxis=dict(
        title=dict(text="Discount Usage Proportion", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    yaxis=dict(
        title=dict(text="Total Orders", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    width=800,   # Width in pixels
    height=600,  # Height in pixels
)
# Show plot
fig.show()


In [35]:
import plotly.graph_objects as go

# Data
# NOTE(review): hard-coded summary values, presumably per spending-level group — confirm the source.
total_orders = [1.64, 10.886, 20.914]
discount_usage = [0.045, 0.102, 0.101]
spending_levels = ["Low", "Medium", "High"]
group_sizes = [1230, 979, 291]

# Color mapping for spending levels
color_map = {"Low": "lightgray", "Medium": "green", "High": "darkgreen"}

# Create scatter plot with hover information; marker size shows group size
fig = go.Figure(data=[go.Scatter(
    x=discount_usage,
    y=total_orders,
    mode='markers',
    marker=dict(
        size=[s / 10 for s in group_sizes],
        color=[color_map[level] for level in spending_levels],
        line=dict(  # Add outline
            color='black',  # Outline color
            width=1         # Outline width
        )
    ),
    # Add hover text to display all information
    hovertemplate=(
        "<b>Spending Level:</b> %{text}<br>"
        "<b>Group Size:</b> %{customdata[0]}<br>"  # Access customdata
        "<b>Discount Usage:</b> %{x}<br>"
        "<b>Total Orders:</b> %{y}<extra></extra>"  # <extra></extra> removes trace name
    ),
    text=spending_levels,  # Spending level for hover text
    customdata=list(zip(group_sizes)),  # One-element tuples so customdata[0] is the group size
    # The legend is built from the per-level proxy traces added below; without this
    # flag the unnamed data trace would also appear there as "trace 0".
    showlegend=False,
)])

# Customize layout with legend. Axis title fonts are set through `title.font`;
# the older `titlefont` shorthand is deprecated and rejected by recent plotly releases.
fig.update_layout(
    title="Relationship between Total Orders, Discount Usage, Spending Level, and Group Size",
    xaxis=dict(
        title=dict(text="Discount Usage Proportion", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    yaxis=dict(
        title=dict(text="Total Orders", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    legend=dict(
        title=dict(text="Spending Level"),  # Legend title
        x=1,  # Anchor the legend at the right edge of the plotting area
        y=1,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=12,
            color="black"
        ),
        bgcolor="lightgray",
        bordercolor="black",
        borderwidth=1
    ),
    width=1200,  # Width in pixels
    height=700,  # Height in pixels
)

# Add one empty proxy trace per spending level so the legend shows a color key
for level, color in color_map.items():
    fig.add_trace(go.Scatter(
        x=[None],  # No data: this trace exists only for its legend entry
        y=[None],
        mode="markers",
        marker=dict(color=color, size=20),  # Adjust marker size as needed
        name=level,  # Legend label
        showlegend=True  # Show in legend
    ))

# Show plot
fig.show()


In [53]:
import plotly.graph_objects as go

# Data
# NOTE(review): hard-coded summary values, presumably per-cluster averages — confirm the source.
total_orders = [3.347, 17.147]
discount_usage = [0.062, 0.102]
group_sizes = [1747, 753]  # Group sizes for the clusters
cluster_labels = ["Low Spending", "High Spending"]  # Labels for the clusters

# Create scatter plot with size representing group sizes
fig = go.Figure(data=[go.Scatter(
    x=discount_usage,
    y=total_orders,
    mode='markers',
    marker=dict(
        size=[s / 20 for s in group_sizes],  # Scaled down for visibility
        color=discount_usage,  # Color based on discount usage
        colorscale='Greens',
        colorbar=dict(title='Discount Usage Proportion'),
        line=dict(
            color='black',
            width=1
        )
    ),
    text=cluster_labels,  # Text for hover information
    hovertemplate=(
        "<b>Cluster:</b> %{text}<br>"
        "<b>Group Size:</b> %{customdata[0]}<br>"
        "<b>Discount Usage:</b> %{x}<br>"
        "<b>Total Orders:</b> %{y}<extra></extra>"
    ),
    customdata=list(zip(group_sizes))  # One-element tuples so customdata[0] is the group size
)])

# Customize layout in a single call. Axis title fonts are set through
# `title.font`; the older `titlefont` shorthand is deprecated and rejected by
# recent plotly releases.
fig.update_layout(
    title="Relationship between Total Orders, Discount Usage, and Clusters",
    xaxis=dict(
        title=dict(text="Discount Usage Proportion", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    yaxis=dict(
        title=dict(text="Average Total Orders", font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    width=600,   # Width in pixels
    height=600,  # Height in pixels
)
# Show plot
fig.show()