# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

import seaborn as sns
from plotly.offline import iplot

# Display preferences: never truncate the column list, and render floats
# with a thousands separator and one decimal place.
pd.options.display.max_columns = None
pd.set_option("display.float_format", "{:,.1f}".format)

ModuleNotFoundError: No module named 'seaborn'

# Create The Visualization Functions

In [2]:
# Helper used by later cells to build percentage bar charts.
# It is defined once here so the plotting code is not repeated in every section.
def create_bar_chart(the_data, x_label, y_label, the_title):
    """Build a bar chart showing each entry's share (in %) of the series total.

    Parameters
    ----------
    the_data : pandas.Series
        Counts indexed by category (the notebook passes ``value_counts()`` results).
    x_label : str
        Title for the x axis.
    y_label : str
        Title for the y axis.
    the_title : str
        Figure title.

    Returns
    -------
    The plotly figure, with the legend hidden. The caller is responsible
    for any further styling and for displaying it.
    """
    # Shares are computed from the series that was passed in, never from a
    # notebook-level variable, so the helper works for any input and in any
    # execution order.
    percentages = (the_data / the_data.sum()) * 100

    fig = px.bar(the_data,
             x = the_data.index,
             y = percentages,
             labels = {"index" : x_label,  "y" : y_label},
             # Bar text keeps the original " 0.1f" format (leading space, one decimal)
             text = percentages.apply(lambda pct: f"{pct: 0.1f}%"),
             title = the_title,
             color = the_data.index,
             color_discrete_sequence=["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F", "#EDD2F3"],
            )

    fig.update_layout(
        showlegend = False,
        # Axis titles are set explicitly so they always match the labels passed in
        xaxis_title = x_label,
        yaxis_title = y_label,
        title = {
            "font": {
                "size": 26,
                "family": "tahoma",
            }
        }
    )

    return fig

In [3]:
# Helper used by later cells to build pie charts.
# It is defined once here so the plotting code is not repeated in every section.
def create_pie_chart(the_data, the_title, the_colors  = ["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F"]):
    """Build a pie chart with one slice per entry of ``the_data``.

    ``the_data`` supplies the slice names (its index) and the slice values,
    ``the_title`` is the figure title, and ``the_colors`` is the colour
    sequence handed to plotly. The figure is returned with the legend hidden.
    """
    title_style = {"font": {"size": 28, "family": "tahoma"}}

    pie = px.pie(
        names=the_data.index,
        values=the_data,
        title=the_title,
        color_discrete_sequence=the_colors,
    )
    pie.update_layout(showlegend=False, title=title_style)

    return pie

# Loading The Data 🛢️

In [4]:
# Load the dataset from a path relative to the notebook's working directory
df = pd.read_csv("data/shopping_trends_updated.csv")

# Let's Get a Quick Overview!! 🧐

In [None]:
# Report the dataset dimensions: rows first, then columns
n_rows, n_cols = df.shape
print(f"Number of Observations: {n_rows}")
print(f"Number of Columns(Features): {n_cols}")

In [None]:
# Print pandas' concise summary of the frame
df.info()

In [None]:
# Preview the first 10 rows
df.head(10)

In [None]:
# Summary statistics for a hand-picked set of columns
# (original column names, before the spaces are replaced below)
summary_columns = ["Age", "Purchase Amount (USD)", "Review Rating", "Previous Purchases"]
df[summary_columns].describe()

## Cleaning The Columns' Names From any Spaces!!🤗

In [9]:
# Replace every space in the column names with an underscore
df.columns = [column.replace(" ", "_") for column in df.columns]

In [None]:
# Show the column names after the space-to-underscore replacement
df.columns

In [11]:
# Give the purchase-amount column a shorter name (the frame is modified in place)
price_column_rename = {"Purchase_Amount_(USD)": "Price_in_USD"}
df.rename(columns=price_column_rename, inplace=True)

# Now, It's Time To Dive Deeper Into *Important Columns*!!🤿

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Customer ID Column
</h3>

In [None]:
# Number of Customer_ID values that repeat an earlier one
df["Customer_ID"].duplicated().sum()

In [None]:
# The distinct Customer_ID values
df["Customer_ID"].unique()

<h3 style = "padding: 12px;
             font: bold 16px tahoma;
             background-color: #000;
             color: lightgreen;
             border: 2px solid #FF9B9B;
             border-radius: 5px;">
    Expected!!😁 It's a Column of Unique Values For Each Customer Record.
    <BR/>
    <BR/>
    So, I Think We Will Not Need This Column in Our Analysis.🤔
</h3>

In [None]:
# List the columns again before exploring them one by one
df.columns

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Age Column
</h3>

In [None]:
# Summary statistics for the Age column
df["Age"].describe()

In [None]:
# Histogram of customer ages, with the count printed on each bar
age_title_font = {"size": 26, "family": "tahoma"}
age_text_font = {"family": "consolas", "size": 16}

fig = px.histogram(
    df["Age"],
    title="The Distribution of Ages",
    labels={"value": "Ages", "count": "Frequency"},
    nbins=25,
    text_auto=True,
    color_discrete_sequence=["#7B66FF"],
)
fig.update_layout(showlegend=False, title={"font": age_title_font})
fig.update_traces(
    hovertemplate="Age Range: %{x}<br>Frequency: %{y}",
    marker={"line": {"color": "#333", "width": 1}},
    textfont=age_text_font,
)

iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Gender Column👦👧
</h3>

In [None]:
# Count rows per gender, then show each gender's share as a percentage
gender = df["Gender"].value_counts()
gender / gender.sum() * 100

In [None]:
# Percentage bar chart of gender, built with the create_bar_chart helper
gender_text_font = {"family": "consolas", "size": 16}
gender_bar_outline = {"line": {"color": "#333", "width": 2}}

fig = create_bar_chart(gender, "Gender", "Frequency (%)", "Percentage of Gender")
fig.update_traces(
    hovertemplate="Gender: %{x}<br>Frequency (%): %{y}%",
    marker=gender_bar_outline,
    textfont=gender_text_font,
)
iplot(fig)

<h3 style = "padding: 12px;
             font: bold 16px tahoma;
             background-color: #000;
             color: lightgreen;
             border: 2px solid #FF9B9B;
             border-radius: 5px;">
    I Really Did Not Expect That!!😮
    <BR/>
    <BR/>
    I Assumed That, of Course, Females Would Have The Highest Percentage.😁😁
</h3>

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Item Purchased Column 🛒
</h3>

In [None]:
# Count rows per purchased item, then show each item's share as a percentage
items = df["Item_Purchased"].value_counts()
items / items.sum() * 100

In [20]:
# Keep the five items with the highest counts
top_5_items = items.nlargest(5)

In [None]:
# Horizontal bar chart of the five most frequently purchased items
fig = px.bar(top_5_items,
             x = top_5_items,
             y = top_5_items.index,
             orientation="h",
             color=top_5_items.index,
             labels={"x" : "Frequency of Sold Items", "y" : "Item"},
             text_auto=True,
             title="Top 5 Purchased Items")

fig.update_layout(
    showlegend = False,
    # Axis titles are set explicitly so they do not depend on how the
    # index-derived column ends up being named
    xaxis_title = "Frequency of Sold Items",
    yaxis_title = "Item",
    title = {
        "font": {
            "size": 28,
            "family": "tahoma",
        }
    }
)
fig.update_traces(
    textfont= {
        "family": "consolas",
        "size": 16,
    },
    # x holds raw purchase counts, so the hover label must not claim a percentage
    hovertemplate = "Item: %{y}<br>Frequency: %{x}",
    marker = {"line": {"color": "#444", "width":1}}
)
iplot(fig)

<h3 style = "padding: 12px;
             font: bold 16px tahoma;
             background-color: #000;
             color: lightgreen;
             border: 2px solid #FF9B9B;
             border-radius: 5px;">
    I am really asking how that is possible!!😮🤯
    <BR/>
    The highest percentage of gender is male, and the top-purchased item is a blouse.
    <BR/>
At first sight this makes no sense; of course, we will analyze it later.😁😁
</h3>

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Category Column 👔👜
</h3>

In [None]:
# Count rows per category, then show each category's share as a percentage
category = df["Category"].value_counts()
category / category.sum() * 100

In [None]:
# Pie chart of category popularity, built with the create_pie_chart helper
# defined near the top of the notebook
fig = create_pie_chart(category, "The Popularity of Each Category👔")
fig.update_traces(
    textinfo="label+percent",
    textfont={"family": "tahoma", "size": 16},
    marker={"line": {"color": "#111", "width": 2}},
    hovertemplate="Category: %{label}<br>Popularity (%): %{percent}",
)

iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Price Column 💰
</h3>

In [None]:
# Summary statistics for the renamed price column
df["Price_in_USD"].describe()

In [None]:
# Box plot of the purchase prices (title typo "Blot" corrected to "Plot")
fig = px.box(y = df["Price_in_USD"],
            title="The Price Box Plot",
            height= 680, width=700, labels={"y" :"Price (USD)"})

iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Location Column 🌏
</h3>

In [None]:
# The 10 locations with the most rows in the dataset
df["Location"].value_counts().nlargest(10)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Size Column
</h3>

In [None]:
# Number of rows for each Size value
size = df["Size"].value_counts()
size

In [None]:
# Percentage bar chart of sizes, built with the create_bar_chart helper
size_text_font = {"family": "consolas", "size": 16}
size_bar_outline = {"line": {"color": "#333", "width": 2}}

fig = create_bar_chart(size, "Size", "Frequency (%)", "The Popularity of Each Size")
fig.update_traces(
    hovertemplate="Size: %{x}<br>Popularity (%): %{y}%",
    marker=size_bar_outline,
    textfont=size_text_font,
)
iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Review Rating Column
</h3>

In [None]:
# Summary statistics for the Review_Rating column
df["Review_Rating"].describe()

In [None]:
# Histogram of review ratings, with the count printed on each bar
rating_title_font = {"size": 26, "family": "tahoma"}
rating_text_font = {"family": "consolas", "size": 16}

fig = px.histogram(
    df["Review_Rating"],
    title="The Distribution Of Rating",
    labels={"value": "Rating"},
    nbins=10,
    text_auto=True,
    color_discrete_sequence=["#7B66FF"],
)
fig.update_layout(showlegend=False, title={"font": rating_title_font})
fig.update_traces(
    hovertemplate="Rating Range: %{x}<br>Frequency: %{y}",
    marker={"line": {"color": "#333", "width": 1}},
    textfont=rating_text_font,
)

iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Shipping Type Column
</h3>

In [31]:
# Number of rows for each shipping type
dfshipping_type = df["Shipping_Type"].value_counts()

In [None]:
# Bubble chart of shipping types: marker size follows the count,
# and each shipping type gets its own colour
shipping_palette = ["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F", "#EDD2F3"]

fig = px.scatter(
    dfshipping_type,
    title="The Frequency of Each Shipping Type🛒",
    labels={"value": "Frequency", "index": "Shipping Type"},
    size=dfshipping_type,
    color=dfshipping_type.index,
    color_discrete_sequence=shipping_palette,
    opacity=0.85,
    template="plotly_dark",
)
fig.update_layout(showlegend=False, title={"font": {"size": 25, "family": "tahoma"}})
fig.update_traces(hovertemplate="Shipping Type: %{x}<br>Popularity: %{y}")
iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Discount Column
</h3>

In [None]:
# Count rows per Discount_Applied value, then show each share as a percentage
discount = df["Discount_Applied"].value_counts()
(discount / discount.sum()) * 100

In [None]:
# Two-slice pie chart of discount usage, built with the create_pie_chart helper
discount_palette = ["#C0DEFF", "#FF9F9F"]

fig = create_pie_chart(discount, "The Frequency of Applied Discount!", the_colors=discount_palette)
fig.update_traces(
    textinfo="label+percent",
    textfont={"family": "tahoma", "size": 16},
    marker={"line": {"color": "#111", "width": 2}},
    hovertemplate="Have Discount: %{label}<br>Frequency (%): %{percent}",
)

iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Payment Method Column
</h3>

In [None]:
# Count rows per payment method, then show each method's share as a percentage
payment = df["Payment_Method"].value_counts()
(payment / payment.sum()) * 100

In [None]:
# Bubble chart of payment methods: marker size follows the count,
# and each payment method gets its own colour
payment_palette = ["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F", "#EDD2F3"]

fig = px.scatter(
    payment,
    title="The Popularity of Each Payment Method",
    labels={"value": "Popularity", "index": "Payment Method"},
    size=payment,
    color=payment.index,
    color_discrete_sequence=payment_palette,
    opacity=0.85,
    template="plotly_dark",
)
fig.update_layout(showlegend=False, title={"font": {"size": 25, "family": "tahoma"}})
fig.update_traces(hovertemplate="Payment Method: %{x}<br>Popularity: %{y}")
iplot(fig)

# Now, It's Time for Our Lovely Part!! Asking Questions and Getting Our Insights🥰🤩


<h3 style = "padding: 15px;
             font: bold 20px tahoma;
             background-color: #000;
             color: gold;
             border: 3px solid #AE445A;
             border-radius: 7px">
    ♦ What Is The Popularity of Each Category per Season?!⛅👔
</h3>

In [None]:
# Count rows for every (Season, Category) pair: seasons become the rows of the
# table and categories its columns. The Category column is used both as the
# column grouping and as the value being counted.
category_per_season = df.pivot_table(index = "Season", columns = df["Category"], values ="Category", aggfunc="count")
category_per_season

In [38]:
# Turn each season's category counts into whole-number percentages of that
# season's total. Dividing row-wise by the per-row totals handles whichever
# seasons are present and does not rely on their position in the index.
total = category_per_season.sum(axis=1)
category_per_season = category_per_season.div(total, axis=0).mul(100).round()

In [None]:
# Grouped bar chart: one group per season, one bar per category
fig = px.bar(category_per_season,
            barmode="group",
            x = category_per_season.index,
            y = category_per_season.columns,
            color_discrete_sequence=["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F", "#EDD2F3"],
            template="plotly_dark",
            # text_auto takes a d3-format specifier; ".0f" prints whole numbers
            # (the previous printf-style "%0.0f" is not valid d3-format syntax)
            text_auto=".0f",
             title="The Popularity in PCT(%) of Category per Season",
             labels={"value" : "Popularity (%)"}
            )


fig.update_layout(
    title = {
        "font" : {
            "size" : 24,
            "family" : "tahoma"
        }
    }
)
fig.update_traces(
    textfont= {
        "family": "tahoma",
        "size": 16,
        "color" : "#444"
    },
    hovertemplate = "%{label}<br>Popularity (%): %{y}%",
    marker = {"line": {"color": "#333", "width":2}}
)

iplot(fig)

<h3 style = "padding: 15px;
             font: bold 20px tahoma;
             background-color: #000;
             color: gold;
             border: 3px solid #AE445A;
             border-radius: 7px">
    ♦ What Are The Sales For Each Category?!💎💰👕
</h3>

In [None]:
# Total price per category, ordered from the largest total to the smallest
category_totals = df.groupby("Category")["Price_in_USD"].sum()
sales_per_category = category_totals.sort_values().iloc[::-1]
sales_per_category.to_frame()

In [None]:
# Bar chart of total sales (USD) per category, one colour per category
sales_palette = ["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F", "#EDD2F3"]

fig = px.bar(
    sales_per_category,
    x=sales_per_category.index,
    y=sales_per_category,
    color=sales_per_category.index,
    color_discrete_sequence=sales_palette,
    labels={"y": "Sales USD"},
    text_auto="0.4s",
    title="The Sales USD per Category",
    template="plotly_dark",
)
fig.update_layout(showlegend=False, title={"font": {"size": 26, "family": "tahoma"}})
fig.update_traces(
    hovertemplate="Category:%{x}<br>Sales USD: %{y}",
    textfont={"family": "tahoma", "size": 16, "color": "#444"},
)
iplot(fig)

<h3 style = "padding: 15px;
             font: bold 19px tahoma;
             background-color: #000;
             color: gold;
             border: 3px solid #AE445A;
             border-radius: 7px">
    ♦ Now, I Want To Get The Frequencies of Each Size for Each Gender?!👦👕
</h3>

In [None]:
# Count rows for every (Gender, Size) pair: genders become the rows of the
# table and sizes its columns
size_by_gender = df.pivot_table(index = "Gender", columns = df["Size"], values = "Size", aggfunc="count")
# Reshape from wide to long, then turn the index levels back into columns
size_by_gender = size_by_gender.stack().reset_index()
# Rename the column labelled 0 to "Count" (presumably the stacked counts;
# the sunburst below reads it as its values)
size_by_gender.rename(columns={0: "Count"}, inplace=True)
size_by_gender

In [None]:
# Sunburst of size counts nested inside gender: inner ring = Gender, outer ring = Size
fig = px.sunburst(size_by_gender, path=['Gender', 'Size'],
                  values='Count',
                  color_discrete_sequence=[
                      "#FF0060", "#00DFA2", "#0079FF", "#F6FA70", "#EDD2F3"],
                  title="The Frequency of Size By Gender",
                  template="plotly_dark",
                  )

fig.update_layout(margin=dict(t=100, l=0, r=0, b=50))
fig.update_layout(
    title={
        "font": {
            "size": 26,
            "family": "tahoma"
        }
    },
    hoverlabel={
        "bgcolor": "#222",
        "font_size": 15,
        "font_family": "tahoma"
    }
)

fig.update_traces(
    textinfo='label+percent entry',
    textfont={
        "family": "tahoma",
        "size": 15,
    },
    # Sector labels here are genders and sizes, not states, so the hover
    # text names them accordingly
    hovertemplate="Gender / Size: %{label}<br>Frequency: %{value:.0f}",
)

iplot(fig)

<h3 style = "padding: 15px;
             font: bold 19px tahoma;
             background-color: #000;
             color: gold;
             border: 3px solid #AE445A;
             border-radius: 7px">
    ♦ Let's Find Out How Many Purchase Transactions Were Done in Each Location!?🛒🌏
</h3>

In [None]:
# Sum the Previous_Purchases column per location and keep the 10 largest totals
purchases_by_loc = df.groupby("Location")["Previous_Purchases"].sum().nlargest(10)
purchases_by_loc

In [None]:
# Horizontal bar chart of the summed purchases for the top 10 locations;
# the single-colour palette makes every bar the same colour
fig = px.bar(
    purchases_by_loc,
    x=purchases_by_loc,
    y=purchases_by_loc.index,
    orientation="h",
    color=purchases_by_loc.index,
    color_discrete_sequence=["#C0DEFF"],
    labels={"x": "Total Purchases"},
    text_auto="0.4s",
    title="Number of Purchases Per Top 10 State",
    template="plotly_dark",
)
fig.update_layout(showlegend=False, title={"font": {"size": 24, "family": "tahoma"}})
fig.update_traces(
    hovertemplate="State:%{y}<br>Total Purchases: %{x:0.4s}",
    textfont={"family": "tahoma", "size": 14, "color": "#444"},
)
iplot(fig)

<h3 style = "padding: 15px;
             font: bold 19px tahoma;
             background-color: #000;
             color: gold;
             border: 3px solid #AE445A;
             border-radius: 7px">
    ♦ Regarding the two customer subscription status types, how many transactions have been completed using the applicable discount?!?💯🤗
</h3>

In [None]:
# Cross-tabulate subscription status (rows) against discount usage (columns).
# normalize=0 normalizes over each row, and the result is multiplied by 100
# so the cells read as percentages.
discount_by_status = pd.crosstab(index = df["Subscription_Status"], 
                                columns=df["Discount_Applied"], 
                                values=df["Discount_Applied"], 
                                 aggfunc="count", normalize=0) * 100

discount_by_status

In [None]:
# Bar chart of the share (%) of purchases with and without a discount
# for each subscription status
fig = px.bar(discount_by_status,
             text_auto="0.1f",
             title = "Percentage(%) of Applied Discount for Each Subscription!!",
             color_discrete_sequence=[ "#FCDDB0", "#FF9F9F"],
             template="plotly_dark",
             labels = {"Subscription_Status" :"Subscription Status", "value": "Percentage(%)"},
        )

fig.update_layout(
     title = {
        "font": {
            "size": 22,
            "family": "tahoma",
        }
    }
)
fig.update_traces(
 textfont= {
        "family": "tahoma",
        "size": 16,
        "color" : "#444"
    },
    # The plotted values are percentages, so the hover text labels them as
    # such instead of as purchase totals
    hovertemplate = "Subscription Status:%{x}<br>Percentage(%): %{value:.1f}",
)
iplot(fig)

<h3 style = "padding: 12px;
             font: bold 18px tahoma;
             background-color: #000;
             color: lightgreen;
             border: 2px solid #FF9B9B;
             border-radius: 5px;">
   Important Insight!!😮🧐
    <BR/>
    <BR/>
    ♠ From This Analysis, We Can Say That All Customers With a Subscription Purchased The Products With a Discount!😁🤩
    <BR/>
    <BR/>
    <span style = "color: gold" >
        ► Recommendation: We can send emails to unsubscribed customers in order to inspire them to get a subscription and get a discount!!🤩🥰
    </span>
</h3>

<h3 style = "padding: 15px;
             font: bold 19px tahoma;
             background-color: #000;
             color: gold;
             border: 3px solid #AE445A;
             border-radius: 7px">
    ♦ What is The Frequency of Purchases of Our Lovely Customers?!?🤩🥰🤗
</h3>

In [48]:
# Number of rows for each Frequency_of_Purchases value
freq_purchases = df["Frequency_of_Purchases"].value_counts()

In [None]:
# Bar chart of how often customers purchase, as a percentage of all rows.
# The shares are computed once and reused for both the bar heights and the bar text.
freq_purchases_pct = (freq_purchases / freq_purchases.sum()) * 100

fig = px.bar(freq_purchases,
         x = freq_purchases.index,
         y = freq_purchases_pct,
         labels = {"y" : "Frequency PCT(%)", "index": "Frequency of Purchases"},
         title = "Frequency of Purchases of Our Lovely Customers",
         color = freq_purchases.index,
         color_discrete_sequence=["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F", "#EDD2F3", "#98EECC", "#FFA1CF"],
         template="plotly_dark",
         text = freq_purchases_pct.apply(lambda pct : f"{pct:.1f}%")
        )

fig.update_layout(
    showlegend = False,
    # Set explicitly so the x-axis title does not depend on how the
    # index-derived column ends up being named
    xaxis_title = "Frequency of Purchases",
     title = {
        "font": {
            "size": 26,
            "family": "tahoma",
        }
    }
)
fig.update_traces(
 textfont= {
        "family": "tahoma",
        "size": 14,
        "color" : "#444"
    },
    # Hover text describes this chart (purchase frequency and its share),
    # not categories and sales
    hovertemplate = "Frequency of Purchases: %{x}<br>Frequency PCT(%): %{y:.1f}%",
)
iplot(fig)

<h3 style = "padding: 12px;
             font: bold 18px tahoma;
             background-color: #000;
             color: lightgreen;
             border: 2px solid #FF9B9B;
             border-radius: 5px;">
   Important Insight !!😮🤗🧐
    <BR/>
    <BR/>
    ►► From this analysis, if we know the customers for each "frequency of purchase type", we can send good offers to these customers to encourage them to come again in their expected time! 😁🤩💰

</h3>