In [13]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [2]:
# Load the shopping-cart data set. The path is relative, so it is resolved
# against the kernel's current working directory.
df = pd.read_parquet(
    path="DSCB310 - UE2 - Shopping Carts.parquet",
)

In [3]:
# Replace the current index with a fresh default integer index; drop=True
# discards the old index instead of keeping it as a column.
df = df.reset_index(drop=True)

In [8]:
# Per product: number of rows with that product_name, ranked from the most to
# the least frequent. The result column is called "coun"; later cells read it
# under exactly that name.
df_products = (
    df.groupby("product_name")
    .agg(coun=("product_name", "count"))
    .reset_index()
    .sort_values(by="coun", ascending=False)
)
df_products.head()

Unnamed: 0,product_name,coun
3369,Banana,89127
3177,Bag of Organic Bananas,70283
29385,Organic Strawberries,49961
26443,Organic Baby Spinach,45462
27836,Organic Hass Avocado,39290


In [47]:
# Bar chart of the ten most frequent products (first ten rows of df_products).
# Bars whose count lies strictly above the median of those ten rows are
# highlighted, all others are drawn in grey. A dotted line marks the median.
top_products = df_products.head(10)
median_count = top_products.coun.median()

highlight_color = px.colors.sequential.Purpor[5]
muted_color = px.colors.sequential.Greys[3]
reference_color = px.colors.sequential.Greys[5]

fig = go.Figure()

# One trace per product so that every bar gets its own legend entry.
# Iterating over rows (instead of over count values and filtering by
# "coun == value") keeps products with identical counts apart; the value-based
# filter would put tied products into one trace and add that trace twice.
for product in top_products.itertuples(index=False):
    product_label = str(product.product_name)
    bar_color = highlight_color if product.coun > median_count else muted_color

    fig.add_trace(go.Bar(
        x=[product.product_name],
        y=[product.coun],
        showlegend=True,
        marker=dict(color=bar_color),
        width=0.8,
        base=0,
        legendgroup=product_label,
        name=product_label,
        hovertemplate=f"<b>{product_label}</b><br><br>" +
            "Bestellungen: %{y}<br>" +
            "<extra></extra>"
    ))

# Dotted reference line at the median of the displayed counts.
fig.add_hline(
    y=median_count,
    opacity=1,
    line_dash="dot",
    line_color=reference_color,
)
fig.add_annotation(
    # Position of the last bar on the category axis (9 for ten bars).
    x=len(top_products) - 1,
    y=median_count,
    text=f"Median: {round(float(median_count), 2)}",
    font_color=reference_color,
    arrowcolor=reference_color,
)

fig.update_layout(
    title=dict(
        text="<b>Top 10 Produkte nach Verkaufszahlen</b>",
        font_size=20,
        xanchor="center",
        yanchor="top",
        y=0.935,
        x=0.475,
    ),
    legend_title_text="<b>Produkte</b>",
    height=500,
)
# Axis title previously read "Produtke" (typo).
fig.update_xaxes(title_text="<b>Produkte</b>", ticks="outside")
fig.update_yaxes(title_text="<b>Anzahl an Bestellungen</b>", ticks="outside", showgrid=True, gridcolor="grey")
fig.update_layout(plot_bgcolor="white", xaxis=dict(linecolor="black"), yaxis=dict(linecolor="black"))
fig.show()

In [64]:
# Number of distinct order_id values per county, sorted so that the county
# with the most orders comes first.
df_regional = (
    df.groupby("county")
    .agg(orders=("order_id", "nunique"))
    .reset_index()
    .sort_values(by="orders", ascending=False)
)

In [65]:
# Show the first five rows of df_regional (head() defaults to five rows).
df_regional.head()

Unnamed: 0,county,orders
3,Calaveras,43559
9,Glenn,35541
28,Orange,34496
8,Fresno,32685
13,Kern,31601


In [80]:
# Bar chart of the number of orders per county (one bar per row of
# df_regional, in its current row order). Counties at or above the mean number
# of orders are highlighted, all others are grey. A dotted line marks the mean.
mean_orders = df_regional.orders.mean()

highlight_color = px.colors.sequential.Purpor[5]
muted_color = px.colors.sequential.Greys[3]
reference_color = px.colors.sequential.Greys[5]

fig = go.Figure()

# Walk the rows once instead of re-filtering df_regional several times per
# county; this also avoids using a one-element array as an `if` condition.
for region in df_regional.itertuples(index=False):
    county_label = str(region.county)
    bar_color = highlight_color if region.orders >= mean_orders else muted_color

    fig.add_trace(go.Bar(
        x=[region.county],
        y=[region.orders],
        showlegend=False,
        marker=dict(color=bar_color),
        width=0.8,
        # Plain county name; formatting the filtered Series here would store
        # its whole repr (index, name, dtype) as the trace name.
        name=county_label,
        hovertemplate=f"<b>{county_label}</b><br><br>" +
            "Bestellungen: %{y}<br>" +
            "<extra></extra>"
    ))

# NOTE(review): x=55 is a hard-coded position on the category axis and
# "Placer" a hard-coded county; presumably both match the current data set.
# The lookup raises IndexError if "Placer" is absent - confirm after any
# change to the data.
fig.add_annotation(
    x=55,
    y=df_regional.loc[df_regional.county == "Placer", "orders"].values[0],
    text="Problemfälle",
    font_color=reference_color,
    arrowcolor=reference_color
)

# Dotted reference line at the mean number of orders per county.
fig.add_hline(
    y=mean_orders,
    line_dash="dot",
    line_color=reference_color,
    opacity=1
)

# The label must describe the line drawn above: it sits at the mean of
# df_regional.orders, so show that value (the previous text printed the
# median of the top-10 product counts under the label "Median").
fig.add_annotation(
    x=50,
    y=mean_orders,
    text=f"Mittelwert: {round(float(mean_orders), 2)}",
    font_color=reference_color,
    arrowcolor=reference_color
)

fig.update_layout(
    title=dict(
        text="<b>Bestellungen pro County</b>",
        font_size=20,
        xanchor="center",
        yanchor="top",
        y=0.935,
        x=0.475,
    ),
    legend_title_text="<b>Counties</b>",
    height=500,
)
fig.update_xaxes(title_text="<b>Counties</b>", ticks="outside")
fig.update_yaxes(title_text="<b>Anzahl an Bestellungen</b>", ticks="outside", showgrid=True, gridcolor="grey")
fig.update_layout(plot_bgcolor="white", xaxis=dict(linecolor="black"), yaxis=dict(linecolor="black"))
fig.show()