In [60]:
import pandas as pd
import plotly.express as px

In [61]:
# Load the raw shopping-cart data from the parquet file into a DataFrame.
org_data = pd.read_parquet(
    path="DSCB310 - UE2 - Shopping Carts.parquet",
)

In [62]:
# Replace the index that came with the parquet file by a fresh 0..n-1 index;
# drop=True discards the old index instead of keeping it as a column.
org_data = org_data.reset_index(drop= True)

In [63]:
# Work on an independent deep copy so that org_data itself stays untouched
# by the feature columns added below.
df = org_data.copy(deep=True)

#### **order_size**

In [64]:
# One row per order_id; order_size is the highest add_to_cart_order value
# recorded for that order.
df_order_size = (
    df
    .groupby(["order_id"], as_index=False)
    .agg(order_size=pd.NamedAgg(column="add_to_cart_order", aggfunc="max"))
)

In [65]:
# Bin edges: 1, then steps of five up to 35, then one wide closing bin up to 150.
ordersize_bins = [1, *range(5, 36, 5), 150]

# Bucket every order by its size (lowest edge included) and store the
# interval label as a plain string.
df_order_size["order_size_cut"] = pd.cut(
    df_order_size["order_size"],
    bins=ordersize_bins,
    include_lowest=True,
).astype("str")

In [66]:
# Attach the per-order feature columns of df_order_size to every order line.
#
# Copies of those columns left over from an earlier run of this cell are
# dropped first; otherwise re-running the cell would make merge() emit
# suffixed duplicates (order_size_x / order_size_y) instead of clean columns.
order_size_features = df_order_size.columns.difference(["order_id"])

# validate="many_to_one" makes pandas raise if df_order_size ever contained
# the same order_id twice, so the left join can never duplicate order lines.
df = (
    df
    .drop(columns=order_size_features, errors="ignore")
    .merge(df_order_size, how="left", on="order_id", validate="many_to_one")
)

#### **orders_per_user**

In [67]:
# One row per user_id; orders is the number of distinct order_id values
# found for that user.
df_orders_per_user = (
    df
    .groupby(["user_id"], as_index=False)
    .agg(orders=pd.NamedAgg(column="order_id", aggfunc="nunique"))
)

In [68]:
# Bin edges: 1, then steps of ten up to 100.
orders_bins = [1, *range(10, 101, 10)]

# Bucket every user by order count (lowest edge included) and store the
# interval label as a plain string.
df_orders_per_user["orders_cut"] = pd.cut(
    df_orders_per_user["orders"],
    bins=orders_bins,
    include_lowest=True,
).astype("str")

# Display the users ordered by bucket label (string sort, ascending).
df_orders_per_user.sort_values("orders_cut")

Unnamed: 0,user_id,orders,orders_cut
33280,183787,6,"(0.999, 10.0]"
17567,96958,9,"(0.999, 10.0]"
32454,179384,8,"(0.999, 10.0]"
32455,179390,4,"(0.999, 10.0]"
17563,96942,4,"(0.999, 10.0]"
...,...,...,...
9896,54957,100,"(90.0, 100.0]"
17216,95062,100,"(90.0, 100.0]"
19740,108946,100,"(90.0, 100.0]"
13290,73645,100,"(90.0, 100.0]"


In [69]:
# Keep only user_id and orders_cut so that the merge into df adds just the
# bucket label and not the raw order count.
#
# The result of drop() is assigned back to df_orders_per_user: with
# inplace=True drop() returns None, so binding its result to a name only
# produces a None variable. The column is addressed by name rather than by
# position (columns[1]) because a positional drop would remove orders_cut if
# this cell were executed a second time; errors="ignore" makes a re-run a no-op.
df_orders_per_user = df_orders_per_user.drop(columns=["orders"], errors="ignore")

In [70]:
# Attach the per-user feature columns of df_orders_per_user to every order line.
#
# Copies of those columns left over from an earlier run of this cell are
# dropped first; otherwise re-running the cell would make merge() emit
# suffixed duplicates (e.g. orders_cut_x / orders_cut_y).
orders_per_user_features = df_orders_per_user.columns.difference(["user_id"])

# validate="many_to_one" makes pandas raise if df_orders_per_user ever
# contained the same user_id twice, so the left join can never duplicate
# order lines.
df = (
    df
    .drop(columns=orders_per_user_features, errors="ignore")
    .merge(df_orders_per_user, how="left", on="user_id", validate="many_to_one")
)

#### **days_since_prior_order**

In [71]:
# Number of distinct orders for each days_since_prior_order value.
# Rows with a missing days_since_prior_order are left out, because groupby
# drops NaN keys by default.
(
    df
    .groupby(["days_since_prior_order"], as_index=False)
    .agg(orders=pd.NamedAgg(column="order_id", aggfunc="nunique"))
)

Unnamed: 0,days_since_prior_order,orders
0,0.0,12005
1,1.0,26178
2,2.0,34983
3,3.0,38541
4,4.0,39479
5,5.0,38254
6,6.0,42605
7,7.0,57068
8,8.0,32510
9,9.0,21219


#### df

In [72]:
# Display the order-line table after the order_size, order_size_cut and
# orders_cut columns have been merged in.
df

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,user_id,order_number,order_dow,order_hour_of_day,days_since_prior_order,tip,product_name,aisle_id,department_id,department,aisle,county,order_size,order_size_cut,orders_cut
0,1,13176,6,0,112108,4,4,10,9.0,1,Bag of Organic Bananas,24,4,produce,fresh fruits,Inyo,8,"(5.0, 10.0]","(0.999, 10.0]"
1,1,11109,2,1,112108,4,4,10,9.0,1,Organic 4% Milk Fat Whole Milk Cottage Cheese,108,16,dairy eggs,other creams cheeses,Inyo,8,"(5.0, 10.0]","(0.999, 10.0]"
2,1,10246,3,0,112108,4,4,10,9.0,1,Organic Celery Hearts,83,4,produce,fresh vegetables,Inyo,8,"(5.0, 10.0]","(0.999, 10.0]"
3,1,22035,8,1,112108,4,4,10,9.0,1,Organic Whole String Cheese,21,16,dairy eggs,packaged cheese,Inyo,8,"(5.0, 10.0]","(0.999, 10.0]"
4,1,49683,4,0,112108,4,4,10,9.0,1,Cucumber Kirby,83,4,produce,fresh vegetables,Inyo,8,"(5.0, 10.0]","(0.999, 10.0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6132664,606914,28849,6,1,193225,18,5,13,4.0,1,No Salt Added Black Beans,59,15,canned goods,canned meals beans,Calaveras,8,"(5.0, 10.0]","(40.0, 50.0]"
6132665,606915,27966,1,0,107102,1,3,10,,0,Organic Raspberries,123,4,produce,packaged vegetables fruits,Amador,4,"(0.999, 5.0]","(30.0, 40.0]"
6132666,606915,27845,4,0,107102,1,3,10,,0,Organic Whole Milk,84,16,dairy eggs,milk,Amador,4,"(0.999, 5.0]","(30.0, 40.0]"
6132667,606915,26940,3,0,107102,1,3,10,,0,Organic Large Green Asparagus,83,4,produce,fresh vegetables,Amador,4,"(0.999, 5.0]","(30.0, 40.0]"


In [73]:
# Export the enriched order-line table as CSV. With to_csv's defaults the
# DataFrame index is written as well, as the first (unnamed) column.
df.to_csv(path_or_buf="orderdata2.csv")

In [74]:
# Ten-step colour ramp assembled from plotly's built-in sequential palettes:
# three greys, three entries each from Purp and Purpor, and one RdPu entry.
_sequential = px.colors.sequential
colorscales_orig = [
    *_sequential.Greys[1:4],    # Greys[1], Greys[2], Greys[3]
    *_sequential.Purp[3:6],     # Purp[3], Purp[4], Purp[5]
    *_sequential.Purpor[4:7],   # Purpor[4], Purpor[5], Purpor[6]
    _sequential.RdPu[8],
]

In [75]:
# Show the colour string used as entry 0 of colorscales_orig.
px.colors.sequential.Greys[1]

'rgb(240,240,240)'

In [76]:
# Show the colour string used as entry 1 of colorscales_orig.
px.colors.sequential.Greys[2]

'rgb(217,217,217)'

In [77]:
# Show the colour string used as entry 2 of colorscales_orig.
px.colors.sequential.Greys[3]

'rgb(189,189,189)'

In [79]:
# Show the colour string used as entry 3 of colorscales_orig.
px.colors.sequential.Purp[3]

'rgb(185, 152, 221)'

In [80]:
# Show the colour string used as entry 4 of colorscales_orig.
px.colors.sequential.Purp[4]

'rgb(159, 130, 206)'

In [81]:
# Show the colour string used as entry 5 of colorscales_orig.
px.colors.sequential.Purp[5]

'rgb(130, 109, 186)'

In [82]:
# Show the colour string used as entry 6 of colorscales_orig.
px.colors.sequential.Purpor[4]

'rgb(173, 95, 173)'

In [83]:
# Show the colour string used as entry 7 of colorscales_orig.
px.colors.sequential.Purpor[5]

'rgb(131, 75, 160)'

In [84]:
# Show the colour string used as entry 8 of colorscales_orig.
px.colors.sequential.Purpor[6]

'rgb(87, 59, 136)'

In [85]:
# Show the colour string used as entry 9 (the last one) of colorscales_orig.
px.colors.sequential.RdPu[8]

'rgb(73,0,106)'