In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [3]:
# Read the e-scooter dataset from the parquet file next to the notebook.
df_org_data = pd.read_parquet(path="escooter_copy.parquet")

In [4]:
# Cast the two flag columns to real booleans.
for flag_col in ("holiday", "workingday"):
    df_org_data[flag_col] = df_org_data[flag_col].astype(bool)

# Any temperature above 45 is scaled down by a factor of 100.
# NOTE(review): presumably these readings were stored without a decimal
# point — confirm the threshold and the divisor against the data source.
temp_too_high = df_org_data["temp"] > 45
df_org_data.loc[temp_too_high, "temp"] = df_org_data.loc[temp_too_high, "temp"].div(100)

In [5]:
# Work on an independent (deep) copy so the cleaned source frame stays untouched.
df = df_org_data.copy(deep=True)

In [6]:
# One-hot encode the text column(s); the new columns get the prefix "w".
# Encode from the untouched df_org_data instead of from df itself: the previous
# form, `df = pd.get_dummies(df, ...)`, consumed its own output, so running the
# cell a second time found no text column left to encode and the one-element
# prefix list no longer matched (ValueError). get_dummies returns a new frame,
# so df_org_data is not modified and the result on a first run is unchanged.
# NOTE(review): prefix=["w"] assumes exactly one object/category column
# (presumably the weather description) — confirm.
df = pd.get_dummies(df_org_data, prefix=["w"])

In [52]:
# Summary per customer segment (registered_customer) and calendar month.
# Note: despite its name, df_w is binned monthly (freq="M"), not weekly.
monthly_aggs = {
    # mean weather readings within each group
    "humidity": ("humidity", "mean"),
    "windspeed": ("windspeed", "mean"),
    "temp": ("temp", "mean"),
    "atemp": ("atemp", "mean"),
    # number of non-null registered_customer entries in the group
    "reg_customer": ("registered_customer", "count"),
    # how many rows carry each boolean flag
    "holiday": ("holiday", "sum"),
    "workingday": ("workingday", "sum"),
    # how many rows fall into each one-hot weather category
    "clear_few_clouds": ("w_clear, few clouds", "sum"),
    "w_cloudy_mist": ("w_cloudy, mist", "sum"),
    "w_heacy_rain_thund_snow_icepal": (
        "w_heacy rain or thunderstorm or snow or ice pallets",
        "sum",
    ),
    "w_light_snow_rain_thund": ("w_light snow or rain or thunderstorm", "sum"),
}
by_segment_and_month = ["registered_customer", pd.Grouper(key="datetime", freq="M")]

df_w = (
    df.groupby(by_segment_and_month)
    .agg(**monthly_aggs)
    .reset_index()
)
df_w.head()

Unnamed: 0,registered_customer,datetime,humidity,windspeed,temp,atemp,reg_customer,holiday,workingday,clear_few_clouds,w_cloudy_mist,w_heacy_rain_thund_snow_icepal,w_light_snow_rain_thund
0,False,2020-01-31,52.760861,14.710368,10.176736,12.422854,2647,117,1326,1540.0,1005.0,0,102.0
1,False,2020-02-29,45.911487,17.619117,14.251152,17.257965,6259,185,2761,4512.0,1513.0,0,234.0
2,False,2020-03-31,47.123796,16.799967,16.479096,19.67836,13498,0,7045,9561.0,3721.0,0,216.0
3,False,2020-04-30,58.150989,17.985088,20.92243,24.541269,19922,642,9992,12852.0,5848.0,0,1222.0
4,False,2020-05-31,62.321952,13.895114,24.727379,28.805009,33623,0,15667,23724.0,8035.0,0,1864.0


In [38]:
# Summary over all customers, binned by week on the datetime column.
weekly_aggs = {
    # mean weather readings within each week
    "humidity": ("humidity", "mean"),
    "windspeed": ("windspeed", "mean"),
    "temp": ("temp", "mean"),
    "atemp": ("atemp", "mean"),
    # number of non-null registered_customer entries in the week
    "reg_customer": ("registered_customer", "count"),
    # how many rows carry each boolean flag
    "holiday": ("holiday", "sum"),
    "workingday": ("workingday", "sum"),
    # how many rows fall into each one-hot weather category
    "clear_few_clouds": ("w_clear, few clouds", "sum"),
    "w_cloudy_mist": ("w_cloudy, mist", "sum"),
    "w_heacy_rain_thund_snow_icepal": (
        "w_heacy rain or thunderstorm or snow or ice pallets",
        "sum",
    ),
    "w_light_snow_rain_thund": ("w_light snow or rain or thunderstorm", "sum"),
}
by_week = pd.Grouper(key="datetime", freq="W")

df_wth = (
    df.groupby(by_week)
    .agg(**weekly_aggs)
    .reset_index()
)
df_wth.head()

Unnamed: 0,datetime,humidity,windspeed,temp,atemp,reg_customer,holiday,workingday,clear_few_clouds,w_cloudy_mist,w_heacy_rain_thund_snow_icepal,w_light_snow_rain_thund
0,2020-01-05,74.055431,16.068791,15.559339,18.805605,1786,0,0,470.0,1025.0,0,291.0
1,2020-01-12,45.588223,14.179146,8.481788,10.42979,9408,0,7627,7937.0,1423.0,0,48.0
2,2020-01-19,50.054626,14.580899,8.061622,9.903504,9025,0,6573,6373.0,2406.0,0,246.0
3,2020-01-26,52.770041,15.695682,8.580082,10.427223,7784,1000,5103,4302.0,3325.0,0,157.0
4,2020-02-02,67.422727,8.282825,8.351629,11.194581,10340,0,6319,5781.0,3602.0,25,932.0


In [41]:
# Dual-axis figure: weekly totals (df_wth) plus the monthly per-segment counts
# (df_w) on the primary axis, and a box trace of the registered-customer counts
# on the secondary axis.
fig2 = make_subplots(specs=[[{"secondary_y": True}]])

# Slice the monthly summary once per segment so every trace takes x and y from
# the SAME rows. Previously x was the full df_w.datetime column (both segments)
# while y was filtered to one segment, so the two arrays had different lengths
# and the dates only lined up by coincidence of row order.
is_registered = df_w["registered_customer"].astype(bool)
monthly_registered = df_w.loc[is_registered]
monthly_unregistered = df_w.loc[~is_registered]

# Add Customer Total (weekly bins)
fig2.add_trace(
    go.Scatter(
        x=df_wth["datetime"],
        y=df_wth["reg_customer"],
        name="Total Customers",
    ),
    secondary_y=False,
)

# Add Not Reg Customers (monthly bins)
fig2.add_trace(
    go.Scatter(
        x=monthly_unregistered["datetime"],
        y=monthly_unregistered["reg_customer"],
        name="Unregistered Customers",
    ),
    secondary_y=False,
)

# Add Reg Customers (monthly bins)
fig2.add_trace(
    go.Scatter(
        x=monthly_registered["datetime"],
        y=monthly_registered["reg_customer"],
        name="Registered Customers",
    ),
    secondary_y=False,
)

# Box trace of the same registered-customer counts on the secondary axis.
# It gets its own legend name so it is distinguishable from the line trace.
fig2.add_trace(
    go.Box(
        x=monthly_registered["datetime"],
        y=monthly_registered["reg_customer"],
        name="Registered Customers (box)",
    ),
    secondary_y=True,
)

# Titles so the figure can be read on its own.
fig2.update_layout(title_text="Customer counts: weekly total vs. monthly by registration status")
fig2.update_xaxes(title_text="Date")
fig2.update_yaxes(title_text="Customer count", secondary_y=False)
fig2.update_yaxes(title_text="Registered customers (box)", secondary_y=True)

fig2.show()


In [57]:
# Box plot of the monthly counts in df_w, one facet column per
# registered_customer value.
px.box(
    data_frame=df_w,
    x="datetime",
    y="reg_customer",
    facet_col="registered_customer",
)