In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [3]:
# Load the source dataset from the parquet file (path is relative to the working directory).
df_org_data = pd.read_parquet(path="escooter_copy.parquet")

In [4]:
# Cast the holiday / working-day flag columns to booleans.
df_org_data["holiday"] = df_org_data["holiday"].astype(bool)
df_org_data["workingday"] = df_org_data["workingday"].astype(bool)

# Rescale every temperature reading above 45 by a factor of 1/100; other rows are untouched.
# NOTE(review): presumably these readings were stored without a decimal point — confirm.
df_org_data.loc[df_org_data["temp"].gt(45), "temp"] = df_org_data["temp"] / 100

In [5]:
# Work on an independent (deep) copy so later steps do not touch the cleaned source frame.
df = df_org_data.copy(deep=True)

In [6]:
# One-hot encode the non-numeric column(s); every dummy column gets the "w_" prefix.
# The one-element prefix list requires exactly one column to be encoded.
# The frame is built from df_org_data rather than from df itself: get_dummies
# returns a new frame, so the result on a clean run is the same, but re-running
# this cell no longer raises a prefix-length ValueError on an already-encoded df.
df = pd.get_dummies(df_org_data, prefix=["w"])

In [59]:
# Weekly aggregates per customer type: one row per (registered_customer, week).
# Weather readings are averaged, the holiday / working-day flags and the one-hot
# weather columns are summed, and reg_customer counts the non-null
# registered_customer entries in each group.
df_w = (
    df.groupby(["registered_customer", pd.Grouper(key="datetime", freq="W")])
    .agg(
        humidity=pd.NamedAgg(column="humidity", aggfunc="mean"),
        windspeed=pd.NamedAgg(column="windspeed", aggfunc="mean"),
        temp=pd.NamedAgg(column="temp", aggfunc="mean"),
        atemp=pd.NamedAgg(column="atemp", aggfunc="mean"),
        reg_customer=pd.NamedAgg(column="registered_customer", aggfunc="count"),
        holiday=pd.NamedAgg(column="holiday", aggfunc="sum"),
        workingday=pd.NamedAgg(column="workingday", aggfunc="sum"),
        clear_few_clouds=pd.NamedAgg(column="w_clear, few clouds", aggfunc="sum"),
        w_cloudy_mist=pd.NamedAgg(column="w_cloudy, mist", aggfunc="sum"),
        w_heacy_rain_thund_snow_icepal=pd.NamedAgg(
            column="w_heacy rain or thunderstorm or snow or ice pallets", aggfunc="sum"
        ),
        w_light_snow_rain_thund=pd.NamedAgg(
            column="w_light snow or rain or thunderstorm", aggfunc="sum"
        ),
    )
    .reset_index()
)
df_w.head()

Unnamed: 0,registered_customer,datetime,humidity,windspeed,temp,atemp,reg_customer,holiday,workingday,clear_few_clouds,w_cloudy_mist,w_heacy_rain_thund_snow_icepal,w_light_snow_rain_thund
0,False,2020-01-05,75.681818,16.941545,16.165714,19.56618,462,0,0,122.0,293.0,0,47.0
1,False,2020-01-12,42.836826,15.772793,9.044551,10.791407,668,0,546,547.0,121.0,0,0.0
2,False,2020-01-19,44.462908,15.305623,9.813234,11.712122,674,0,201,475.0,196.0,0,3.0
3,False,2020-01-26,46.770459,14.862526,8.011776,9.945649,501,117,237,259.0,229.0,0,13.0
4,False,2020-02-02,67.263435,7.021275,8.674373,11.81215,949,0,342,591.0,319.0,0,39.0


In [38]:
# Weekly aggregates over all customers: one row per week.
# Same column recipe as the per-customer-type table, but grouped by week only,
# so reg_customer here is the count of non-null registered_customer entries per week.
df_wth = (
    df.groupby(pd.Grouper(key="datetime", freq="W"))
    .agg(
        **{
            "humidity": ("humidity", "mean"),
            "windspeed": ("windspeed", "mean"),
            "temp": ("temp", "mean"),
            "atemp": ("atemp", "mean"),
            "reg_customer": ("registered_customer", "count"),
            "holiday": ("holiday", "sum"),
            "workingday": ("workingday", "sum"),
            "clear_few_clouds": ("w_clear, few clouds", "sum"),
            "w_cloudy_mist": ("w_cloudy, mist", "sum"),
            "w_heacy_rain_thund_snow_icepal": (
                "w_heacy rain or thunderstorm or snow or ice pallets",
                "sum",
            ),
            "w_light_snow_rain_thund": ("w_light snow or rain or thunderstorm", "sum"),
        }
    )
    .reset_index()
)
df_wth.head()

Unnamed: 0,datetime,humidity,windspeed,temp,atemp,reg_customer,holiday,workingday,clear_few_clouds,w_cloudy_mist,w_heacy_rain_thund_snow_icepal,w_light_snow_rain_thund
0,2020-01-05,74.055431,16.068791,15.559339,18.805605,1786,0,0,470.0,1025.0,0,291.0
1,2020-01-12,45.588223,14.179146,8.481788,10.42979,9408,0,7627,7937.0,1423.0,0,48.0
2,2020-01-19,50.054626,14.580899,8.061622,9.903504,9025,0,6573,6373.0,2406.0,0,246.0
3,2020-01-26,52.770041,15.695682,8.580082,10.427223,7784,1000,5103,4302.0,3325.0,0,157.0
4,2020-02-02,67.422727,8.282825,8.351629,11.194581,10340,0,6319,5781.0,3602.0,25,932.0


In [86]:
# Dual-axis chart: weekly customer counts on the primary y-axis, weekly mean
# temperature and humidity on the secondary y-axis.
fig2 = make_subplots(specs=[[{"secondary_y": True}]])

# Total customers per week (all customer types together), drawn as a line.
fig2.add_trace(
    go.Scatter(
        x=df_wth.datetime,
        y=df_wth.reg_customer,
        name="Total Customers",
        legendgroup="group1",
        legendgrouptitle_text="Total Customers",
        mode="lines",
    )
)

# Unregistered customers per week.
# x and y are taken from the same filtered rows. Passing the unfiltered
# df_w.datetime as x would pair each count with a date purely by position
# and hand the trace twice as many x values as y values.
fig2.add_trace(
    go.Scatter(
        x=df_w.loc[df_w.registered_customer == False, "datetime"],
        y=df_w.loc[df_w.registered_customer == False, "reg_customer"],
        name="Unregistered Customers",
        legendgroup="group1",
        mode="markers",
    )
)

# Registered customers per week (same row filter applied to both x and y).
fig2.add_trace(
    go.Scatter(
        x=df_w.loc[df_w.registered_customer == True, "datetime"],
        y=df_w.loc[df_w.registered_customer == True, "reg_customer"],
        name="Registered Customers",
        legendgroup="group1",
        mode="markers",
    )
)

# Weekly mean temperature on the secondary axis.
fig2.add_trace(
    go.Scatter(
        x=df_wth.datetime,
        y=df_wth.temp,
        name="Temperature",
        legendgroup="group2",
        legendgrouptitle_text="Temperature & Humidity",  # fixed typo "Humididty"
    ),
    secondary_y=True,
)

# Weekly mean humidity on the secondary axis.
fig2.add_trace(
    go.Scatter(
        x=df_wth.datetime,
        y=df_wth.humidity,
        name="Humidity",
        legendgroup="group2",
    ),
    secondary_y=True,
)

# Centered bold title.
fig2.update_layout(
    title=dict(
        text="<b>Weekly Total Customer in relation to Temperature and Humidity</b>",
        xanchor="center",
        yanchor="top",
        y=0.9,
        x=0.5,
    )
)

# Clicking a legend entry toggles only that trace, not its whole legend group.
fig2.update_layout(legend=dict(groupclick="toggleitem"))
fig2.update_yaxes(title_text="Total Customers", secondary_y=False)
fig2.update_yaxes(title_text="Temperature °C / Humidity %", secondary_y=True)
fig2.update_xaxes(title_text="Datetime")
fig2.show()


In [117]:
# Side-by-side scatter plots of the weekly counts: unregistered customers in the
# left panel, registered customers in the right panel.
fig3 = make_subplots(cols=2, rows=1)

# Unregistered customers per week (row 1, column 1).
# x and y come from the same filtered rows. Passing the unfiltered
# df_w.datetime as x would pair each count with a date purely by position
# and hand the trace twice as many x values as y values.
fig3.add_trace(
    go.Scatter(
        x=df_w.loc[df_w.registered_customer == False, "datetime"],
        y=df_w.loc[df_w.registered_customer == False, "reg_customer"],
        name="Unregistered Customers",
        legendgroup="group1",
        mode="markers",
    ),
    row=1,
    col=1,
)

# Registered customers per week (row 1, column 2), same row filter on x and y.
# This call is the last expression of the cell, so its return value is what the
# notebook displays.
fig3.add_trace(
    go.Scatter(
        x=df_w.loc[df_w.registered_customer == True, "datetime"],
        y=df_w.loc[df_w.registered_customer == True, "reg_customer"],
        name="Registered Customers",
        legendgroup="group1",
        mode="markers",
    ),
    row=1,
    col=2,
)

In [103]:
# Histogram of the weekly reg_customer values over datetime, coloured and split
# into one facet column per registered_customer value, with a marginal box plot.
px.histogram(
    df_w,
    x="datetime",
    y="reg_customer",
    color="registered_customer",
    marginal="box",
    facet_col="registered_customer",
)