In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [3]:
# Load the raw e-scooter records; the path is relative to the notebook's working directory.
df_org_data = pd.read_parquet(path="escooter_copy.parquet")

In [4]:
# Cast the two day-type flag columns to real booleans (done in place on the raw frame).
for flag_column in ("holiday", "workingday"):
    df_org_data[flag_column] = df_org_data[flag_column].astype(bool)

# Temperatures above 45 are scaled down by a factor of 100; all other rows are untouched.
# NOTE(review): presumably these readings were stored without a decimal point - confirm.
temp_too_high = df_org_data["temp"] > 45
df_org_data.loc[temp_too_high, "temp"] = df_org_data.loc[temp_too_high, "temp"].div(100)

In [5]:
# Work on an independent (deep) copy so the cleaned source frame stays available unchanged.
df = df_org_data.copy(deep=True)

In [6]:
# One-hot encode the categorical column(s); the resulting indicator columns get the prefix "w".
# The encoding is built from df_org_data (the frame `df` was copied from) instead of from `df`
# itself, so this cell is idempotent: with `df = pd.get_dummies(df, ...)` a second run finds no
# column left to encode and the one-element `prefix` list no longer matches, raising ValueError.
# get_dummies returns a new frame, so df_org_data is not modified.
df = pd.get_dummies(df_org_data, prefix=["w"])

In [12]:
# Named-aggregation spec: output column -> (source column, aggregation function).
weekly_group_agg = {
    "humidity": ("humidity", "mean"),
    "windspeed": ("windspeed", "mean"),
    "temp": ("temp", "mean"),
    "atemp": ("atemp", "mean"),
    # Number of rows per bucket (non-null entries of the grouping flag).
    "reg_customer": ("registered_customer", "count"),
    "holiday": ("holiday", "sum"),
    "workingday": ("workingday", "sum"),
    # Sums of the one-hot weather indicator columns.
    "clear_few_clouds": ("w_clear, few clouds", "sum"),
    "w_cloudy_mist": ("w_cloudy, mist", "sum"),
    "w_heacy_rain_thund_snow_icepal": ("w_heacy rain or thunderstorm or snow or ice pallets", "sum"),
    "w_light_snow_rain_thund": ("w_light snow or rain or thunderstorm", "sum"),
}

# One row per (registered_customer value, calendar week).
by_status_and_week = df.groupby(["registered_customer", pd.Grouper(key="datetime", freq="W")])
df_w = by_status_and_week.agg(**weekly_group_agg).reset_index()
df_w.head()

Unnamed: 0,registered_customer,datetime,humidity,windspeed,temp,atemp,reg_customer,holiday,workingday,clear_few_clouds,w_cloudy_mist,w_heacy_rain_thund_snow_icepal,w_light_snow_rain_thund
0,False,2020-01-05,75.681818,16.941545,16.165714,19.56618,462,0,0,122.0,293.0,0,47.0
1,False,2020-01-12,42.836826,15.772793,9.044551,10.791407,668,0,546,547.0,121.0,0,0.0
2,False,2020-01-19,44.462908,15.305623,9.813234,11.712122,674,0,201,475.0,196.0,0,3.0
3,False,2020-01-26,46.770459,14.862526,8.011776,9.945649,501,117,237,259.0,229.0,0,13.0
4,False,2020-02-02,67.263435,7.021275,8.674373,11.81215,949,0,342,591.0,319.0,0,39.0


In [13]:
# Customers-Scatter: weekly row counts over time, coloured by registration status, with a
# Gaussian-weighted rolling mean (window of 5 points, std=2) as trendline and a marginal box plot.
fig = px.scatter(
    df_w,
    x="datetime",
    y="reg_customer",
    color="registered_customer",
    labels=dict(reg_customer="Total Customers", datetime="Datetime", registered_customer="Registered Customers"),
    marginal_y="box",
    trendline="rolling",
    trendline_options=dict(window=5, win_type="gaussian", function_args=dict(std=2)),
)
# Alternative smoother that was tried: trendline="lowess", trendline_options=dict(frac=0.2)

# Shaded decline periods as (start, end, annotation text).
# Dates are zero-padded ISO-8601 so they are unambiguous and sort chronologically even when
# compared as plain strings.
decline_periods = [
    ("2020-07-26", "2021-01-26", "decline1 (W 20/21)"),
    ("2021-09-02", "2022-01-02", "decline2 (W 21/22)"),
]
for period_start, period_end, period_label in decline_periods:
    fig.add_vrect(
        x0=period_start,
        x1=period_end,
        col=1,  # first subplot column only; column 2 holds the marginal box plot
        annotation_text=period_label,
        annotation_position="top left",
        fillcolor="green",
        opacity=0.2,
        line_width=0,
        annotation_font_color="black",
        annotation_font_size=13,
    )

# Dotted horizontal reference line at the mean weekly count of each customer group,
# as (registered_customer value, annotation text, colour).
group_mean_lines = [
    (True, "Reg_Cust_True_Mean", "red"),
    (False, "Reg_Cust_False_Mean", "blue"),
]
for is_registered, mean_label, mean_color in group_mean_lines:
    group_mean = df_w.loc[df_w.registered_customer == is_registered, "reg_customer"].mean()
    fig.add_hline(
        y=group_mean,
        line_dash="dot",
        annotation_text=mean_label,
        annotation_position="bottom left",
        col=1,
        opacity=0.3,
        line_color=mean_color,
        annotation_font_color=mean_color,
    )

# Fade all annotations added above so they do not dominate the data.
fig.update_annotations(opacity=0.4)
# Optional templates that were tried: "plotly_white", "simple_white", "plotly_dark"
fig.update_traces(showlegend=True)
fig.update_xaxes(title_text="Box", row=1, col=2)
# Last expression of the cell: update_layout returns the figure, which renders it.
fig.update_layout(
    title=dict(
        text="<b>Quantity Comparison of Weekly Registered and Unregistered Customers</b>",
        xanchor="center",
        yanchor="top",
        y=0.98,
        x=0.46,
    ),
    height=600,
    width=1470,
    legend=dict(groupclick="toggleitem"),
    font=dict(size=14),
)

In [14]:
# Weekly aggregation over all customers (no split by registration status).
averaged_columns = ("humidity", "windspeed", "temp", "atemp")
summed_columns = {
    # output column -> source column
    "holiday": "holiday",
    "workingday": "workingday",
    "clear_few_clouds": "w_clear, few clouds",
    "w_cloudy_mist": "w_cloudy, mist",
    "w_heacy_rain_thund_snow_icepal": "w_heacy rain or thunderstorm or snow or ice pallets",
    "w_light_snow_rain_thund": "w_light snow or rain or thunderstorm",
}

# Assemble the named-aggregation spec in the output column order: means, row count, sums.
weekly_total_agg = {column: (column, "mean") for column in averaged_columns}
weekly_total_agg["reg_customer"] = ("registered_customer", "count")
weekly_total_agg.update({target: (source, "sum") for target, source in summed_columns.items()})

per_week = df.groupby(pd.Grouper(key="datetime", freq="W"))
df_wth = per_week.agg(**weekly_total_agg).reset_index()
df_wth.head()

Unnamed: 0,datetime,humidity,windspeed,temp,atemp,reg_customer,holiday,workingday,clear_few_clouds,w_cloudy_mist,w_heacy_rain_thund_snow_icepal,w_light_snow_rain_thund
0,2020-01-05,74.055431,16.068791,15.559339,18.805605,1786,0,0,470.0,1025.0,0,291.0
1,2020-01-12,45.588223,14.179146,8.481788,10.42979,9408,0,7627,7937.0,1423.0,0,48.0
2,2020-01-19,50.054626,14.580899,8.061622,9.903504,9025,0,6573,6373.0,2406.0,0,246.0
3,2020-01-26,52.770041,15.695682,8.580082,10.427223,7784,1000,5103,4302.0,3325.0,0,157.0
4,2020-02-02,67.422727,8.282825,8.351629,11.194581,10340,0,6319,5781.0,3602.0,25,932.0


In [15]:
# Single subplot with a secondary y-axis: customer counts on the primary axis,
# temperature and humidity sharing the secondary one.
fig1 = make_subplots(specs=[[{"secondary_y": True}]])

# (df_wth column, legend name, plotted on the secondary axis?) - traces are added in this order.
weekly_series = (
    ("reg_customer", "Customers", False),
    ("temp", "Temperature", True),
    ("humidity", "Humidity", True),
)
for column_name, legend_name, on_secondary_axis in weekly_series:
    fig1.add_trace(
        go.Scatter(x=df_wth["datetime"], y=df_wth[column_name], name=legend_name),
        secondary_y=on_secondary_axis,
    )

# Shaded spans labelled as temperature increases; both share the same styling.
increase_rect_style = dict(
    col=1,
    row=1,
    annotation_position="top left",
    fillcolor="red",
    opacity=0.1,
    line_width=0,
    annotation_font_color="black",
    annotation_font_size=13,
)
increase_periods = (
    ("2020-02-16", "2020-07-26", "increase1 Temp 2020"),
    ("2021-02-14", "2021-07-11", "increase2 Temp 2021"),
)
for span_start, span_end, span_label in increase_periods:
    fig1.add_vrect(x0=span_start, x1=span_end, annotation_text=span_label, **increase_rect_style)

# Dotted vertical markers, in order: Start-Summer 2020, Start-Summer 2021,
# End-Summer 2020, End-Summer 2021.
summer_markers = ("2020-05-21", "2021-05-21", "2020-09-21", "2021-09-21")
for marker_date in summer_markers:
    fig1.add_vline(x=marker_date, line_dash="dot", opacity=0.3)

fig1.update_layout(
    title=dict(
        text="<b>Weekly Total Customer in relation to Temperature and Humidity</b>",
        xanchor="center",
        yanchor="top",
        y=0.9,
        x=0.5,
    ),
    height=600,
    width=1350,
)

# Fade the span annotations, then label the axes.
fig1.update_annotations(opacity=0.3)
fig1.update_yaxes(title_text="Total Customers", secondary_y=False)
fig1.update_yaxes(title_text="Temperature °C / Humidity %", secondary_y=True)
# Last expression of the cell: update_xaxes returns the figure, which renders it.
fig1.update_xaxes(title_text="Datetime")

#### TODO: plot the increase rectangle

#### TODO: compute the increase of temp and customer in one subplot using np.polyfit()