In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
import missingno as msn
import datetime
from pandas_profiling import ProfileReport
import sklearn

In [2]:
df = pd.read_parquet("escooter_history_2022.parquet")


In [3]:
df_2 = df.groupby(pd.Grouper(key="datetime",freq="D")).agg({"registered_customer": "count"}).reset_index()

In [4]:
df_f = df.groupby(df.datetime.dt.date).agg(
 reg_customer = ("registered_customer", "mean"),
 temp = ("temp", "mean"),
 humidity= ("humidity", "mean")
).reset_index()

In [5]:
df_f.to_clipboard()

In [6]:
df_2["Spikes"] = (df_2.datetime.dt.day < 5) | (df_2.datetime.dt.day > 27)

In [7]:
df_2

Unnamed: 0,datetime,registered_customer,Spikes
0,2020-01-04,985,True
1,2020-01-05,801,False
2,2020-01-06,1349,False
3,2020-01-07,1562,False
4,2020-01-08,1600,False
...,...,...,...
726,2021-12-30,4231,True
727,2021-12-31,8144,True
728,2022-01-01,5932,True
729,2022-01-02,5182,True


In [8]:
px.box(df_2,x="Spikes",y="registered_customer",color="Spikes",
        labels= dict(registered_customer="Total Customers per Day ", Spikes="Month Beginning and End"))

In [9]:
px.line(df_2,x="datetime",y="registered_customer",
                labels= dict(registered_customer="Total Customers per Day", datetime="Datetime"),
                )

In [10]:
fig = px.line(df_2,x="datetime",y="registered_customer",
                labels= dict(registered_customer="Total Customers per Day", datetime="Datetime"),
                )

In [11]:
for j in df_2.datetime.dt.year.unique():
    for i in df_2.datetime.dt.month.unique():
        if j < 2022:
            if i < 12:
                fig.add_vrect(x0=f"{j}-{i}-27", 
                        x1=f"{j}-{i+1}-5", 
                        col=1,
                        annotation_text= "", 
                        fillcolor="green", 
                        opacity=0.3, 
                        line_width=0,
                        annotation_font_color = "black",
                        annotation_font_size = 13) 
            else:
                fig.add_vrect(x0=f"{j}-{i}-27", 
                        x1=f"{j+1}-{1}-5", 
                        col=1,
                        annotation_text= "", 
                        fillcolor="green", 
                        opacity=0.3, 
                        line_width=0,
                        annotation_font_color = "black",
                        annotation_font_size = 13) 
        else:
            pass

In [12]:
fig.update_layout(title= dict(text= "<b>Spikes on Total Costumers at Month Begin and End</b>", xanchor= "center", yanchor= "top", y=0.98, x=0.46),
                  height= 600, width= 1470, legend=dict(groupclick="toggleitem"), font= dict(size= 14))

In [13]:
fig.show()

In [14]:


px.scatter(df_2,x="datetime",y="registered_customer",color=df_2.datetime.dt.day,trendline="lowess")

In [15]:
df_2[df_2.datetime == "6-1-2020"]

Unnamed: 0,datetime,registered_customer,Spikes
149,2020-06-01,8688,True


In [16]:
df[df.datetime.between("2020-1-6", "2020-1-7")]

Unnamed: 0,datetime,holiday,workingday,weather,temp,atemp,humidity,windspeed,registered_customer
1786,2020-01-06 00:00:11,0.0,1.0,"clear, few clouds",9.02,9.850,44.0,23.9994,True
1787,2020-01-06 00:10:33,0.0,1.0,"clear, few clouds",9.02,9.850,44.0,23.9994,True
1788,2020-01-06 00:23:23,0.0,1.0,"clear, few clouds",9.02,9.850,44.0,23.9994,True
1789,2020-01-06 00:25:24,0.0,1.0,"clear, few clouds",9.02,9.850,44.0,23.9994,True
1790,2020-01-06 00:31:38,0.0,1.0,"clear, few clouds",9.02,9.850,44.0,23.9994,True
...,...,...,...,...,...,...,...,...,...
3130,2020-01-06 23:40:01,0.0,1.0,"clear, few clouds",7.38,10.605,55.0,7.0015,True
3131,2020-01-06 23:41:54,0.0,1.0,"clear, few clouds",7.38,10.605,55.0,7.0015,True
3132,2020-01-06 23:46:43,0.0,1.0,"clear, few clouds",7.38,10.605,55.0,7.0015,True
3133,2020-01-06 23:48:26,0.0,1.0,"clear, few clouds",7.38,10.605,55.0,7.0015,True


In [17]:
df[df.datetime.between("2021-3-31", "2021-4-1")]

Unnamed: 0,datetime,holiday,workingday,weather,temp,atemp,humidity,windspeed,registered_customer
1777813,2021-03-31 00:00:08,0.0,1.0,"clear, few clouds",14.76,17.425,37.0,12.9980,True
1777814,2021-03-31 00:01:25,0.0,1.0,"clear, few clouds",14.76,17.425,37.0,12.9980,True
1777815,2021-03-31 00:02:52,0.0,1.0,"clear, few clouds",14.76,16.665,40.0,19.0012,True
1777816,2021-03-31 00:03:06,0.0,1.0,"clear, few clouds",14.76,16.665,40.0,19.0012,True
1777817,2021-03-31 00:03:09,0.0,1.0,"clear, few clouds",14.76,16.665,40.0,19.0012,True
...,...,...,...,...,...,...,...,...,...
1789707,2021-03-31 23:58:43,0.0,1.0,"clear, few clouds",22.14,25.760,68.0,8.9981,True
1789708,2021-03-31 23:58:52,0.0,1.0,"clear, few clouds",22.14,25.760,68.0,8.9981,True
1789709,2021-03-31 23:58:54,0.0,1.0,"clear, few clouds",22.14,25.760,68.0,8.9981,True
1789710,2021-03-31 23:59:16,0.0,1.0,"clear, few clouds",22.14,25.760,68.0,8.9981,True


In [18]:
df_5 = df[df.datetime.between("2021-4-14", "2021-4-15")]

In [19]:
df_5.temp.unique()

array([13.94, 13.12, 12.3 , 11.48, 15.58, 14.76, 16.4 , 18.04, 17.22])

In [20]:
df_3 = df.groupby(pd.Grouper(key="datetime",freq="W")).agg({"registered_customer": ("count","sum")}).reset_index()

In [21]:
df_3.columns = ["datetime","Anzahl","Anzahl_registered"]
df_3["Anteil_registered"] = df_3["Anzahl_registered"]/df_3["Anzahl"]

In [22]:
df_3

Unnamed: 0,datetime,Anzahl,Anzahl_registered,Anteil_registered
0,2020-01-05,1786,1324,0.741321
1,2020-01-12,9408,8740,0.928997
2,2020-01-19,9025,8351,0.925319
3,2020-01-26,7784,7283,0.935637
4,2020-02-02,10340,9391,0.908221
...,...,...,...,...
101,2021-12-12,41785,37653,0.901113
102,2021-12-19,35966,32886,0.914364
103,2021-12-26,31871,29080,0.912428
104,2022-01-02,32812,29714,0.905583


In [23]:
df_3.corr()

Unnamed: 0,Anzahl,Anzahl_registered,Anteil_registered
Anzahl,1.0,0.989568,-0.431213
Anzahl_registered,0.989568,1.0,-0.312357
Anteil_registered,-0.431213,-0.312357,1.0


In [24]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [25]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

In [26]:


fig.add_trace(
    go.Scatter(x=df_3["datetime"],y=df_3["Anteil_registered"],name="Anteil an registered customer"),
    secondary_y= False
)

In [27]:
fig.add_trace(
    go.Scatter(x=df_2["datetime"],y=df_2["registered_customer"],name="Anzahl an AUsleihen am Tag"),
    secondary_y= True
)

In [28]:
subfig = make_subplots(specs=[[{"secondary_y": True}]])

In [29]:
fig1 = px.scatter(df_3,x="datetime",y="Anteil_registered")

In [30]:
fig2 = px.scatter(df_3,x="datetime",y="Anzahl")

temperatur quantile 
und wetter
gruppen von leuten
chiquadrat auf wochenende und wetter

In [31]:
px.scatter(df_3,x="datetime",y="Anzahl",facet_row="Anteil_registered")

ValueError: Vertical spacing cannot be greater than (1 / (rows - 1)) = 0.009524.
The resulting plot would have 106 rows (rows=106).
Use the facet_row_spacing argument to adjust this spacing.