In [286]:
import pandas as pd
import plotly.express as px
import matplotlib
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [277]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))


Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display



In [278]:
df_org_data = pd.read_parquet("escooter_history_2022.parquet")

In [279]:
df_org_data.holiday = df_org_data.holiday.astype(bool)
df_org_data.workingday = df_org_data.workingday.astype(bool)
df_org_data.loc[df_org_data.temp >45, "temp"] = df_org_data.temp.div(100)

In [280]:
df = df_org_data.copy()
df = pd.get_dummies(df, prefix=["w"])

In [281]:
df_box = df.groupby(pd.Grouper(key="datetime",freq="D")).agg({"registered_customer": "count"}).reset_index()
df_box["month"] = df_box.datetime.dt.month
df_box["year"] = df_box.datetime.dt.year
df_box

Unnamed: 0,datetime,registered_customer,month,year
0,2020-01-04,985,1,2020
1,2020-01-05,801,1,2020
2,2020-01-06,1349,1,2020
3,2020-01-07,1562,1,2020
4,2020-01-08,1600,1,2020
...,...,...,...,...
726,2021-12-30,4231,12,2021
727,2021-12-31,8144,12,2021
728,2022-01-01,5932,1,2022
729,2022-01-02,5182,1,2022


In [430]:
df_box_2020 = df_box.loc[df_box.year == 2020]
df_box_2021 = df_box.loc[df_box.year == 2021]
df_box_2022 = df_box.loc[df_box.year == 2022]

In [431]:
fig= px.box(
 x= df_box_2020.month,
 y=df_box_2020.registered_customer,
 points= "all",
 labels= dict(x= "Month", y= "Total Customers")
)

fig.add_hline(
 y= df_box_2020.registered_customer.median(),
 opacity = 0.3,
 line_dash= "dot",
 line_color= "black",
 layer= "below",
 annotation_text= "Total Median 2020",
 annotation_position= "top left",
 annotation_font_color= "black",
)

fig.update_annotations(opacity= 0.3)

for month in df_box_2020.month.unique():
  prefix= "same as"
  color= "grey"
  if df_box_2020.registered_customer.median() < df_box_2020[df_box_2020.month == month].registered_customer.median():
   prefix= "above"
   color= "darkgreen"
  elif df_box_2020.registered_customer.median() > df_box_2020[df_box_2020.month == month].registered_customer.median():
   prefix= "below"
   color= "darkred"

  fig.add_annotation(
    text=f"{prefix} total median!", x=month, y= df_box_2020[df_box_2020.month == month].registered_customer.median(),
        arrowhead=1, showarrow=True, arrowcolor=color,        
        bgcolor='white',
        font=dict(size=10, color=color)
   )
fig.update_layout(title= dict(text= "<b>Boxplot of total monthly Customers</b>"))
fig.show()


#### kann ich noch ohne annotations sondern mit color vom boxplot zu machen 

In [432]:
color = []
for month in df_box_2020.month.unique():
  color= "grey"
  if df_box_2020.registered_customer.median() < df_box_2020[df_box_2020.month == month].registered_customer.median():
   color= "darkgreen"
  elif df_box_2020.registered_customer.median() > df_box_2020[df_box_2020.month == month].registered_customer.median():
   color= "darkgrey"

In [433]:
df_box_2020["color"] = color



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [434]:
df_box_2020.month

0       1
1       1
2       1
3       1
4       1
       ..
358    12
359    12
360    12
361    12
362    12
Name: month, Length: 363, dtype: int64

In [435]:
fig1 = make_subplots()

In [436]:

# for month in df_box_2020.month.unique():
#   color= "grey"
#   if df_box_2020.registered_customer.median() < df_box_2020[df_box_2020.month == month].registered_customer.median():
#    color= "darkgreen"
#   elif df_box_2020.registered_customer.median() > df_box_2020[df_box_2020.month == month].registered_customer.median():
#    color= "darkgrey"

fig1.add_trace(
    go.Box(
      x= df_box_2020.month,
      y= df_box_2020.registered_customer,
      boxpoints= "all",
      marker=dict(size=2, color='rgb(0, 0, 0)'),
      line= dict(width= 1),
      jitter= 0.5,
      whiskerwidth=0.2,
    )
)


fig1.add_hline(
 y= df_box_2020.registered_customer.median(),
 opacity = 0.3,
 line_dash= "dot",
 line_color= "black",
 layer= "below",
 annotation_text= "Total Median 2020",
 annotation_position= "top left",
 annotation_font_color= "black",
)

fig.update_layout(boxmode = "overlay")
fig1.update_annotations(opacity= 0.4)
fig1.update_layout(title= dict(text= "<b>Boxplot of total monthly Customers</b>"))
fig1.show()