In [286]:
import pandas as pd
import plotly.express as px
import matplotlib
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [277]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))


Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display



In [278]:
df_org_data = pd.read_parquet("escooter_history_2022.parquet")

In [279]:
df_org_data.holiday = df_org_data.holiday.astype(bool)
df_org_data.workingday = df_org_data.workingday.astype(bool)
df_org_data.loc[df_org_data.temp >45, "temp"] = df_org_data.temp.div(100)

In [280]:
df = df_org_data.copy()
df = pd.get_dummies(df, prefix=["w"])

In [281]:
df_box = df.groupby(pd.Grouper(key="datetime",freq="D")).agg({"registered_customer": "count"}).reset_index()
df_box["month"] = df_box.datetime.dt.month
df_box["year"] = df_box.datetime.dt.year
df_box

Unnamed: 0,datetime,registered_customer,month,year
0,2020-01-04,985,1,2020
1,2020-01-05,801,1,2020
2,2020-01-06,1349,1,2020
3,2020-01-07,1562,1,2020
4,2020-01-08,1600,1,2020
...,...,...,...,...
726,2021-12-30,4231,12,2021
727,2021-12-31,8144,12,2021
728,2022-01-01,5932,1,2022
729,2022-01-02,5182,1,2022


In [430]:
df_box_2020 = df_box.loc[df_box.year == 2020]
df_box_2021 = df_box.loc[df_box.year == 2021]
df_box_2022 = df_box.loc[df_box.year == 2022]

In [431]:
fig= px.box(
 x= df_box_2020.month,
 y=df_box_2020.registered_customer,
 points= "all",
 labels= dict(x= "Month", y= "Total Customers")
)

fig.add_hline(
 y= df_box_2020.registered_customer.median(),
 opacity = 0.3,
 line_dash= "dot",
 line_color= "black",
 layer= "below",
 annotation_text= "Total Median 2020",
 annotation_position= "top left",
 annotation_font_color= "black",
)

fig.update_annotations(opacity= 0.3)

for month in df_box_2020.month.unique():
  prefix= "same as"
  color= "grey"
  if df_box_2020.registered_customer.median() < df_box_2020[df_box_2020.month == month].registered_customer.median():
   prefix= "above"
   color= "darkgreen"
  elif df_box_2020.registered_customer.median() > df_box_2020[df_box_2020.month == month].registered_customer.median():
   prefix= "below"
   color= "darkred"

  fig.add_annotation(
    text=f"{prefix} total median!", x=month, y= df_box_2020[df_box_2020.month == month].registered_customer.median(),
        arrowhead=1, showarrow=True, arrowcolor=color,        
        bgcolor='white',
        font=dict(size=10, color=color)
   )
fig.update_layout(title= dict(text= "<b>Boxplot of total monthly Customers</b>"))
fig.show()


#### kann ich noch ohne annotations sondern mit color vom boxplot zu machen 

In [449]:
color = []
for month in df_box_2020.month.unique():
  color= "grey"
  if df_box_2020.registered_customer.median() < df_box_2020[df_box_2020.month == month].registered_customer.median():
   color= "red"
  elif df_box_2020.registered_customer.median() > df_box_2020[df_box_2020.month == month].registered_customer.median():
   color= "green"

In [450]:
df_box_2020["color"] = color



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [542]:
fig1 = make_subplots()
fig1.add_trace(
    go.Box(
      x= df_box_2020.month,
      y= df_box_2020.registered_customer,
      boxpoints= "all",
      marker=dict(size=2, color='rgb(0, 0, 0)'),
      line= dict(width= 1),
      jitter= 0.5,
      whiskerwidth=0.2,
      fillcolor = "grey"
      )
)

for month in df_box_2020.month.unique():
  # prefix= "same as"
  # color= "grey"
  if df_box_2020.registered_customer.median() < df_box_2020[df_box_2020.month == month].registered_customer.median():
   prefix= "above"
   color= "darkgreen"
  elif df_box_2020.registered_customer.median() > df_box_2020[df_box_2020.month == month].registered_customer.median():
   prefix= "below"
   color= "darkred"
   
  fig1.add_annotation(
    text=f"{prefix} total median!", x=month, y= df_box_2020[df_box_2020.month == month].registered_customer.median(),
        arrowhead=1, showarrow=True, arrowcolor=color,        
        bgcolor='white',
        font=dict(size=10, color=color)
   )

fig1.add_hline(
 y= df_box_2020.registered_customer.median(),
 opacity = 0.5,
 line_dash= "dot",
 line_color= "black",
 layer= "below",
 annotation_text= "Total Median 2020",
 annotation_position= "top left",
 annotation_font_color= "black",
)

fig1.update_annotations(opacity= 0.6)
fig1.update_layout(title= dict(text= "<b>Boxplot of total monthly Customers</b>"))
fig1.update_yaxes(title_text="Total Customers", title_font= dict(size= 14))
fig1.update_xaxes(title_text="Month", title_font= dict(size= 14))

fig1.show()

In [534]:
df_box_2020.loc[df_box_2020[df_box_2020.month == month].registered_customer.median() > df_box_2020.registered_customer.median(), 'higher_lower'] = "Higher"
df_box_2020.fillna(value= "Lower")

Unnamed: 0,datetime,registered_customer,month,year,color,higher_lower
0,2020-01-04 00:00:00,985.0,1.0,2020.0,green,Higher
1,2020-01-05 00:00:00,801.0,1.0,2020.0,green,Lower
2,2020-01-06 00:00:00,1349.0,1.0,2020.0,green,Lower
3,2020-01-07 00:00:00,1562.0,1.0,2020.0,green,Lower
4,2020-01-08 00:00:00,1600.0,1.0,2020.0,green,Lower
...,...,...,...,...,...,...
359,2020-12-28 00:00:00,2514.0,12.0,2020.0,green,Lower
360,2020-12-29 00:00:00,2225.0,12.0,2020.0,green,Lower
361,2020-12-30 00:00:00,2792.0,12.0,2020.0,green,Lower
362,2020-12-31 00:00:00,5554.0,12.0,2020.0,green,Lower


In [535]:
#fig1 = go.Figure()


fig1 = px.box(
  x= df_box_2020.month, y= df_box_2020.registered_customer, points= "all")
      #marker=dict(size=2, color='rgb(0, 0, 0)'),
      #line= dict(width= 1),
      #jitter= 0.5,
      #whiskerwidth=0.2,

# fig1.add_trace(
#     go.Box(
#       x= df_box_2020.month,
#       y= df_box_2020.registered_customer,
#       boxpoints= "all",
#       marker=dict(size=2, color='rgb(0, 0, 0)'),
#       line= dict(width= 1),
#       jitter= 0.5,
#       whiskerwidth=0.2,
#       fillcolor= df_box_2020.higher_lower
#     )
#   )

# fig1.add_trace(
#     go.Box(
#       x= df_box_2020.month,
#       y= df_box_2020.registered_customer,
#       boxpoints= "all",
#       marker=dict(size=2, color='rgb(0, 0, 0)'),
#       line= dict(width= 1),
#       jitter= 0.5,
#       whiskerwidth=0.2,
#       fillcolor = "grey"
#       )
# )

# for month in df_box_2020.month.unique():
#   prefix= "same as"
#   color= "grey"
#   if df_box_2020.registered_customer.median() < df_box_2020[df_box_2020.month == month].registered_customer.median():
#    prefix= "above"
#    color= "darkgreen"
#   elif df_box_2020.registered_customer.median() > df_box_2020[df_box_2020.month == month].registered_customer.median():
#    prefix= "below"
#    color= "darkred"
   
#   fig1.add_annotation(
#     text=f"{prefix} total median!", x=month, y= df_box_2020[df_box_2020.month == month].registered_customer.median(),
#         arrowhead=1, showarrow=True, arrowcolor=color,        
#         bgcolor='white',
#         font=dict(size=10, color=color)
#    )

# fig1.add_hline(
#  y= df_box_2020.registered_customer.median(),
#  opacity = 0.5,
#  line_dash= "dot",
#  line_color= "black",
#  layer= "below",
#  annotation_text= "Total Median 2020",
#  annotation_position= "top left",
#  annotation_font_color= "black",
# )

# fig1.update_annotations(opacity= 0.6)
# fig1.update_layout(title= dict(text= "<b>Boxplot of total monthly Customers</b>"))
# fig1.update_yaxes(title_text="Total Customers", title_font= dict(size= 14))
# fig1.update_xaxes(title_text="Month", title_font= dict(size= 14))

fig1.show()

KeyError: (nan, '', '', '')