In [2]:
import pandas as pd
import plotly.express as px
import matplotlib
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [3]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [4]:
df_org_data = pd.read_parquet("escooter_history_2022.parquet")

In [5]:
df_org_data.holiday = df_org_data.holiday.astype(bool)
df_org_data.workingday = df_org_data.workingday.astype(bool)
df_org_data.loc[df_org_data.temp >45, "temp"] = df_org_data.temp.div(100)

In [6]:
df = df_org_data.copy()
df = pd.get_dummies(df, prefix=["w"])

In [7]:
df_box = df.groupby(pd.Grouper(key="datetime",freq="D")).agg({"registered_customer": "count"}).reset_index()
df_box["month"] = df_box.datetime.dt.month
df_box["year"] = df_box.datetime.dt.year
df_box

Unnamed: 0,datetime,registered_customer,month,year
0,2020-01-04,985,1,2020
1,2020-01-05,801,1,2020
2,2020-01-06,1349,1,2020
3,2020-01-07,1562,1,2020
4,2020-01-08,1600,1,2020
...,...,...,...,...
726,2021-12-30,4231,12,2021
727,2021-12-31,8144,12,2021
728,2022-01-01,5932,1,2022
729,2022-01-02,5182,1,2022


In [8]:
df_box_2020 = df_box.loc[df_box.year == 2020]
df_box_2021 = df_box.loc[df_box.year == 2021]
df_box_2022 = df_box.loc[df_box.year == 2022]

In [9]:
fig= px.box(
 x= df_box_2020.month,
 y=df_box_2020.registered_customer,
 points= "all",
 labels= dict(x= "Month", y= "Total Customers"),
 color_discrete_map= dict(df_box_2020 = "rot")
)

fig.add_hline(
 y= df_box_2020.registered_customer.median(),
 opacity = 0.3,
 line_dash= "dot",
 line_color= "black",
 layer= "below",
 annotation_text= "Total Median 2020",
 annotation_position= "top left",
 annotation_font_color= "black",
)

fig.update_annotations(opacity= 0.3)

for month in df_box_2020.month.unique():
  prefix= "same as"
  color= "grey"
  if df_box_2020.registered_customer.median() < df_box_2020[df_box_2020.month == month].registered_customer.median():
   prefix= "above"
   color= "darkgreen"
  elif df_box_2020.registered_customer.median() > df_box_2020[df_box_2020.month == month].registered_customer.median():
   prefix= "below"
   color= "darkred"

  fig.add_annotation(
    text=f"{prefix} total median!", x=month, y= df_box_2020[df_box_2020.month == month].registered_customer.median(),
        arrowhead=1, showarrow=True, arrowcolor=color,        
        bgcolor='white',
        font=dict(size=10, color=color)
  )
  






fig.update_layout(title= dict(text= "<b>Boxplot of total monthly Customers</b>"))
fig.show()


#### kann ich noch ohne annotations sondern mit color vom boxplot zu machen 

In [10]:
color = []
for month in df_box_2020.month.unique():
  color= "grey"
  if df_box_2020.registered_customer.median() < df_box_2020[df_box_2020.month == month].registered_customer.median():
   color= "red"
  elif df_box_2020.registered_customer.median() > df_box_2020[df_box_2020.month == month].registered_customer.median():
   color= "green"

In [11]:
df_box_2020["color"] = color



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [12]:
fig1 = make_subplots(rows= 2, cols=1, subplot_titles= ("2020", "2021"))    # 2021 noch dazumachen

fig1.add_trace(
    go.Box(
      name= "2020",
      x= df_box_2020.month,
      y= df_box_2020.registered_customer,
      boxpoints= "all",
      marker=dict(size=2, color='rgb(0, 0, 0)'),
      line= dict(width= 1),
      jitter= 0.5,
      whiskerwidth=0.2,
      fillcolor = "lightgrey",
      hoveron = "boxes+points"
      ),
      col= 1,
      row= 1
)

for month in df_box_2020.month.unique():
  prefix= "same as"
  color= "grey"
  if df_box_2020.registered_customer.median() < df_box_2020[df_box_2020.month == month].registered_customer.median():
   prefix= "above"
   color= "darkgreen"
  elif df_box_2020.registered_customer.median() > df_box_2020[df_box_2020.month == month].registered_customer.median():
   prefix= "below"
   color= "darkred"
   
  fig1.add_annotation(
    text=f"{prefix} median 2020!", x=month, y= df_box_2020[df_box_2020.month == month].registered_customer.median(),
        arrowhead=1, showarrow=True, arrowcolor=color,        
        bgcolor='white',
        font=dict(size=10, color=color)
   )

fig1.add_hline(
 y= df_box_2020.registered_customer.median(),
 opacity= 0.5,
 line_dash= "dot",
 line_color= "grey",
 layer= "below",
 annotation_text= "Total Median 2020",
 annotation_position= "top left",
 annotation_font_color= "grey",
 annotation_opacity = 0.5,
 col= 1,
 row= 1
)

fig1.add_hline(
 y= df_box_2021.registered_customer.median(),
 opacity= 0.5,
 line_dash= "dot",
 line_color= "black",
 layer= "below",
 annotation_text= "Total Median 2021",
 annotation_position= "top left",
 annotation_font_color= "black",
 annotation_opacity = 0.5,
 col= 1,
 row= 1
)


fig1.add_trace(
    go.Box(
      name= "2021",
      x= df_box_2021.month,
      y= df_box_2021.registered_customer,
      boxpoints= "all",
      marker=dict(size=2, color='rgb(0, 0, 0)'),
      line= dict(width= 1),
      jitter= 0.5,
      whiskerwidth=0.2,
      fillcolor = "grey",
      hoveron = "boxes+points"
      ),
      col= 1,
      row= 2
)

for month in df_box_2021.month.unique():
  prefix= "same as"
  color= "grey"
  if df_box_2021.registered_customer.median() < df_box_2021[df_box_2021.month == month].registered_customer.median():
   prefix= "above"
   color= "darkgreen"
  elif df_box_2021.registered_customer.median() > df_box_2021[df_box_2021.month == month].registered_customer.median():
   prefix= "below"
   color= "darkred"
   
  fig1.add_annotation(
    text=f"{prefix} median 2021!", x=month, y= df_box_2021[df_box_2021.month == month].registered_customer.median(),
        arrowhead=1, showarrow=True, arrowcolor=color,        
        bgcolor='white',
        font=dict(size=10, color=color),
        col= 1,
        row= 2
   )



fig1.add_hline(
 y= df_box_2021.registered_customer.median(),
 opacity= 0.5,
 line_dash= "dot",
 line_color= "black",
 layer= "below",
 annotation_text= "Total Median 2021",
 annotation_position= "top left",
 annotation_font_color= "black",
 annotation_opacity = 0.5,
 col= 1,
 row= 2
)

fig1.add_hline(
 y= df_box_2020.registered_customer.median(),
 opacity= 0.5,
 line_dash= "dot",
 line_color= "grey",
 layer= "below",
 annotation_text= "Total Median 2020",
 annotation_position= "top left",
 annotation_font_color= "grey",
 annotation_opacity = 0.5,
 col= 1,
 row= 2
)


fig1.update_layout(title= dict(text= "<b>Boxplot of total monthly Customers 2020 and 2021</b>"), height= 770, legend_title_text= "Year")
#fig1.update_traces(visible= "legendonly")
fig1.update_yaxes(title_text="Total Customers", title_font= dict(size= 14)) #, showgrid= False)
fig1.update_xaxes(title_text="Month", title_font= dict(size= 14))

fig1.show()

In [37]:
fig1 = make_subplots(rows= 2, cols=1, subplot_titles= ("2020", "2021"))    # 2021 noch dazumachen

for month in df_box_2020.month.unique():

  if df_box_2020.registered_customer.median() < df_box_2020[df_box_2020.month == month].registered_customer.median():
    color = "darkgreen"

  elif df_box_2020.registered_customer.median() > df_box_2020[df_box_2020.month == month].registered_customer.median():
    color = "darkred"


  fig1.add_trace(
    go.Box(
      name= f"{month}",
      legendgroup= "group1",
      legendgrouptitle_text= "2020",
      x= df_box_2020.loc[df_box_2020.month == month].month,
      y= df_box_2020.loc[df_box_2020.month == month].registered_customer,
      boxpoints= "all",
      line= dict(width= 1),
      marker= dict(size=2, color= "rgb(0,0,0)"),
      jitter= 0.5,
      whiskerwidth=0.2,
      fillcolor = color,
      hoveron = "boxes+points",
      ),
      col= 1,
      row= 1
  )

fig1.add_hline(
 y= df_box_2021.registered_customer.median(),
 opacity= 0.5,
 line_dash= "dot",
 line_color= "black",
 layer= "below",
 annotation_text= "Median 2021: "f'{df_box_2021.registered_customer.median()}',
 annotation_position= "top left",
 annotation_font_color= "black",
 annotation_opacity = 0.5,
 col= 1,
 row= 1
)

fig1.add_hline(
 y= df_box_2020.registered_customer.median(),
 opacity= 0.5,
 line_dash= "dot",
 line_color= "black",
 layer= "below",
 annotation_text= "Median 2020: "f'{df_box_2020.registered_customer.median()}',
 annotation_position= "top left",
 annotation_font_color= "black",
 annotation_opacity = 0.5,
 col= 1,
 row= 1
)


for month in df_box_2021.month.unique():

  if df_box_2021.registered_customer.median() < df_box_2021[df_box_2021.month == month].registered_customer.median():
    color = "darkgreen"

  elif df_box_2021.registered_customer.median() > df_box_2021[df_box_2021.month == month].registered_customer.median():
    color = "darkred"

    if df_box_2020.registered_customer.median() < df_box_2021[df_box_2021.month == month].registered_customer.median():
      color = "orange"

  fig1.add_trace(
    go.Box(
      name= f"{month}",
      legendgroup= "group2",
      legendgrouptitle_text= "2021",
      x= df_box_2021.loc[df_box_2021.month == month].month,
      y= df_box_2021.loc[df_box_2021.month == month].registered_customer,
      boxpoints= "all",
      line= dict(width= 1),
      marker= dict(size=2, color= "rgb(0,0,0)"),
      jitter= 0.5,
      whiskerwidth=0.2,
      fillcolor = color,
      hoveron = "boxes+points"
      ),
      col= 1,
      row= 2
  )

fig1.add_hline(
 y= df_box_2021.registered_customer.median(),
 opacity= 0.5,
 line_dash= "dot",
 line_color= "black",
 layer= "below",
 annotation_text= "Median 2021: "f'{df_box_2021.registered_customer.median()}',
 annotation_position= "top left",
 annotation_font_color= "black",
 annotation_opacity = 0.5,
 col= 1,
 row= 2
)

fig1.add_hline(
 y= df_box_2020.registered_customer.median(),
 opacity= 0.5,
 line_dash= "dot",
 line_color= "black",
 layer= "below",
 annotation_text= "Median 2020: "f'{df_box_2020.registered_customer.median()}',
 annotation_position= "top left",
 annotation_font_color= "black",
 annotation_opacity = 0.5,
 col= 1,
 row= 2
)


fig1.update_layout(title= dict(text= "<b>Boxplot of total monthly Customers 2020 and 2021</b>", font_size= 20), height= 770, legend_title_text= "<b>Year</b>", legend_title_font_size= 15, legend= dict(groupclick= "toggleitem"))
fig1.update_yaxes(title_text="<b>Total Customers</b>", title_font= dict(size= 14), showgrid= False)
fig1.update_xaxes(title_text="<b>Month</b>", title_font= dict(size= 14))

fig1.show()