In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [3]:
# Load the raw e-scooter history.
df = pd.read_parquet("escooter_history_2022.parquet")

# Temperatures above 45 are treated as outliers and scaled down by a factor of 100
# (presumably readings stored with a misplaced decimal point -- TODO confirm against the data source).
temp_is_outlier = df["temp"] > 45
df.loc[temp_is_outlier, "temp"] = df.loc[temp_is_outlier, "temp"] / 100

In [4]:
# Make the `holiday` and `workingday` flags consistent within each calendar day.
#
# Step 1: average both flags per day. A day whose rows all agree has a mean of exactly 0 or 1;
# any other value means the rows of that day disagree (or the day has no rows at all).
df_33 = (
    df.groupby(pd.Grouper(key="datetime", freq="D"))
    .agg({"holiday": "mean", "workingday": "mean"})
    .reset_index()
)

# Step 2: keep only the days where at least one of the two flags is not a clean 0/1,
# and round the daily means so the rounded value can be written back to every row of the day.
flags_are_clean = df_33["workingday"].isin([1, 0]) & df_33["holiday"].isin([0, 1])
df_round = df_33.loc[~flags_are_clean].round().reset_index(drop=True)


def _days_where(column, value):
    """Return the calendar dates in ``df_round`` whose rounded ``column`` equals ``value``."""
    return df_round.loc[df_round[column] == value, "datetime"].dt.date.to_list()


# Step 3: overwrite the flag on every row of the affected days with the rounded daily value.
# The calendar date of each row does not change below, so it is computed once.
row_dates = df["datetime"].dt.date

df_holiday_1 = _days_where("holiday", 1)
df.loc[row_dates.isin(df_holiday_1), "holiday"] = 1

df_holiday_0 = _days_where("holiday", 0)
df.loc[row_dates.isin(df_holiday_0), "holiday"] = 0

df_workingday_1 = _days_where("workingday", 1)
df.loc[row_dates.isin(df_workingday_1), "workingday"] = 1

df_workingday_0 = _days_where("workingday", 0)
df.loc[row_dates.isin(df_workingday_0), "workingday"] = 0

In [5]:
# Trim humidity outliers: keep only rows whose humidity lies between the
# 0.1% and 99.9% quantiles (both bounds inclusive).
humidity_lower = df["humidity"].quantile(0.001)
humidity_upper = df["humidity"].quantile(0.999)
humidity_in_range = df["humidity"].between(humidity_lower, humidity_upper)
df = df.loc[humidity_in_range]

In [6]:
# Work on an independent (deep) copy so the plotting cells below cannot modify `df`.
df1 = df.copy(deep=True)

In [7]:
# Count rows per customer type (registered / unregistered) and calendar year.
# Result columns: registered_customer, datetime (the yearly bin label), total_customers.
yearly_bins = pd.Grouper(key="datetime", freq="Y")
rows_by_type_and_year = df1.groupby(["registered_customer", yearly_bins])
df_bar_cust_type = rows_by_type_and_year.agg(
    total_customers=("registered_customer", "count")
).reset_index()

In [8]:
# Split the yearly counts into one small frame per year, each re-indexed from 0.
bar_year = df_bar_cust_type["datetime"].dt.year
df_bar_2020 = df_bar_cust_type.loc[bar_year == 2020].reset_index(drop=True)
df_bar_2021 = df_bar_cust_type.loc[bar_year == 2021].reset_index(drop=True)

In [56]:
# Side-by-side bar chart comparing total unregistered / registered customers in 2020 and 2021.
# One subplot column per year, sharing the y axis; every bar gets an arrow annotation with its count.
fig_bar = make_subplots(rows=1, cols=2, shared_xaxes=True, shared_yaxes=True, horizontal_spacing=0.01)

# One entry per customer type:
#   flag        - value of `registered_customer` selecting the rows
#   category    - x position used for the annotation (presumably the category label plotly
#                 derives from the boolean x value -- TODO confirm if the x encoding changes)
#   label       - text used in trace names and legend group titles
#   color       - bar, arrow and annotation font colour
#   legendgroup - legend group shared by both years of the same customer type
customer_types = [
    dict(flag=False, category="false", label="Unregistered",
         color=px.colors.sequential.Brwnyl[3], legendgroup="1"),
    dict(flag=True, category="true", label="Registered",
         color=px.colors.sequential.Brwnyl[5], legendgroup="2"),
]

# (subplot column, year label, yearly counts frame)
yearly_panels = [
    (1, "2020", df_bar_2020),
    (2, "2021", df_bar_2021),
]


def _rows_for_type(yearly_counts, flag):
    """Return the rows of ``yearly_counts`` whose ``registered_customer`` equals ``flag``."""
    return yearly_counts.loc[yearly_counts.registered_customer == flag]


# Pass 1: bars. Order is year by year, unregistered before registered, so the legend order is stable.
for col, year_label, yearly_counts in yearly_panels:
    for ctype in customer_types:
        rows = _rows_for_type(yearly_counts, ctype["flag"])
        bar_kwargs = dict(
            x=rows.registered_customer,
            y=rows.total_customers,
            marker=dict(color=ctype["color"]),
            name=f"{ctype['label']} {year_label}",
            legendgroup=ctype["legendgroup"],
        )
        # The legend group title is only attached to the first (left-hand) trace of each group.
        if col == 1:
            bar_kwargs["legendgrouptitle_text"] = f"{ctype['label']}:"
        fig_bar.add_trace(go.Bar(**bar_kwargs), row=1, col=col)

# Pass 2: count annotations, in the same order as the bars.
for col, year_label, yearly_counts in yearly_panels:
    for ctype in customer_types:
        rows = _rows_for_type(yearly_counts, ctype["flag"])
        # A customer type may be absent for a year; there is then no bar to annotate.
        if rows.empty:
            continue
        # Use one value for both the arrow position and the text. Looking the count up by
        # index label (0 / 1) only works when both customer types are present in the frame
        # and raises KeyError otherwise.
        customer_count = rows.total_customers.max()
        fig_bar.add_annotation(
            x=ctype["category"],
            y=customer_count,
            arrowhead=1,
            showarrow=True,
            arrowcolor=ctype["color"],
            font=dict(size=12, color=ctype["color"]),
            text=f"{customer_count} Customers",
            row=1,
            col=col,
        )

fig_bar.update_layout(
    legend=dict(groupclick="toggleitem"),
    title=dict(
        text="<b>Total Customer Comparison 2020/2021</b>",
        font_size=20,
        xanchor="center",
        yanchor="top",
        y=0.9,
        x=0.43,
    ),
    legend_title_text="<b>Registered Customers</b>",
)
# The y axis is shared, so only the left-hand subplot carries the axis title.
fig_bar.update_yaxes(title_text="<b>Total Customers</b>", secondary_y=False, col=1)
for col, year_label, _ in yearly_panels:
    fig_bar.update_xaxes(title_text=f"<b>{year_label}</b>", col=col)
fig_bar.show()