In [15]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Load the raw e-scooter history.
df = pd.read_parquet("escooter_history_2022.parquet")

# Temperatures above 45 are treated as outliers and divided by 100.
# NOTE(review): presumably these readings lost their decimal point — confirm.
temp_is_outlier = df["temp"] > 45
df.loc[temp_is_outlier, "temp"] = df["temp"] / 100

In [3]:
# Daily mean of the two flags. A mean that is not exactly 0 or 1 means the
# rows of that day disagree with each other (days without rows yield NaN).
df_33 = (
    df.groupby(pd.Grouper(key="datetime", freq="D"))
    .agg({"holiday": "mean", "workingday": "mean"})
    .reset_index()
)

# Keep only the days where at least one flag is not a clean 0/1, and round the
# means so that each of those days gets a single value per flag.
flags_are_clean = df_33.workingday.isin([1, 0]) & df_33.holiday.isin([0, 1])
df_round = df_33.loc[~flags_are_clean].round().reset_index(drop=True)

# Write the rounded value back to every row of the affected days, for each
# flag and each possible value. NaN days match neither value and are skipped.
row_dates = df.datetime.dt.date
for flag in ("holiday", "workingday"):
    for value in (1, 0):
        days = df_round.loc[df_round[flag] == value, "datetime"].dt.date.to_list()
        df.loc[row_dates.isin(days), flag] = value

In [4]:
# Keep only rows whose humidity lies between the 0.1% and 99.9% quantiles
# (both bounds inclusive).
humidity_low = df.humidity.quantile(0.001)
humidity_high = df.humidity.quantile(0.999)
df = df.loc[df.humidity.between(humidity_low, humidity_high)]

In [5]:
# Work on an independent (deep) copy so the cleaned frame `df` stays untouched.
df1 = df.copy(deep=True)

In [81]:
# Number of rows per customer type (registered or not) and calendar year.
# NOTE(review): newer pandas releases deprecate the "Y" alias in favour of
# "YE"; switch once the environment's pandas version is confirmed.
per_year = pd.Grouper(key="datetime", freq="Y")
df_bar_cust_type = (
    df1.groupby(["registered_customer", per_year])
    .agg(total_customers=("registered_customer", "count"))
    .reset_index()
)

In [153]:
# Split the yearly aggregate into one frame per year, each re-indexed from 0.
bar_year = df_bar_cust_type.datetime.dt.year
df_bar_2020 = df_bar_cust_type.loc[bar_year == 2020].reset_index(drop=True)
df_bar_2021 = df_bar_cust_type.loc[bar_year == 2021].reset_index(drop=True)

In [162]:
# Two stacked bar charts, one subplot row per year, comparing the totals of
# unregistered vs. registered customers.
fig_bar = make_subplots(rows=2, cols=1)

# One entry per year: (aggregated frame, subplot row, legend name, bar colors).
year_panels = [
    (df_bar_2020, 1, "2020", ["green", "blue"]),
    (df_bar_2021, 2, "2021", ["red", "blue"]),
]

for year_df, panel_row, year_label, bar_colors in year_panels:
    fig_bar.add_trace(
        go.Bar(
            x=year_df.registered_customer,
            y=year_df.total_customers,
            marker=dict(color=bar_colors),
            name=year_label,
        ),
        row=panel_row,
        col=1,
    )

    # Label each bar with its value. The value is looked up by customer type
    # (not by positional index) and used for both the arrow position and the
    # text, so a year that lacks one customer type is skipped instead of
    # raising a KeyError.
    for is_registered in (False, True):
        totals = year_df.loc[
            year_df.registered_customer == is_registered, "total_customers"
        ]
        if totals.empty:
            continue
        total = totals.max()
        fig_bar.add_annotation(
            # The bars are addressed through the category labels "false"/"true".
            # NOTE(review): this relies on how plotly labels boolean x values —
            # confirm if the column dtype ever changes.
            x=str(is_registered).lower(),
            y=total,
            arrowhead=1,
            showarrow=True,
            arrowcolor="green",
            text=f"{total} Customers",
            row=panel_row,
            col=1,
        )

fig_bar.update_layout(
    legend=dict(groupclick="toggleitem"),
    title=dict(
        text="<b>Total Customer Comparison 2020/2021</b>",
        font_size=20,
        xanchor="center",
        yanchor="top",
        y=0.9,
        x=0.43,
    ),
    legend_title_text="<b>Year</b>",
)
fig_bar.update_yaxes(title_text="<b>Total Customers</b>", secondary_y=False)
# The x axis shows the customer type (registered or not); the year is encoded
# by the subplot row / legend entry, so the axis must not be titled "Year".
fig_bar.update_xaxes(title_text="<b>Registered Customer</b>", row=2)
fig_bar.show()