In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from datetime import datetime

df = pd.read_excel("BakeryData_Vilnius.xlsx")
stores = ["main street A","main street B","station A","station B"]
df["weekday_text"] = df['date'].dt.strftime('%A')
df["date"] = pd.to_datetime(df["date"]).dt.date
df

Unnamed: 0,date,weekday,main street A,main street B,station A,station B,weekday_text
0,2016-05-11,3,2.23,,,,Wednesday
1,2016-05-12,4,18.10,,,,Thursday
2,2016-05-13,5,15.85,,,,Friday
3,2016-05-14,6,14.22,,,,Saturday
4,2016-05-15,7,2.58,,,,Sunday
...,...,...,...,...,...,...,...
2572,2023-05-27,6,168.05,32.34,76.97,114.30,Saturday
2573,2023-05-28,7,44.62,32.85,80.21,91.25,Sunday
2574,2023-05-29,1,64.11,116.84,149.75,92.56,Monday
2575,2023-05-30,2,103.63,134.48,194.03,75.63,Tuesday


In [12]:
pre_covid = datetime(day = 1,month = 3,year = 2021).date()
after_covid = datetime(day = 1,month = 3,year = 2022).date()
df = df[(df["date"] < pre_covid) | (df["date"] > after_covid)]

In [13]:
for store in stores:
    fig = px.line(df, x="date", y=store, color="weekday_text", title=store)
    fig.show()

In [14]:
aggfuncList = {}

table = pd.pivot_table(df, values=stores, index='weekday_text', aggfunc=[np.mean, np.std])
table.columns = [f'{col}_{agg}' for col, agg in table.columns]
table



Unnamed: 0_level_0,mean_main street A,mean_main street B,mean_station A,mean_station B,std_main street A,std_main street B,std_station A,std_station B
weekday_text,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Friday,96.202753,122.09,153.224162,132.309362,13.459014,17.4123,29.017888,57.539247
Monday,54.950222,121.460556,152.209086,113.832766,19.336847,16.190518,27.752462,48.264783
Saturday,148.488956,35.314118,75.029848,103.749362,23.043453,5.308079,8.29349,11.284882
Sunday,56.420063,31.198824,75.15269,107.849787,21.335195,6.676195,8.08325,13.049768
Thursday,55.920316,122.245294,150.890609,115.332553,19.733532,14.300052,30.193218,42.141389
Tuesday,55.608444,128.156667,150.601574,120.03383,20.345778,19.212268,29.53048,46.692708
Wednesday,55.695678,130.295556,152.01899,135.590625,20.463847,11.115627,30.390583,60.714259


In [15]:
df.describe()

Unnamed: 0,weekday,main street A,main street B,station A,station B
count,2211.0,2211.0,122.0,1380.0,330.0
mean,4.001809,74.764202,99.367623,129.891326,118.437606
std,1.999547,38.677014,43.547492,42.839089,45.077501
min,1.0,1.35,18.7,50.24,42.51
25%,2.0,47.295,40.4425,84.1175,90.66
50%,4.0,62.95,118.115,136.02,108.89
75%,6.0,96.81,131.645,160.7775,129.3075
max,7.0,191.88,163.4,275.72,300.98


In [16]:
for store in stores:
    fig = px.histogram(df, x=store, color="weekday_text")
    fig.show()

In [17]:
from scipy import stats
import plotly.subplots as sp
import plotly.graph_objects as go

daysOfTheWeek = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

fig = sp.make_subplots(rows=len(stores), cols=len(daysOfTheWeek), subplot_titles=[day for day in daysOfTheWeek])

for i, store in enumerate(stores):
    for j, day in enumerate(daysOfTheWeek):
        qq = stats.probplot(df[df["weekday_text"] == day][store], dist='lognorm', sparams=(1))
        x = np.array([qq[0][0][0], qq[0][0][-1]])

        fig.add_trace(go.Scatter(x=qq[0][0], y=qq[0][1], mode='markers'), row=i+1, col=j+1)

        # Set custom axis labels for each plot
        if store==stores[0]:
            fig.update_xaxes(title_text=day, row=i+1, col=j+1)
        if day==daysOfTheWeek[0]:
            fig.update_yaxes(title_text=store, row=i+1, col=j+1)

        # Set x-axis range for each subplot
        x_range = [min(qq[0][0]), max(qq[0][0])]
        fig.update_xaxes(range=x_range, row=i+1, col=j+1)

    # Add line trace for each store
    fig.add_trace(go.Scatter(x=x, y=qq[1][1] + qq[1][0]*x, mode='lines'), row=i+1, col=1)

fig.update_layout(showlegend=False)
fig.show()

In [18]:
import plotly.graph_objects as go
from scipy import stats

daysOfTheWeek = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday","Sunday"]
day = "Saturday"
store = "main street B"
qq = stats.probplot(df[df["weekday_text"]==day][store], dist='norm', sparams=(1))
x = np.array([qq[0][0][0], qq[0][0][-1]])

fig = go.Figure()
fig.add_scatter(x=qq[0][0], y=qq[0][1], mode='markers')
fig.add_scatter(x=x, y=qq[1][1] + qq[1][0]*x, mode='lines')
print(qq[1][1])
print(qq[1][0]*x)
print(x, qq[1][1] + qq[1][0]*x)
fig.layout.update(showlegend=False, title="Quantile-Quantile plot: {0} for {1}".format(store, day))
fig.show()



nan
[nan nan]
[-1.84925316  3.84925316] [nan nan]
