In [1]:
import numpy as np
import pandas as pd

# Synthetic commute data set: one row per calendar day, starting 2024-03-01.
BASE_TIME = pd.Timedelta('09:00:00')  # offset from midnight added to every departure
SIZE = 10                             # number of consecutive days to generate
SEED = 42                             # fixed so a fresh "Restart & Run All" reproduces the same table

# A dedicated Generator keeps the draws reproducible without touching
# NumPy's global random state.
rng = np.random.default_rng(SEED)

# Departure delay after BASE_TIME, in whole minutes: 10 + |round(10 * N(0, 1))|,
# so every departure is at least 10 minutes past BASE_TIME.
left_time = np.round(rng.standard_normal(SIZE) * 10, 0)
left_time = 10 + np.absolute(left_time)

df = pd.DataFrame({
    # ISO date literal: unambiguous regardless of day-first / month-first conventions.
    "Data": pd.date_range("2024-03-01", periods=SIZE),
    # Integer wait in [15, 24] (upper bound exclusive); used as minutes downstream.
    "Espera": rng.integers(15, 25, SIZE),
    # Integer trip duration: 100 plus a jitter in [-5, 4]; used as minutes downstream.
    "Duração da Viagem": 100 + rng.integers(-5, 5, SIZE),
})

# Absolute departure timestamp = date + 09:00 + per-day delay in minutes.
df["Horário de Saída"] = df["Data"] + BASE_TIME + pd.to_timedelta(left_time, unit='m')

# 1 on Saturday (weekday 5) and Sunday (weekday 6), 0 otherwise.
df["Fim de Semana?"] = df["Data"].dt.weekday.isin([5, 6]).astype(np.int64)

# Weekend trips are 20 units shorter than the value drawn above.
df["Duração da Viagem"] = np.where(
    df["Fim de Semana?"] == 1,
    df["Duração da Viagem"] - 20,
    df["Duração da Viagem"],
)

df

Unnamed: 0,Data,Espera,Duração da Viagem,Horário de Saída,Fim de Semana?
0,2024-03-01,15,100,2024-03-01 09:12:00,0
1,2024-03-02,24,78,2024-03-02 09:15:00,1
2,2024-03-03,15,81,2024-03-03 09:13:00,1
3,2024-03-04,18,95,2024-03-04 09:18:00,0
4,2024-03-05,17,101,2024-03-05 09:19:00,0
5,2024-03-06,15,100,2024-03-06 09:15:00,0
6,2024-03-07,23,95,2024-03-07 09:19:00,0
7,2024-03-08,20,104,2024-03-08 09:17:00,0
8,2024-03-09,21,83,2024-03-09 09:28:00,1
9,2024-03-10,24,78,2024-03-10 09:24:00,1


In [2]:
# Timetable view of `df`: the date plus the three clock times of each trip,
# all rendered as text.
departure = df["Horário de Saída"]
boarding = departure + pd.to_timedelta(df["Espera"], unit='m')                # departure + wait
arrival = boarding + pd.to_timedelta(df["Duração da Viagem"], unit='m')       # boarding + trip

wd = pd.DataFrame({
    "Data": df["Data"].dt.strftime('%d/%m/%Y'),
    "Horário de Saída": departure.dt.strftime('%H:%M'),
    "Horário de Embarque": boarding.dt.strftime('%H:%M'),
    "Horário de Chegada": arrival.dt.strftime('%H:%M'),
})

# Names of the time-of-day columns (every column whose label contains "Horário").
cols = wd.filter(like="Horário").columns

wd.head()

Unnamed: 0,Data,Horário de Saída,Horário de Embarque,Horário de Chegada
0,01/03/2024,09:12,09:27,11:07
1,02/03/2024,09:15,09:39,10:57
2,03/03/2024,09:13,09:28,10:49
3,04/03/2024,09:18,09:36,11:11
4,05/03/2024,09:19,09:36,11:17


In [3]:
# Render the timetable as a LaTeX tabular (index column omitted) and print it.
wd_latex = wd.to_latex(index=False)
print(wd_latex)

\begin{tabular}{llll}
\toprule
Data & Horário de Saída & Horário de Embarque & Horário de Chegada \\
\midrule
01/03/2024 & 09:12 & 09:27 & 11:07 \\
02/03/2024 & 09:15 & 09:39 & 10:57 \\
03/03/2024 & 09:13 & 09:28 & 10:49 \\
04/03/2024 & 09:18 & 09:36 & 11:11 \\
05/03/2024 & 09:19 & 09:36 & 11:17 \\
06/03/2024 & 09:15 & 09:30 & 11:10 \\
07/03/2024 & 09:19 & 09:42 & 11:17 \\
08/03/2024 & 09:17 & 09:37 & 11:21 \\
09/03/2024 & 09:28 & 09:49 & 11:12 \\
10/03/2024 & 09:24 & 09:48 & 11:06 \\
\bottomrule
\end{tabular}



In [4]:
# Numeric feature table derived from `df`, with the columns in the order
# used by the cells that follow.
weekday = df["Data"].dt.weekday  # Monday=0 ... Sunday=6

ft = pd.DataFrame({
    # Only the minute component of the departure timestamp is kept.
    "Horário de Saída": df["Horário de Saída"].dt.minute.astype("int64"),
    # 1 on Saturday/Sunday, 0 otherwise.
    "Final de Semana?": weekday.isin([5, 6]).astype(np.int64),
    "Espera": df["Espera"],
    "Dia": weekday,
    "Duração da Viagem": df["Duração da Viagem"],
})
ft

Unnamed: 0,Horário de Saída,Final de Semana?,Espera,Dia,Duração da Viagem
0,12,0,15,4,100
1,15,1,24,5,78
2,13,1,15,6,81
3,18,0,18,0,95
4,19,0,17,1,101
5,15,0,15,2,100
6,19,0,23,3,95
7,17,0,20,4,104
8,28,1,21,5,83
9,24,1,24,6,78


In [5]:
# Render the feature table as a LaTeX tabular (index column omitted) and print it.
ft_latex = ft.to_latex(index=False)
print(ft_latex)

\begin{tabular}{rrrrr}
\toprule
Horário de Saída & Final de Semana? & Espera & Dia & Duração da Viagem \\
\midrule
12 & 0 & 15 & 4 & 100 \\
15 & 1 & 24 & 5 & 78 \\
13 & 1 & 15 & 6 & 81 \\
18 & 0 & 18 & 0 & 95 \\
19 & 0 & 17 & 1 & 101 \\
15 & 0 & 15 & 2 & 100 \\
19 & 0 & 23 & 3 & 95 \\
17 & 0 & 20 & 4 & 104 \\
28 & 1 & 21 & 5 & 83 \\
24 & 1 & 24 & 6 & 78 \\
\bottomrule
\end{tabular}



In [6]:
# Sliding-window ("lag") table built from `ft`.
# For every row it exposes the departure minute (H) and trip duration (D) of the
# WINDOW preceding rows as H_1/D_1 ... H_WINDOW/D_WINDOW, followed by the row's
# own values as H_0/D_0.
WINDOW = 3  # how many previous rows become lag columns

lw = ft.copy()

# Current-row values.
lw['H_0'] = ft['Horário de Saída']
lw['D_0'] = ft['Duração da Viagem']

# Lagged values: shift(i) moves each series down by i rows, so row r receives
# the value from row r - i. Start at 1; lag 0 is already covered by H_0/D_0.
lag_columns = []
for i in range(1, WINDOW + 1):
    lw[f'H_{i}'] = ft['Horário de Saída'].shift(i)
    lw[f'D_{i}'] = ft['Duração da Viagem'].shift(i)
    lag_columns += [f'H_{i}', f'D_{i}']

# shift() leaves NaN in the first rows (no full history yet); drop those rows.
# The NaN padding is also why the lag columns are float64 rather than int64.
lw = lw.dropna()

# Select by name (lags first, current values last) instead of by position, so
# the result does not depend on how many columns `ft` happens to have.
lw = lw[lag_columns + ['H_0', 'D_0']]

display(ft)
display(lw)
display(lw.dtypes)


Unnamed: 0,Horário de Saída,Final de Semana?,Espera,Dia,Duração da Viagem
0,12,0,15,4,100
1,15,1,24,5,78
2,13,1,15,6,81
3,18,0,18,0,95
4,19,0,17,1,101
5,15,0,15,2,100
6,19,0,23,3,95
7,17,0,20,4,104
8,28,1,21,5,83
9,24,1,24,6,78


Unnamed: 0,H_1,D_1,H_2,D_2,H_3,D_3,H_0,D_0
3,13.0,81.0,15.0,78.0,12.0,100.0,18,95
4,18.0,95.0,13.0,81.0,15.0,78.0,19,101
5,19.0,101.0,18.0,95.0,13.0,81.0,15,100
6,15.0,100.0,19.0,101.0,18.0,95.0,19,95
7,19.0,95.0,15.0,100.0,19.0,101.0,17,104
8,17.0,104.0,19.0,95.0,15.0,100.0,28,83
9,28.0,83.0,17.0,104.0,19.0,95.0,24,78


H_1    float64
D_1    float64
H_2    float64
D_2    float64
H_3    float64
D_3    float64
H_0      int64
D_0      int64
dtype: object

In [7]:
# Print the lag table as LaTeX; float columns are formatted with no decimals.
no_decimals = "{:.0f}".format
print(lw.to_latex(index=False, float_format=no_decimals))

\begin{tabular}{rrrrrrrr}
\toprule
H_1 & D_1 & H_2 & D_2 & H_3 & D_3 & H_0 & D_0 \\
\midrule
13 & 81 & 15 & 78 & 12 & 100 & 18 & 95 \\
18 & 95 & 13 & 81 & 15 & 78 & 19 & 101 \\
19 & 101 & 18 & 95 & 13 & 81 & 15 & 100 \\
15 & 100 & 19 & 101 & 18 & 95 & 19 & 95 \\
19 & 95 & 15 & 100 & 19 & 101 & 17 & 104 \\
17 & 104 & 19 & 95 & 15 & 100 & 28 & 83 \\
28 & 83 & 17 & 104 & 19 & 95 & 24 & 78 \\
\bottomrule
\end{tabular}



In [8]:
# Collapse the lag columns of `lw` into one row-wise mean per feature:
#   H_m = mean of every H_* column except H_0
#   D_m = mean of every D_* column except D_0
wf = lw.copy()
d_columns = [col for col in wf.columns if col != 'D_0' and col.startswith('D')]
h_columns = [col for col in wf.columns if col != 'H_0' and col.startswith('H')]

wf['H_m'] = wf[h_columns].mean(axis=1)
wf['D_m'] = wf[d_columns].mean(axis=1)

# Keep only the window means followed by the current-row values, selected by
# name so the result does not depend on the column order of `lw`.
wf = wf[['H_m', 'D_m', 'H_0', 'D_0']]
wf


Unnamed: 0,H_m,D_m,H_0,D_0
3,13.333333,86.333333,18,95
4,15.333333,84.666667,19,101
5,16.666667,92.333333,15,100
6,17.333333,98.666667,19,95
7,17.666667,98.666667,17,104
8,17.0,99.666667,28,83
9,21.333333,94.0,24,78


In [9]:
# Print the window-mean table as LaTeX; float columns are formatted with two decimals.
two_decimals = "{:.2f}".format
print(wf.to_latex(index=False, float_format=two_decimals))

\begin{tabular}{rrrr}
\toprule
H_m & D_m & H_0 & D_0 \\
\midrule
13.33 & 86.33 & 18 & 95 \\
15.33 & 84.67 & 19 & 101 \\
16.67 & 92.33 & 15 & 100 \\
17.33 & 98.67 & 19 & 95 \\
17.67 & 98.67 & 17 & 104 \\
17.00 & 99.67 & 28 & 83 \\
21.33 & 94.00 & 24 & 78 \\
\bottomrule
\end{tabular}

