In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
from tqdm import tqdm
from pandas.tseries.offsets import DateOffset
#from torch_geometric.data import Data

#Lets start at src location
if os.path.exists("./src"):
    os.chdir("./src")


config = {
    "counter_files_path" : "../data/counters_temporal_data_2023-03-03T09-24-06/",
    "N_GRAPHS"           : 30*24,
    "F_IN"               : 7*24,
    "F_OUT"              : 7*24,
    "target_col"         : "Sum"
}

In [14]:
class TrafficDataset:
    def __init__(self, config):
        self.config = config
        self.X = None
        self.Y = None

    def prepare_data(self):
        #First prepare general matrix X for all counters
        counters_df = pd.DataFrame()
        for fname in glob.glob(self.config["counter_files_path"] + "*.csv"):
            counter_data = pd.read_csv(fname)
            counter_data['Date'] = pd.to_datetime(counter_data['Date']) 
            counter_data.index = counter_data['Date']
            counter_data = counter_data.sort_index(ascending=False)
            # We don't need to work with all past data.
            # Select enough data points to extract N_GRAPHS with F_IN and F_OUT timepoints
            
            counter_data = counter_data.iloc[0:(self.config["F_IN"]+self.config["F_OUT"]+self.config["N_GRAPHS"]-1), :]
            counter_id = fname.split('/')[-1].split('.csv')[0]

            if counters_df.empty:
                counters_df = pd.DataFrame(counter_data[self.config['target_col']])
                counters_df.columns = [counter_id]
            else:
                columns = list(counters_df.columns) + [counter_id]
                counters_df = pd.concat([counters_df, counter_data[self.config['target_col']]], axis=1)
                counters_df.columns = columns 

        return counters_df

            

In [15]:
td = TrafficDataset(config)
df = td.prepare_data()

In [16]:
df

Unnamed: 0_level_0,1016-1,1050-1,0837-1,0890-1,0178-1,1029-1,0853-1,0815-2,0870-2,0840-2,...,0802-1,1048-1,0802-2,0645-2,1007-2,1018-2,0875-2,1051-1,1022-2,0836-1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-02-09 15:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
2022-02-09 16:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
2022-02-09 17:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
2022-02-09 18:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
2022-02-09 19:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-03 04:00:00+00:00,572.0,344.0,361.0,303.0,1403.0,1063.0,969.0,221.0,809.0,984.0,...,857.0,313.0,943.0,1607.0,508.0,719.0,631.0,258.0,108.0,1263.0
2023-03-03 05:00:00+00:00,1111.0,772.0,675.0,709.0,3104.0,1717.0,3037.0,371.0,1474.0,1692.0,...,1408.0,442.0,2178.0,3875.0,1056.0,1490.0,792.0,629.0,145.0,1707.0
2023-03-03 06:00:00+00:00,1415.0,842.0,691.0,744.0,3327.0,1615.0,3119.0,384.0,1179.0,1726.0,...,1530.0,397.0,2149.0,3523.0,1138.0,1892.0,662.0,656.0,149.0,1560.0
2023-03-03 07:00:00+00:00,1118.0,774.0,658.0,709.0,2420.0,1322.0,2225.0,325.0,998.0,1247.0,...,1356.0,362.0,1706.0,2106.0,1249.0,1411.0,559.0,592.0,204.0,1444.0
