In [94]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
from tqdm import tqdm
from pandas.tseries.offsets import DateOffset
import preprocessing as preprocessing

import torch
print(torch.__version__)
from torch_geometric.data import Data

# Switch the working directory to ./src when that directory exists.
# NOTE(review): `import preprocessing` runs before this chdir in the import
# lines above — confirm the module is importable from the start directory.
if os.path.exists("./src"):
    os.chdir("./src")

# Root directory of the input data. Set the TRAFFIC_DATA_DIR environment
# variable to run the notebook on another machine; the default keeps the
# original local checkout location.
DATA_DIR = os.environ.get(
    "TRAFFIC_DATA_DIR",
    "C:\\Users\\markoi\\Desktop\\Project-DARS\\traffic-density-MarkoLocal\\data",
)

config = {
    # Directory with one CSV per counter. The trailing "" keeps a path
    # separator at the end, because the value is used as a glob prefix.
    "counter_files_path"                : os.path.join(DATA_DIR, "counters_temporal_data_2023-03-03T09-24-06", ""),
    # CSV passed to preprocessing.construct_edge_index to build the graph edges.
    "counters_nontemporal_aggregated"   : os.path.join(DATA_DIR, "counters_non_temporal_aggregated_data.csv"),
    # Number of graph samples to extract (presumably 30 days of hourly steps — confirm).
    "N_GRAPHS"                          : 30*24,
    # Number of input time steps per sample (presumably one week of hourly steps — confirm).
    "F_IN"                              : 7*24,
    # Number of target time steps per sample.
    "F_OUT"                             : 7*24,
    # Column of each counter CSV that holds the value to model.
    "target_col"                        : "Sum"
}

1.13.1+cpu


In [116]:
# Development helper: reload the already-imported local `preprocessing` module
# so that edits to its source are picked up without restarting the kernel.
import importlib
importlib.reload(preprocessing)

class TrafficDataset:
    """Builds sliding-window graph samples from per-counter traffic CSV files.

    The configuration dictionary must provide:

    * ``counter_files_path`` - directory containing one ``<counter_id>.csv``
      file per counter, each with a ``Date`` column and the target column.
    * ``counters_nontemporal_aggregated`` - CSV handed to
      ``preprocessing.construct_edge_index``.
    * ``N_GRAPHS`` - number of samples (windows) to produce.
    * ``F_IN`` / ``F_OUT`` - number of input / target time steps per sample.
    * ``target_col`` - name of the column to read from every counter file.
    """

    def __init__(self, config):
        self.config = config
        # Placeholders; nothing in this class assigns them.
        self.X = None
        self.Y = None

    def _load_counter_series(self, fname, n_rows):
        """Return the latest ``n_rows`` target values of one counter, oldest first."""
        counter_data = pd.read_csv(fname)
        counter_data = preprocessing.fill_gaps(counter_data)
        counter_data['Date'] = pd.to_datetime(counter_data['Date'])
        series = counter_data.set_index('Date')[self.config['target_col']]
        # We don't need all past data: keep just enough points to cut
        # N_GRAPHS windows of F_IN + F_OUT steps, in chronological order.
        return series.sort_index().tail(n_rows)

    def prepare_data(self):
        """Create one graph sample per sliding window over the counter data.

        Rows are kept in chronological order, so in every sample ``x`` holds
        the ``F_IN`` steps that precede the ``F_OUT`` steps stored in ``y``.
        ``graphs[0]`` is the most recent window; each following graph is
        shifted one step further into the past.

        Returns:
            list: ``N_GRAPHS`` ``Data`` objects. ``x`` has shape
            ``[F_IN, n_counters]`` and ``y`` has shape ``[F_OUT, n_counters]``
            (float32); all graphs share the same ``edge_index`` tensor.

        Raises:
            FileNotFoundError: no ``*.csv`` file exists in ``counter_files_path``.
            ValueError: fewer than ``F_IN + F_OUT + N_GRAPHS - 1`` time steps
                are available, so full-sized windows cannot be cut.
        """
        f_in = self.config["F_IN"]
        f_out = self.config["F_OUT"]
        n_graphs = self.config["N_GRAPHS"]
        window = f_in + f_out
        n_rows = window + n_graphs - 1

        # Sorted so the column (node) order does not depend on the file system.
        # NOTE(review): column order must match the node numbering used by
        # preprocessing.construct_edge_index - confirm.
        pattern = os.path.join(self.config["counter_files_path"], "*.csv")
        fnames = sorted(glob.glob(pattern))
        if not fnames:
            raise FileNotFoundError(f"No counter CSV files match {pattern!r}")

        # First prepare the general matrix for all counters: one column per
        # counter, one row per timestamp. Collect first and concatenate once
        # instead of growing a DataFrame inside the loop.
        counter_ids = []
        counter_series = []
        for fname in fnames:
            # basename/splitext works with any path separator.
            counter_ids.append(os.path.splitext(os.path.basename(fname))[0])
            counter_series.append(self._load_counter_series(fname, n_rows))
        # Outer join on the timestamp; a counter lacking a timestamp gets NaN there.
        counters_df = pd.concat(counter_series, axis=1, keys=counter_ids).sort_index()

        total_rows = len(counters_df)
        if total_rows < n_rows:
            raise ValueError(
                f"Need at least {n_rows} time steps to build {n_graphs} graphs "
                f"with F_IN={f_in} and F_OUT={f_out}, but only {total_rows} are available"
            )

        # Prepare edge_index matrix
        counters_aggregated = pd.read_csv(self.config['counters_nontemporal_aggregated'])
        edge_index, n_node, _ = preprocessing.construct_edge_index(counters_aggregated)

        # Prepare the samples: x is [F_IN, N_NODES] and y is [F_OUT, N_NODES].
        # NOTE(review): PyG layers usually expect node-major features
        # ([N_NODES, F]) - confirm the consumer transposes these.
        graphs = []
        for i in range(n_graphs):
            # Explicit non-negative bounds: every window has exactly `window`
            # rows and the newest row is part of the first window.
            stop = total_rows - i
            train_test_chunk = counters_df.iloc[stop - window:stop, :]

            g = Data(
                x=torch.tensor(train_test_chunk.iloc[:f_in, :].to_numpy(), dtype=torch.float32),
                y=torch.tensor(train_test_chunk.iloc[f_in:, :].to_numpy(), dtype=torch.float32),
                edge_index=edge_index,
            )
            # Set the node count through the public attribute rather than the
            # private `__num_nodes__` field.
            g.num_nodes = n_node
            graphs.append(g)

        return graphs

In [None]:
# Build the dataset from the paths and window sizes in `config`;
# `dataset` is the list of graph samples returned by prepare_data().
td = TrafficDataset(config)
dataset = td.prepare_data()