In [None]:
"""
    Handles imports and global variables
"""

# import tensorflow as tf
import pickle
from collections import defaultdict

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

# Directory prefix (relative to the working directory) that is prepended to
# the .npy file names when the data files are loaded.
data_location = "./../MoviaFTP/"

In [17]:
"""
    Declarations of secondary functions for handling the data
"""

def load_data(fp = '', pickle = False):
    """Load an array from a ``.npy`` file.

    Args:
        fp: Path of the file to read; passed straight to ``np.load``.
        pickle: Forwarded as ``allow_pickle``. Needed for object arrays.
            NOTE: unpickling can execute arbitrary code, so only enable it
            for files from a trusted source.

    Returns:
        Whatever ``np.load`` returns for the file.
    """
    # The ``pickle`` parameter shadows the ``pickle`` module inside this
    # function; the name is kept because callers pass it by keyword.
    return np.load(fp, allow_pickle=pickle)

def transform_matrix_fwd(matrix):
    """Flatten one OD matrix into per-link counts for the forward route.

    Link ``i`` (0-based) sits between positions ``i`` and ``i + 1``. Its
    count is the sum of every entry whose row index is ``<= i`` and whose
    column index is ``> i``.
    NOTE(review): presumably rows are origins and columns destinations, so
    this is everyone who boarded at or before ``i`` and alights after it --
    confirm against the data description.

    Args:
        matrix: 2-D array-like (ndarray or nested sequences). The number of
            links is derived from the number of rows.

    Returns:
        A list with ``len(matrix) - 1`` NumPy scalars, one per link, ordered
        from the first link to the last. Empty for matrices with fewer than
        two rows.
    """
    # Coerce once so nested lists support the 2-D slicing below; this is a
    # no-op for inputs that already are ndarrays.
    od = np.asarray(matrix)
    return [np.sum(od[:i + 1, i + 1:]) for i in range(len(od) - 1)]

def transform_matrix_bwd(matrix):
    """Flatten one OD matrix into per-link counts for the reverse route.

    For link ``i`` (0-based, between positions ``i`` and ``i + 1``) the count
    is the sum of every entry whose row index is ``> i`` and whose column
    index is ``<= i``. The resulting list is then reversed, so the first
    element belongs to the last link of the matrix.
    NOTE(review): presumably the reversal puts links in travel order for the
    reverse direction -- confirm against the data description.

    Args:
        matrix: 2-D array-like (ndarray or nested sequences). The number of
            links is derived from the number of rows.

    Returns:
        A list with ``len(matrix) - 1`` NumPy scalars, one per link, ordered
        from the last link to the first. Empty for matrices with fewer than
        two rows.
    """
    # Coerce once so nested lists support the 2-D slicing below; this is a
    # no-op for inputs that already are ndarrays.
    od = np.asarray(matrix)
    link_counts = [np.sum(od[i + 1:, :i + 1]) for i in range(len(od) - 1)]
    link_counts.reverse()
    return link_counts

def transform_all(matrices):
    """Convert a sequence of OD matrices into forward and reverse link counts.

    Each matrix is passed to ``transform_matrix_fwd`` and
    ``transform_matrix_bwd``.

    Args:
        matrices: Sequence of matrices accepted by the two helpers.

    Returns:
        Tuple ``(fwd, bwd)``; each element is a list holding one link-count
        list per input matrix, in input order.
    """
    fwd_counts = [transform_matrix_fwd(od) for od in matrices]
    bwd_counts = [transform_matrix_bwd(od) for od in matrices]
    return fwd_counts, bwd_counts

def date_to_string(dates):
    """Turn each entry of ``dates`` into a short string label.

    Every entry is converted with ``str()`` and characters 2 through 14 of
    that text are kept.
    NOTE(review): presumably this skips the leading ``['`` of a one-element
    array's string form and keeps the date plus the hour -- confirm against
    the actual date format.

    Args:
        dates: Iterable of date entries.

    Returns:
        List of strings, one per entry, in input order.
    """
    return [str(entry)[2:15] for entry in dates]

def transform_to_plot_data(route_counts, start, stop):
    """Re-arrange route counts into one time series per link for plotting.

    Args:
        route_counts: 2-D structure indexed as ``route_counts[step][link]``.
        start: First step (inclusive) of the window.
        stop: End of the window (exclusive); must not exceed the number of
            steps in ``route_counts``.

    Returns:
        Dict mapping the 1-based link number to the list of that link's
        counts for steps ``start .. stop - 1``. If ``stop`` is past the end
        of the data, a message is printed and ``-1`` is returned instead.
    """
    dims = np.shape(route_counts)
    if stop <= dims[0]:
        window = range(start, stop)
        return {
            link + 1: [route_counts[step][link] for step in window]
            for link in range(dims[1])
        }

    print("Error- interval end longer than route count matrix")
    return -1

def plot_route_counts(route_counts, start, stop, links, plot_type = 'line'):
    """Plot the counts of selected links over a time window.

    Reads the module-level ``dates`` array for the x axis (line plot) or the
    tick labels (bar plot), so ``dates`` must be defined before calling.

    Args:
        route_counts: 2-D structure indexed as ``route_counts[step][link]``.
        start: First step (inclusive) of the window.
        stop: End of the window (exclusive).
        links: Iterable of 0-based link indices, i.e. pass ``[0, 1, 2]`` to
            plot links 1, 2 and 3.
        plot_type: ``'line'`` or ``'bar'``. Any other value draws nothing
            and shows an empty figure.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    link_dict = transform_to_plot_data(route_counts, start, stop)
    if not isinstance(link_dict, dict):
        # The helper signals an invalid window by printing a message and
        # returning -1; there is nothing to plot in that case.
        return

    plt.figure(figsize=(20, 8))

    if plot_type == 'line':
        for i in links:
            plt.plot(dates[start:stop], link_dict[i+1], label='link'+str(i+1))
    elif plot_type == 'bar':
        positions = np.arange(start, stop)
        for i in links:
            # Label every series so that the legend below has entries.
            plt.bar(positions, link_dict[i+1], label='link'+str(i+1))
        # Tick labels do not depend on the link, so set them only once.
        plt.xticks(positions, date_to_string(dates[start:stop]), rotation = 90)

    plt.legend()
    plt.show()


In [21]:
"""
    Main data handling script
"""
# OD demand matrices, one per time step.
demand_matrices = load_data(data_location + 'od-demand-202010-150.npy')

# Stop names are stored as an object array, hence pickle = True.
stop_names = load_data(data_location + 'od_stop-202010-150.npy', pickle = True)
print("bus stops: ", stop_names)

# Time stamps, reshaped into a column vector (N, 1).
dates = load_data(data_location + 'od_time-202010-150.npy')
dates = dates.reshape(np.shape(dates)[0], 1)
print("dimensions of date data: ", np.shape(dates))

# Finally we transform the data from matrices to link counts
route_counts_fwd, route_counts_bwd = transform_all(demand_matrices)
print("dimension of transformed matrices: ", np.shape(route_counts_fwd))
# Debug aid:
# test = transform_matrix_fwd(demand_matrices[5700])
# print("test: ", test)
# print(demand_matrices[5700])

# Chronological 80 / 10 / 10 split; the test set takes whatever remains after
# rounding so that the three sizes always add up to the number of samples.
num_samples = np.shape(route_counts_fwd)[0]
train_set_size = int(0.8*num_samples)
validation_set_size = int(0.1*num_samples)
test_set_size = num_samples - train_set_size - validation_set_size
assert(train_set_size+validation_set_size+test_set_size == num_samples)
print(train_set_size, validation_set_size, test_set_size)

# The validation window starts where the training window ends. Slicing from
# validation_set_size (as was done before) overlaps the training data.
validation_end = train_set_size + validation_set_size

x_train_fwd = route_counts_fwd[0:train_set_size]
x_train_bwd = route_counts_bwd[0:train_set_size]
x_valid_fwd = route_counts_fwd[train_set_size:validation_end]
x_valid_bwd = route_counts_bwd[train_set_size:validation_end]
x_test_fwd = route_counts_fwd[validation_end:]
x_test_bwd = route_counts_bwd[validation_end:]

# Network input size in hours
net_input_size = 72 
# Network output size in hours
net_output_size = 6


bus stops:  ['KDST' 'EGEV' 'HHLS' 'HHM' 'GLHO' 'NÆST' 'KLBV' 'RYST' 'HKP' 'NPST1']
dimensions of date data:  (5786, 1)
dimension of transformed matrices:  (5786, 9)
4628 578 580
