In [1]:
import pandas as pd
import pickle
import networkx as nx
import numpy as np
import random
import matplotlib.pyplot as plt
import os
import gymnasium as gym
from stable_baselines3 import A2C
import re
import tensorboard




In [17]:
def process_date_folder(date_str: str) -> dict:
    date = date_str.split("-")
    file_path = f"./sunt/{date_str}/output/trips_time-series_{date[2]}-{date[1]}-{date[0]}_{date[2]}-{date[1]}-{date[0]}.parquet"
    if not os.path.exists(file_path):
        print(f"File not found for date: {file_path}")
        return None
    
    df = pd.read_parquet(file_path)
    time_per_stop = {}  # array where [total time, number of trips]
    
    print(f"Processing {date_str}...")
    i = 0
    for index, row in df.iterrows():
        if index == 0:
            continue

        previous_row = df.loc[index-1]
    
        if previous_row["linha_atend"] != row["linha_atend"]:
            continue

        stop_pair = (previous_row["stop_id"], row["stop_id"])

        if stop_pair not in time_per_stop:
            time_per_stop[stop_pair] = [0, 0]

        time_per_stop[stop_pair][0] += row["tempo_total"]
        time_per_stop[stop_pair][1] += 1

        if index / len(df.index) * 100 > i: 
            i += 1
            print(f"\r{i}% of {len(df.index)} rows for {date_str}", end='')
    
    return time_per_stop

In [15]:
# Get all date folders
base_path = "./sunt"
date_folders = [f for f in os.listdir(base_path) 
                if os.path.isdir(os.path.join(base_path, f)) 
                and re.match(r'\d{4}-\d{2}-\d{2}', f)
                ]
date_folders.sort()

# Process all dates and combine results
combined_averages = {}


for index, date_folder in enumerate(date_folders):
    print(f"{index}/{len(date_folders)}")
    result = process_date_folder(date_folder)

    if result == None:
        continue
    
    with open(f"./output/averages_{date_folder}.pkl", "wb") as f:
        pickle.dump(result, f)
    print(f"\nSaved results for {date_folder}")
    
    print(f"Combining {date_folder}...", end="\n\n")
    for stop_pair, (total_time, count) in result.items():
        if stop_pair not in combined_averages:
            combined_averages[stop_pair] = [0, 0]
        combined_averages[stop_pair][0] += total_time
        combined_averages[stop_pair][1] += count

print(f"Saving combined averages...")
with open(f"./output/combined_sum_amount.pkl", "wb") as f:
    pickle.dump(combined_averages, f)


final_averages = {
    stop_pair: total_time / count 
    for stop_pair, (total_time, count) in combined_averages.items()
}

final_averages

0/149
Processing 2024-03-01...
100% of 739370 rows for 2024-03-01
Saved results for 2024-03-01
Combining 2024-03-01...

1/149
Processing 2024-03-02...
100% of 520832 rows for 2024-03-02
Saved results for 2024-03-02
Combining 2024-03-02...

2/149
Processing 2024-03-03...
100% of 365040 rows for 2024-03-03
Saved results for 2024-03-03
Combining 2024-03-03...

3/149
Processing 2024-03-04...
100% of 740936 rows for 2024-03-04
Saved results for 2024-03-04
Combining 2024-03-04...

4/149
Processing 2024-03-05...
100% of 750433 rows for 2024-03-05
Saved results for 2024-03-05
Combining 2024-03-05...

5/149
Processing 2024-03-06...
100% of 751793 rows for 2024-03-06
Saved results for 2024-03-06
Combining 2024-03-06...

6/149
Processing 2024-03-07...
100% of 753069 rows for 2024-03-07
Saved results for 2024-03-07
Combining 2024-03-07...

7/149
Processing 2024-03-08...
100% of 735271 rows for 2024-03-08
Saved results for 2024-03-08
Combining 2024-03-08...

8/149
Processing 2024-03-09...
100% of 5

{('44782944', '44784618'): 96.22135675373136,
 ('44784618', '44784340'): 47.69062499564676,
 ('44784340', '44782337'): 164.54141864583335,
 ('44782337', '44784470'): 175.24628490183514,
 ('44784470', '45834599'): 168.76636904464286,
 ('45834599', '45834758'): 76.24211808493466,
 ('45834758', '44784339'): 118.91685546617981,
 ('44784339', '200498925'): 108.67062468361154,
 ('200498925', '44784471'): 51.794871653917326,
 ('44784471', '44784483'): 51.18155123604881,
 ('44784483', '46022126'): 48.75709820336273,
 ('46022126', '44783450'): 44.21211193603348,
 ('44783450', '46021858'): 42.20077676894769,
 ('46021858', '46021859'): 33.88050772409966,
 ('46021859', '44784482'): 29.638442467985755,
 ('44784482', '69621796'): 23.377619881368094,
 ('69621796', '43968748'): 20.25746282157379,
 ('43968748', '45834755'): 66.70414403080602,
 ('45834755', '192365034'): 41.810237470122956,
 ('192365034', '43968713'): 47.513071820372716,
 ('43968713', '44782577'): 99.9737934277266,
 ('44782577', '447825

In [2]:
with open('./sunt/graph_designer/graph_gtfs.gpickle', 'rb') as f:
    G = pickle.load(f)
G

<networkx.classes.digraph.DiGraph at 0x104ad05e0>

In [3]:
class RewardBaseClass():
    def getReward(self, state, action, target, graph):
        raise NotImplementedError

class StopConditionBaseClass():
    def isTerminated(self,state, action, target, graph):
        raise NotImplementedError

class DefaultReward(RewardBaseClass):
    def __init__(self) -> None:
        super().__init__()
        with open('./output/combined_sum_amount.pkl', 'rb') as f:
            self.waitTimeDict = pickle.load(f) 


    def getReward(self, state, action, target, graph):
        possible_actions = list(graph.neighbors(state))

        if action >= len(possible_actions):
            return -100000
        
        totalTime = self.waitTimeDict[(state, possible_actions[action])][0]
        amount = self.waitTimeDict[(state, possible_actions[action])][1]

        reward = -totalTime / amount

        if state == target:
            reward = reward/2

        return reward
    
class DefaultStopClass(StopConditionBaseClass):
    def isTerminated(self, state, action, target, graph):
        return state == target

    
        

In [66]:
class tccEnv(gym.Env):
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(self, network: nx.Graph, actions_amout: int, stopClass = None, rewardClass = None):
        self.network = network 

        self.state = list(self.network.nodes)[random.randint(0, self.network.number_of_nodes()-1)]
        self.target = list(self.network.nodes)[random.randint(0, self.network.number_of_nodes()-1)]

        while(self.state == self.target and self.network.number_of_nodes != 1):
            self.target = list(self.network.nodes)[random.randint(0, self.network.number_of_nodes()-1)]


        self.stop = DefaultStopClass() if stopClass is None else stopClass 
        self.reward = DefaultReward() if rewardClass is None else rewardClass 

        self.action_space = gym.spaces.Discrete(actions_amout)
        self.observation_space = gym.spaces.Box(low=0, high=np.array([self.network.number_of_nodes(), self.network.number_of_nodes()]), shape=(2,), dtype=np.int32)

        self.count = 0
        
    def reset(self,seed=None):
        super().reset(seed=seed)
        self.state = list(self.network.nodes)[random.randint(0, self.network.number_of_nodes()-1)]
        self.target = list(self.network.nodes)[random.randint(0, self.network.number_of_nodes()-1)]

        while(self.state == self.target and self.network.number_of_nodes != 1):
            self.target = list(self.network.nodes)[random.randint(0, self.network.number_of_nodes()-1)]
        
        self.count = 0

        obs = self.state
        return obs, {}
    
    

    def step(self,action):
        self.count += 1
        print(action)

        terminated = self.stop.isTerminated(self.state, action, self.target, self.network)
        
        reward = self.reward.getReward(self.state, action, self.target, self.network)

        possible_actions = list(self.network.neighbors(self.state))

        if len(possible_actions) == 0:
            terminated = True
            reward = -100000000
        else:
            self.state = possible_actions[action]

        obs = self.state
        
        return obs, reward, terminated, False, {"count" : self.count}



In [67]:
env = tccEnv(G, 9)

In [79]:
def run_q(episodes, env: tccEnv, is_training=True):

    if(is_training):
        q = np.zeros((env.network.number_of_nodes(), env.action_space.n))
    else:
        f = open('q.pkl', 'rb')
        q = pickle.load(f)
        f.close()

    # Hyperparameters
    learning_rate_a = 0.9   
    discount_factor_g = 0.9 
    epsilon = 1             


    reward_per_episode = np.zeros(episodes)

    dict = {}
    seen_nodes = 0
    for i in range(episodes):
        state = env.reset()[0]

        if state not in dict:
            dict[state] = seen_nodes
            seen_nodes += 1

        terminated = False

        while(not terminated):

            a = random.random()    

            if is_training and a< epsilon:
                action = random.randint(0, max(0,len(list(env.network.neighbors(env.state))) -1) )
                print(f"{action} {env.state} {list(env.network.neighbors(env.state))}")
            else:                
                q_state_idx = (state) 

                action = np.argmax(q[dict[q_state_idx]])
            
            new_state,reward,terminated,_,extra = env.step(action)
            
            if new_state not in dict:
                dict[new_state] = seen_nodes
                seen_nodes += 1

            q_state_action_idx = (dict[state],action)

            q_new_state_idx = (dict[new_state])

            if is_training:
                q[q_state_action_idx] = q[q_state_action_idx] + learning_rate_a * (
                        reward + discount_factor_g * np.max(q[q_new_state_idx]) - q[q_state_action_idx]
                )

            state = new_state

            # reward_per_episode[i] += reward

            # c = extra["count"]

            #print(f"{action} {new_state} {reward} {terminated} {c}")

        epsilon = max(epsilon - 1/episodes, 0)

    # env.close()

    # sum_reward = np.zeros(episodes)
    # for t in range(episodes):
    #     sum_reward[t] = np.mean(reward_per_episode[max(0, t-100):(t+1)]) 
    # plt.plot(sum_reward)
    # plt.savefig('q.png')

    # if is_training:
    #     # Save Q Table
    #     f = open("q.pkl","wb")
    #     pickle.dump(q, f)
    #     f.close()
    # return reward_per_episode


In [78]:
run_q(100, env, is_training=True)

0 59122403 ['59122400']
0
0 59122400 ['59122401']
0
2 59122401 ['269617783', '59122399', '59122402']
2
1 59122402 ['56314605', '82414047', '82414048']
1
0 82414047 ['153081313', '82414045', '82414046']
0
0 153081313 ['102324560']
0
0 102324560 ['153081312']
0
0 153081312 ['82414041']
0
0 82414041 ['69520249']
0
0 69520249 ['82414043']
0
0 82414043 ['69373013']
0
0 69373013 ['63768247', '82414044']
0
0 63768247 ['63768249', '63768250']
0
0 63768249 ['63768250']
0
0 63768250 ['63768252']
0
0 63768252 ['63768254']
0
0 63768254 ['44782596']
0
0 44782596 ['215205214', '45833890']
0
0 215205214 ['45833892']
0
1 45833892 ['129107065', '44783680']
1
0 44783680 ['44784356']
0
0 44784356 ['44165093']
0
0 44165093 ['44784358']
0
0 44784358 ['49367711']
0
1 49367711 ['269085280', '44783684', '45832860']
1
0 44783684 ['269085280', '45832860']
0
0 269085280 ['45832860']
0
0 45832860 ['45833706']
0
2 45833706 ['262857551', '262857552', '45833048']
2
2 45833048 ['44783678', '44784150', '46021751']
2
0

IndexError: list index out of range

In [76]:
terminated = False
for i in range(100):
    while(not terminated):
        action = random.randint(0, max(0,len(list(env.network.neighbors(env.state))) -1) )
        new_state,reward,terminated,_,extra = env.step(action)

        print(new_state, reward, terminated, extra)

0
92406083 -98.85136317381689 False {'count': 1637}
0
45833247 -150.68374191972575 False {'count': 1638}
0
45833834 -43.73652865461702 False {'count': 1639}
0
47568278 -80.91159424768604 False {'count': 1640}
0
46021628 -98.72761910585986 False {'count': 1641}
1
47565790 -97.55092937209878 False {'count': 1642}
0
47565760 -71.00962081035412 False {'count': 1643}
0
45833189 -86.44763065273288 False {'count': 1644}
0
44784410 -65.128349502574 False {'count': 1645}
0
45833757 -52.0172383912484 False {'count': 1646}
0
153089556 -37.133658382882885 False {'count': 1647}
0
44782875 -99.40199815122264 False {'count': 1648}
0
66118616 -301.4148681640927 False {'count': 1649}
0
149965406 -269.49585696277916 False {'count': 1650}
0
149965407 -94.10308105583127 False {'count': 1651}
0
45833873 -137.1785566557072 False {'count': 1652}
0
44782479 -159.64609180955335 False {'count': 1653}
1
45834165 -94.43296820775797 False {'count': 1654}
0
44782980 -82.95275824211228 False {'count': 1655}
0
447824

In [57]:
def train_sb3():
    # Where to store trained model and logs
    model_dir = "models"
    log_dir = "logs"
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)

    env = tccEnv(G, 9)

    # Use Advantage Actor Critic (A2C) algorithm.
    # Use MlpPolicy for observation space 1D vector.
    model = A2C('MlpPolicy', env, verbose=1, device='cuda', tensorboard_log=log_dir)
   
    # This loop will keep training until you stop it with Ctr-C.
    # Start another cmd prompt and launch Tensorboard: tensorboard --logdir logs
    # Once Tensorboard is loaded, it will print a URL. Follow the URL to see the status of the training.
    # Stop the training when you're satisfied with the status.
    TIMESTEPS = 1000
    iters = 0
    while True:
        iters += 1

        model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False) # train
        model.save(f"{model_dir}/a2c_{TIMESTEPS*iters}") # Save a trained model every TIMESTEPS

In [59]:
train_sb3()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


ImportError: Trying to log data to tensorboard but tensorboard is not installed.