In [1]:
from gymnasium.envs.registration import register
import gymnasium as gym
from q_learn_code.agent.Agent import Agent
import numpy as np
import pandas as pd
from tqdm import tqdm
import pickle as pk
import json

In [2]:
city = 'London'

In [3]:
data = pd.read_csv("data_process/_RawData/" + city + "/POIs.csv")
quantity_of_POIS = len(data)

POI_to_ID = {}
ID_to_POI = {}
index = 0
for poi in list(data['poiID']):
    POI_to_ID[poi] = index
    ID_to_POI[index] = poi
    index = index + 1

In [4]:
register(
     id="MARCPTR",
     entry_point="q_learn_code.envs:MARCPTR",
     max_episode_steps=300,
)

In [5]:
env = gym.make('q_learn_code.envs:MARCPTR')

In [6]:
env.set_size(quantity_of_POIS)

30

In [7]:
file = open('data_process/' + city + '/sequence', 'rb')
sequence = pk.load(file)

startpoint = {}
for item in sequence:
    start = sequence[item][0]
    end = sequence[item][-1]
    startpoint[item] = (start, end)
    
#startpoint

In [8]:
len(startpoint)

2226

In [9]:
number_of_users = len(startpoint)

In [10]:
def train_model(agent, n_episodes):
    #env = gym.wrappers.RecordEpisodeStatistics(env, deque_size=n_episodes)
    total_rw = []
    for episode in tqdm(range(n_episodes)):
        obs, info = env.reset()
        done = False

        # play one episode
        rw = 0
        while not done:
            action = agent.get_action(obs['agent'], env)

            next_obs, reward, terminated, truncated, info = env.step(action)
            
            rw = rw + reward

            # update the agent
            agent.update(obs['agent'], action, reward, terminated, next_obs['agent'])

            # update if the environment is done and the current obs
            done = terminated or truncated

            obs = next_obs
        total_rw.append(rw)
        #print(rw)
        #print(reward)

        agent.decay_epsilon()
    return np.array(total_rw)

In [13]:
# hyperparameters
learning_rate = 0.01
n_episodes = 100
start_epsilon = 1.0
epsilon_decay = start_epsilon / (n_episodes / 2)  # reduce the exploration over time
final_epsilon = 0.1
#################


In [14]:
reward = 0
Q_TABLES = {}
for item in startpoint:
    agent = Agent(learning_rate=learning_rate,initial_epsilon=start_epsilon,
              epsilon_decay=epsilon_decay,final_epsilon=final_epsilon, q_table_size=quantity_of_POIS)
    
    env.reset(start=POI_to_ID[startpoint[item][0]], end=POI_to_ID[startpoint[item][1]])
    env.define_user(item, city)
    reward = reward + train_model(agent, n_episodes)
    Q_TABLES[item] = agent.q_values
    

100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 562.00it/s]
100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 629.25it/s]
100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 538.24it/s]
100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 573.14it/s]
100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 528.46it/s]
100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 503.55it/s]
100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 515.35it/s]
100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 492.63it/s]
100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00

In [15]:
reward = reward / number_of_users

In [17]:
with open("results/" + city + "/reward","wb") as file:
    pk.dump(reward, file)

In [18]:
recomendation = {}

In [19]:
def get_shortest_path(q_values, start, end):
    if(start == end):
        return [start]
    next_obs, info = env.reset(start=start, end=end)
    #print(next_obs)
    done = False
    shortest_path = []
    shortest_path.append(start)
    while not done:
        action_index = np.argmax(q_values[next_obs['agent'][1]])
        #print('proximo POI', action_index)
        while(action_index in shortest_path):
            q_values[:, np.argmax(q_values[next_obs['agent'][1]])] = -1000
            action_index = np.argmax(q_values[next_obs['agent'][1]])
            #print('proximo POI - inside while', action_index)
        next_obs, reward, terminated, truncated, info = env.step(action_index)
        #print(next_obs, terminated)
        shortest_path.append(next_obs['agent'][1])
        done = terminated or truncated
    return shortest_path

In [20]:
biggest_seq = 0
recomendation = {}
for item in startpoint:
    q_values = Q_TABLES[item].copy()
    env.reset(start=POI_to_ID[startpoint[item][0]], end=POI_to_ID[startpoint[item][1]])
    sp = get_shortest_path(q_values, POI_to_ID[startpoint[item][0]], POI_to_ID[startpoint[item][1]])
    if len(sp) > biggest_seq:
        biggest_seq = len(sp)
    sp = [ID_to_POI[item] for item in sp]
    recomendation[item] = (sequence[item], sp)

In [21]:
recomendation

{'100153087@N08': ([8], [8]),
 '100228151@N03': ([3], [3]),
 '100243935@N07': ([6], [6]),
 '100251963@N05': ([12], [12]),
 '10028574@N02': ([1], [1]),
 '100404341@N06': ([5], [5]),
 '100470389@N06': ([3], [3]),
 '100595443@N07': ([5], [5]),
 '10073165@N03': ([11], [11]),
 '10076455@N04': ([19], [19]),
 '100940221@N03': ([26], [26]),
 '100978997@N07': ([5, 26], [5, 15, 22, 4, 10, 19, 18, 8, 30, 26]),
 '100987139@N04': ([2], [2]),
 '100991133@N03': ([5], [5]),
 '101053609@N05': ([28], [28]),
 '101090910@N07': ([16], [16]),
 '101133171@N06': ([26], [26]),
 '101212512@N07': ([16, 22, 1, 3],
  [16,
   8,
   22,
   20,
   4,
   25,
   6,
   9,
   1,
   15,
   19,
   7,
   23,
   30,
   21,
   24,
   18,
   29,
   2,
   27,
   26,
   10,
   13,
   5,
   11,
   17,
   14,
   12,
   28,
   3]),
 '10135721@N02': ([5], [5]),
 '101419012@N05': ([3], [3]),
 '101463652@N02': ([5], [5]),
 '10147029@N08': ([25], [25]),
 '101485105@N07': ([28], [28]),
 '101599161@N06': ([3], [3]),
 '101677044@N07': ([2

In [22]:
user_app_demand = {}
file = open("data_process/_RawData/" + city + "/user_apps.in", "r")
data = file.readlines()
for i in data:
    user = i.rsplit('"')[0].rsplit()[0]
    app = i.rsplit('"')[1]
    
    if(app == 'Augmented reality'):
        user_app_demand[user] = {'bandwidth':(1, 10), 'processing':(0.1, 1)}
    elif(app=='Video streaming'):
        user_app_demand[user] = {'bandwidth':(1.5, 25), 'processing':(0, 0)}
    elif(app=='Video streaming and augmented reality'):
        user_app_demand[user] = {'bandwidth':(2.5, 35), 'processing':(0.1, 1)}
    elif(app=='No application'):
        user_app_demand[user] = {'bandwidth':(0,0), 'processing':(0, 0)}

In [23]:
data = pd.read_csv("data_process/_RawData/" + city + "/touristsVisits.csv")
users_id = data['userID'].unique()
for i in users_id:
    if i not in user_app_demand:
        user_app_demand[i] = {'bandwidth': (0, 0), 'processing': (0, 0)}

In [24]:
len(user_app_demand)

2226

In [25]:
band_demand_for_user = dict.fromkeys(list(startpoint.keys()), None)
mec_demand_for_user = dict.fromkeys(list(startpoint.keys()), None)
for item in list(startpoint.keys()):
    band_demand_for_user[item] = []
    mec_demand_for_user[item] = []

for t in range(biggest_seq):
    for p in range(quantity_of_POIS):
    
        list_of_user = []
        list_of_b_dem = []
        list_of_m_dem = []
        for item in recomendation:
            #print(t)
            #print(recomendation[item][1][t])
            if (len(recomendation[item][1]) > t) and recomendation[item][1][t] == ID_to_POI[p]:
                #print(recomendation[item][1][t], ID_to_POI[p])
                list_of_user.append(item)
                list_of_b_dem.append(user_app_demand[item]['bandwidth'][1])
                list_of_m_dem.append(user_app_demand[item]['processing'][1])
                
                factor = 0.05
                while(np.sum(list_of_b_dem)) > 75:
                    list_of_b_dem = [x * (1-factor) for x in list_of_b_dem]
                    factor = factor + 0.05  
                factor = 0.05
                while(np.sum(list_of_m_dem)) > 75:
                    list_of_m_dem = [x * (1-factor) for x in list_of_m_dem]
                    factor = factor + 0.05     
        #print(list_of_dem)
        #print(list_of_user)
        for dem in range(len(list_of_b_dem)):
            band_demand_for_user[list_of_user[dem]].append(list_of_b_dem[dem])
            mec_demand_for_user[list_of_user[dem]].append(list_of_m_dem[dem])
            
#list_of_user

In [26]:
band_demand_for_user

{'100153087@N08': [0.010204103901545305],
 '100228151@N03': [0.0],
 '100243935@N07': [0.0],
 '100251963@N05': [18.16875],
 '10028574@N02': [4.0708372521926107e-07],
 '100404341@N06': [1.3146835323459085e-32],
 '100470389@N06': [2.1398138839777262e-21],
 '100595443@N07': [0.0],
 '10073165@N03': [1.6686346821512813e-11],
 '10076455@N04': [1.155255490438091e-12],
 '100940221@N03': [8.358295264227843e-21],
 '100978997@N07': [1.840556945284269e-32,
  0.00409323979001111,
  1.9481976782755028,
  0.052156082893900516,
  3.319750538351226,
  1.265938851343422,
  0.7079562058345437,
  0.4399036325646674,
  0.14696476518173449,
  1.5917319380862143],
 '100987139@N04': [4.661610259627653e-11],
 '100991133@N03': [1.3146835323459085e-32],
 '101053609@N05': [5.433932589062753e-06],
 '101090910@N07': [7.897623962991884e-15],
 '101133171@N06': [0.0],
 '101212512@N07': [7.897623962991884e-15,
  0.0005141970108314936,
  1.391569770196788,
  1.3097127248910942,
  0.08708068395685371,
  0.9670779347815551

In [27]:
len(mec_demand_for_user)

2226

In [28]:
len(recomendation)

2226

In [29]:
FINAL_RESULTS = {}

In [30]:
for item in users_id:
    FINAL_RESULTS[item] = {'original':recomendation[item][0], 
                           'recommendation':recomendation[item][1],
                           'demand_bandwidth':user_app_demand[item]['bandwidth'],
                           'allocated_bandwidth':band_demand_for_user[item],
                           'demand_mec':user_app_demand[item]['processing'],
                           'allocated_mec':mec_demand_for_user[item]}

In [31]:
with open('results/' + city + '/result.json', 'w') as fp:
    json.dump(FINAL_RESULTS, fp, indent=4, default=int)