In [None]:
# Colab is detected by whether `google.colab` can be imported; `drive` is kept
# in scope because the mounting cell uses it.
try:
    from google.colab import drive
    IN_COLAB = True
except ImportError:
    # Only a failed import means "not on Colab". A bare `except:` would also
    # swallow KeyboardInterrupt/SystemExit and hide unrelated errors.
    IN_COLAB = False

if IN_COLAB:
    print("We're running Colab")

In [None]:
if IN_COLAB:
    import os

    # Mount the Google Drive at mount
    mount = '/content/drive'
    print("Colab: mounting Google drive on ", mount)

    drive.mount(mount)

    # Directory on the Google Drive that becomes the working directory
    # (presumably where the project modules imported later live - confirm).
    drive_root = mount + "/My Drive/Thesis_coding_with_IAE"

    # Change to the directory. Plain os.chdir is used instead of the IPython
    # `%cd` magic so the cell is valid Python outside IPython as well.
    print("\nColab: Changing directory to ", drive_root)
    os.chdir(drive_root)

In [None]:
import pandas as pd
import scipy
import scipy.spatial
import networkx as nx
import numpy as np
from collections import deque
import collections
import pickle

# for plotting graphs
import matplotlib.pyplot as plt

from CommonDefs import Point, Edge 
from TwoDimConvexHull import TwoDimConvexHull, PrintTwoDimConvexHull
from Utils import nearestKNeighboursOfEdgeMidPt, checkTriangleForDelaunayCriteria
from Graph import Graph
from DTEnv import DTEnv
from DTDoubleDQNAgent import DTDoubleDQNAgent

In [None]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option("display.max_columns", None)

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
import random as rn

from keras import backend as K
import tensorflow as tf

# Single seed shared by all three random number generators seeded here
# (NumPy, Python's `random`, TensorFlow), so they cannot drift apart when the
# value is changed.
SEED = 30

np.random.seed(SEED)
rn.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Raw string literal: the Windows path contains backslash sequences such as
# "\T" and "\D" that are not valid escapes in a normal string literal. The raw
# form yields exactly the same path text without relying on that leniency.
# NOTE(review): this is an absolute, machine-specific path and will not exist
# when the notebook runs on Colab - consider a configurable data directory.
DATA_FILE = r"D:\Thesis_Experiments\Data\DT_5_sorted.txt"

# Space-separated file without a header row, so columns are numbered 0..n-1.
df = pd.read_csv(DATA_FILE, sep=" ", header=None)
df.head()

In [None]:
# Display the (rows, columns) size of the loaded frame.
df.shape

In [None]:
# Build pointData: row index of df -> list of Point objects for that row.
# Each row is scanned two columns at a time; every pair of cells becomes one
# Point (ids count up from 1 within the row; the two cells are presumably the
# x and y coordinates - confirm against CommonDefs.Point). Scanning stops at
# the first pair whose leading cell is the literal "output"; the column right
# after that marker is remembered in dtStartIdx.
pointData = {}
for i in range(len(df)):
    points = []
    pointId = 1
    for j in range(0, len(df.columns), 2):
        leadingCell = df.loc[i, j]
        if leadingCell == "output":
            dtStartIdx = j + 1
            break
        points.append(Point(pointId, leadingCell, df.loc[i, j + 1]))
        pointId += 1
    pointData[i] = points

#for key, value in pointData.items():
    #print('key: {}, value: {}'.format(key, value)


In [None]:
# Create the environment and the agent that is bound to it
env = DTEnv()
agent = DTDoubleDQNAgent(env)

# Reward logs filled in by the training loop:
#   avg_reward   - total reward of an episode / number of steps in that episode
#   total_reward - total reward of an episode
avg_reward, total_reward = [], []

In [None]:
# Q-value histories of the tracked pairs:
# states_track[tuple(state)][tuple(action)] -> list of recorded Q-values.
states_track = collections.defaultdict(dict)


def initialise_tracking_states(state, action):
    """Start tracking the Q-value of a (state, action) pair.

    Both arguments are converted to tuples so they can be used as dictionary
    keys. The pair gets an empty history list, which save_tracking_states
    appends to each time it is called.

    Registering a pair that is already tracked is a no-op: the history that
    has been recorded so far is kept instead of being replaced by an empty
    list.
    """
    states_track[tuple(state)].setdefault(tuple(action), [])

In [None]:
def save_tracking_states(agent):
    """Record the agent's current prediction for every tracked pair.

    For each (state, action) key pair stored in states_track, calls
    agent.prediction(state, [action]) and extends that pair's history list
    with the returned values. The growing lists are what is later plotted to
    check convergence.
    """
    for state, histories in states_track.items():
        for action, history in histories.items():
            history.extend(agent.prediction(state, [action]))

In [None]:
def save_obj(obj, name):
    """Pickle `obj` into the file `<name>.pkl`.

    `name` is the path without extension; the file is (over)written in binary
    mode using the highest pickle protocol available.
    """
    target = name + '.pkl'
    with open(target, mode='wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# True until agent.initializeModel(env) has been called once by the training loop
initializeModel = True

# Number of point configurations (entries of pointData) to train on
numData = 200

# Episodes are numbered globally across all configurations. The training loop
# moves on to the next configuration at every multiple of 30, so the total
# budget is derived from numData instead of repeating the literal 200.
episodeStart = 0
numEpisodes = numData * 30
currentEpisode = episodeStart

# Training driver: walks through the point configurations one by one and
# trains the same agent on each of them for a slice of the global episode
# range [currentEpisode, numEpisodes).
# NOTE(review): pointData only has one entry per row of df, so pointData[i]
# raises KeyError if the data file has fewer than numData rows.
for i in range(0, numData):     
    # number of (state, action) pairs registered for tracking in this configuration
    num_states_tracked = 0

    # reset epsilon start value and memory for each new configuration but keep the model parameters
    # learned from the previous configuration
    agent.reset()
    
    # Episode numbers keep counting across configurations: this loop resumes at
    # currentEpisode and is left (see the break at the bottom) when the episode
    # number reaches a non-zero multiple of 30.
    for episode in range(currentEpisode, numEpisodes):

        # tracking total rewards, step count
        tot_reward = 0
        step_count = 0

        #Reset the environment/Clear the previous states
        env.reset()
        env.initialize(pointData[i])  
        
        # One-time model set-up; it is done here because it needs an environment
        # that has already been initialised with points.
        if initializeModel:
            agent.initializeModel(env)
            initializeModel = False

        # first element of the returned pair is not used
        _, state = env.getStartState()
        terminal_state = False

        while not terminal_state:

            #Get the free edge from the list
            edgeToProcess = env.getEdgesToProcess()[0]
                        
            # epsilon is only used for the progress print below
            action, epsilon = agent.get_action(state, edgeToProcess, episode)            
            
            # Reward and next state are queried before the edge is removed from
            # the environment's to-process list; the terminal check comes after.
            reward = env.getReward(edgeToProcess, action)        
            next_state = env.getNextState(edgeToProcess, action)
            env.removeProcessedEdge(edgeToProcess)
            terminal_state = env.isTerminalState()
            
            # save the sample <s, a, r, s'> to the replay memory
            agent.append_sample(state, action, reward, next_state, terminal_state)
            
            # every time step do the training
            agent.train_model()
            tot_reward += reward
            state = next_state
            step_count += 1
            if terminal_state:
                # every episode update the target model to be same with model
                agent.update_target_model()
                
            # Register up to 3 pairs per configuration for convergence tracking,
            # picked at steps that produced a positive reward.
            # NOTE(review): `state` was already overwritten with next_state
            # above, so the pair registered here is (next_state, action), not the
            # (state, action) that earned the reward - confirm this is intended.
            if reward > 0 and num_states_tracked < 3:
                initialise_tracking_states(state, action)
                save_tracking_states(agent)
                num_states_tracked += 1

            # Store the rewards
            # (only at the end of every 5th episode)
            if terminal_state and episode % 5 ==0:
                avg_reward.append(tot_reward/step_count)
                total_reward.append(tot_reward)
                print("pt Idx: ", i, "epi:", episode, "  score:", tot_reward, "  eps:", epsilon)
                
        # Every 100 episodes: record the tracked Q-values ...
        if episode % 100 == 0:
            save_tracking_states(agent)  
            
        # ... and checkpoint the agent
        if episode % 100 == 0:
            agent.save("./DDQN_Delaunay.h5")
        
        # Every 200 episodes: pickle the tracked Q-value histories
        if episode % 200 == 0: 
            save_obj(states_track,'DDQN_States_tracked')
    
        # Every 1000 episodes (except episode 0): show the average-reward curve so far
        if episode % 1000 ==0 and episode !=0:
            plt.plot(list(range(len(avg_reward))), avg_reward)
            plt.show()
            
        # Hand over to the next configuration at every non-zero multiple of 30;
        # the next configuration resumes at the following episode number.
        if (episode % 30 == 0 and episode !=0):
            currentEpisode = episode + 1           
            break
    

In [None]:
# Save the agent under a separate file name, distinct from the periodic
# "./DDQN_Delaunay.h5" checkpoint written inside the training loop.
agent.save("./DDQN_Delaunay11.h5")

In [None]:
# Peek at the first four tracked states: print a running index, the state key
# and its {action: Q-value history} dictionary, each on its own line.
i = 0
for key, value in states_track.items():
    if i >= 4:
        break
    print(i, key, value, sep="\n")
    i += 1

In [None]:
# Plot the last (up to) 500 recorded Q-values of one tracked (state, action) pair.
# The keys are hard-coded literals (presumably copied from the output of an
# earlier run - confirm); if this exact pair is not present in states_track
# the lookup raises KeyError.
state_key = (0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0)
action_key = (0.09713178, 0.03444608, 0.27692298, 0.31709948, 0.03183285, 0.69482862, 0.6636030812134588, 0.4502759987258443, 0.33498928360322217, 1.9994953259621464, 0.4770073601791655, 0.6650899674484811, 0.14578541589254831)
q_history = np.asarray(states_track[state_key][action_key])[-500:]

plt.figure(0, figsize=(16,7))
plt.subplot(243)
# The x-axis is sized from the data, so the plot also works when fewer than
# 500 values have been recorded.
plt.plot(np.arange(len(q_history)), q_history)
plt.ylabel("Q-value")
# Call show(): the bare attribute `plt.show` does nothing.
plt.show()


In [None]:
# Plot the last (up to) 500 recorded Q-values of one tracked (state, action) pair.
# The keys are hard-coded literals (presumably copied from the output of an
# earlier run - confirm); if this exact pair is not present in states_track
# the lookup raises KeyError.
state_key = (0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0)
action_key = (0.03054095, 0.60986665, 0.74407426, 0.61766639, 0.90472224, 0.57672152, 0.8748094234910715, 0.16578376234540373, 0.7135759387925181, 2.8811019884903466, 0.21166247836746213, 0.04882818673198578, 0.6564147570506149)
q_history = np.asarray(states_track[state_key][action_key])[-500:]

plt.figure(0, figsize=(16,7))
plt.subplot(243)
# The x-axis is sized from the data, so the plot also works when fewer than
# 500 values have been recorded.
plt.plot(np.arange(len(q_history)), q_history)
plt.ylabel("Q-value")
# Call show(): the bare attribute `plt.show` does nothing.
plt.show()

In [None]:
# Print the full recorded Q-value history of one tracked (state, action) pair.
# Both keys are hard-coded literals.
state_key = (0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0)
action_key = (0.01843548, 0.67035792, 0.23844991, 0.16435803, 0.20708608, 0.10072192, 0.600061847959325, 0.07094536156931612, 0.5517628458751431, 2.2735202084796864, 0.7777300507851074, 0.09034239432499701, 0.6695466770829864)
print(states_track[state_key][action_key])

In [None]:
# Plot the last (up to) 500 recorded Q-values of one tracked (state, action) pair.
# The keys are hard-coded literals (presumably copied from the output of an
# earlier run - confirm); if this exact pair is not present in states_track
# the lookup raises KeyError.
state_key = (0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0)
action_key = (0.01843548, 0.67035792, 0.23844991, 0.16435803, 0.20708608, 0.10072192, 0.600061847959325, 0.07094536156931612, 0.5517628458751431, 2.2735202084796864, 0.7777300507851074, 0.09034239432499701, 0.6695466770829864)
q_history = np.asarray(states_track[state_key][action_key])[-500:]

plt.figure(0, figsize=(16,7))
plt.subplot(243)
# The x-axis is sized from the data instead of a hand-tuned length (154), so
# the plot keeps working as more values are recorded.
plt.plot(np.arange(len(q_history)), q_history)
plt.ylabel("Q-value")
# Call show(): the bare attribute `plt.show` does nothing.
plt.show()

In [None]:
# Average reward per logged episode, plotted against the index of the log entry.
sample_index = list(range(len(avg_reward)))
plt.plot(sample_index, avg_reward)
plt.show()