In [1]:
# Detect whether the notebook runs on Google Colab: the import below only
# succeeds when the `google.colab` package is available.
try:
  from google.colab import drive
  IN_COLAB=True
except ImportError:
  # Only a failed import means "not on Colab". A bare `except:` would also
  # swallow KeyboardInterrupt/SystemExit and hide unrelated errors.
  IN_COLAB=False

if IN_COLAB:
  print("We're running Colab")

In [2]:
# Colab-only setup: mount Google Drive and make the project folder the working
# directory. Nothing in this cell runs when IN_COLAB is False.
if IN_COLAB:
  # Mount the Google Drive at mount
  mount='/content/drive'
  print("Colab: mounting Google drive on ", mount)

  # `drive` is the module imported from google.colab in the detection cell.
  drive.mount(mount)

  # Switch to the directory on the Google Drive that you want to use
  # NOTE(review): `os` is imported here but not used anywhere in this cell.
  import os
  drive_root = mount + "/My Drive/Thesis_coding_with_IAE"
  
  # Change to the directory
  print("\nColab: Changing directory to ", drive_root)
  # IPython magic (not plain Python): `$drive_root` is expanded to the value of
  # the variable before the directory change is performed.
  %cd $drive_root

In [3]:
import pandas as pd
import scipy
import scipy.spatial
import networkx as nx
import numpy as np
from collections import deque
import collections
import pickle

# for plotting graphs
import matplotlib.pyplot as plt

from CommonDefs import Point, Edge 
from TwoDimConvexHull import TwoDimConvexHull, PrintTwoDimConvexHull
from Utils import nearestKNeighboursOfEdgeMidPt, checkTriangleForDelaunayCriteria
from Graph import Graph
from DTEnv import DTEnv
from DTDoubleDQNAgent import DTDoubleDQNAgent

In [4]:
# Remove the column cap so wide DataFrames are displayed with every column.
pd.set_option("display.max_columns", None)

In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
# Seed NumPy, Python's `random` module and TensorFlow with the same value so
# that repeated runs start from the same random state.
import random as rn

from keras import backend as K
import tensorflow as tf

SEED = 30

np.random.seed(SEED)
rn.seed(SEED)
tf.random.set_seed(SEED)

Using TensorFlow backend.


In [7]:
# Load the data set: comma-separated, no header row, so columns are labelled
# 0..N-1 by pandas.
# The path is a raw string: in a normal literal "\T" and "\D" are invalid
# escape sequences (a warning today, an error in future Python versions).
# The resulting path is unchanged.
# NOTE(review): this is an absolute local Windows path; it will not resolve on
# Colab or on another machine.
df = pd.read_csv(r"D:\Thesis_Experiments\Data\DT_10_sorted.txt", sep=",", header=None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56
0,0.018435,0.670358,0.101245,0.711891,0.207086,0.100722,0.23845,0.164358,0.321183,0.967308,0.399677,0.449653,0.417594,0.01015,0.920121,0.307752,0.939591,0.158107,0.965258,0.480327,output,1,2,5,1,2,6,1,3,4,3,4,7,2,5,6,1,4,6,4,6,7,5,6,10,6,7,8,6,8,10,7,8,9,8,9,10
1,0.108702,0.075571,0.154823,0.495594,0.327407,0.047181,0.529001,0.658846,0.64106,0.22447,0.645282,0.27444,0.66699,0.731345,0.718659,0.101208,0.808132,0.025225,0.993713,0.533967,output,1,2,3,2,3,6,2,4,6,2,4,7,4,6,7,3,5,8,3,5,6,5,6,8,3,8,9,6,8,10,6,7,10,8,9,10
2,0.048977,0.558356,0.090114,0.35836,0.4141,0.802224,0.457654,0.97264,0.740652,0.717631,0.851315,0.117623,0.873359,0.345104,0.905256,0.762454,0.931034,0.006254,0.940295,0.553189,output,1,2,3,1,3,4,2,3,7,3,4,5,3,5,7,4,5,8,2,6,7,2,6,9,5,8,10,5,7,10,6,7,9,7,9,10
3,0.048994,0.994292,0.134057,0.220541,0.169525,0.608264,0.177805,0.178609,0.429897,0.374512,0.537223,0.829864,0.561325,0.438124,0.642534,0.113659,0.717887,0.584209,0.851976,0.429216,output,1,2,3,2,4,5,1,3,6,2,3,5,4,5,8,3,6,7,3,5,7,5,7,8,6,7,9,7,8,10,7,9,10,6,9,10
4,0.048897,0.363199,0.093488,0.236462,0.12871,0.457687,0.142823,0.809028,0.233381,0.174737,0.288991,0.625024,0.395214,0.208379,0.567808,0.426278,0.880467,0.112534,0.962764,0.756392,output,1,2,3,1,3,4,2,3,5,3,4,6,3,6,7,3,5,7,4,6,10,5,7,9,6,7,8,6,8,10,7,8,9,8,9,10


In [8]:
# Convert every DataFrame row into a list of Point objects.
# Columns are consumed in (x, y) pairs from the left until the cell holding the
# literal "output" is reached. Points are numbered from 1 in reading order.
# dtStartIdx records the column index immediately after the "output" marker.
pointData = {}
numColumns = len(df.columns)
for rowIdx in range(len(df)):
    rowPoints = []
    for colIdx in range(0, numColumns, 2):
        xValue = df.loc[rowIdx, colIdx]
        if xValue == "output":
            dtStartIdx = colIdx + 1
            break
        yValue = df.loc[rowIdx, colIdx + 1]
        rowPoints.append(Point(len(rowPoints) + 1, xValue, yValue))
    pointData[rowIdx] = rowPoints


In [9]:
# Initialising the environment
env = DTEnv()

agent = DTDoubleDQNAgent(env)

# tracking average reward per episode = total rewards in an episode/ total steps in an episode
avg_reward = []

# tracking total rewards per episode
total_reward  = []

In [10]:
# Q-value histories, indexed as states_track[state_tuple][action_tuple] -> list.
states_track = collections.defaultdict(dict)


def initialise_tracking_states(state, action):
    """Start (or restart) an empty Q-value history for a state-action pair.

    Both arguments are converted to tuples so they can be used as dictionary
    keys. Any history already stored for the pair is replaced by an empty list.
    """
    per_state = states_track[tuple(state)]
    per_state[tuple(action)] = []

In [11]:
# This function will append latest Q-values of the 6 Q-values which are being tracked for checking convergence
def save_tracking_states(agent):
    for state in states_track.keys():
        for action in states_track[state].keys():
            Q = agent.prediction(state, [action])
            states_track[state][action].extend(Q)      

In [None]:
#Defining a function to save the object as a pickle file
def save_obj(obj, name):
    """Pickle ``obj`` into the file ``name + '.pkl'``.

    The file is opened in binary write mode (an existing file is overwritten)
    and the highest available pickle protocol is used.
    """
    target_path = name + '.pkl'
    with open(target_path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Training driver: for each selected point configuration, run the episode loop,
# feed every transition to the agent, and periodically record rewards, tracked
# Q-values and checkpoints. Relies on `env`, `agent`, `pointData`, `avg_reward`,
# `total_reward`, `states_track` and the helper functions from earlier cells.

# True until agent.initializeModel(env) has been called once (first episode only).
initializeModel = True

# Number of pointData entries (indices 0..numData-1) to train on.
numData = int(1)

episodeStart = 0
numEpisodes = 5000
# First episode index used by the episode loop below.
currentEpisode = episodeStart;

for i in range(0, numData):
    # Counts how many state-action pairs have been registered for tracking
    # for this configuration (capped at 3 further down).
    num_states_tracked = 0

    # reset epsilon start value and memory for each new configuration but keep the model parameters
    # learned from the previous configuration
    agent.reset()

    for episode in range(currentEpisode, numEpisodes):

        # tracking total rewards, step count
        tot_reward = 0
        step_count = 0

        #Reset the environment/Clear the previous states
        env.reset()
        env.initialize(pointData[i])
        print('current point data index {} : episode : {}'.format(i, episode))

        # The model is initialised after the environment has been initialised
        # with point data, and only once for the whole run.
        if initializeModel:
            agent.initializeModel(env)
            initializeModel = False

        # Only the second element of the returned pair is used as the state.
        _, state = env.getStartState()
        terminal_state = False

        while not terminal_state:

            #Get the free edge from the list
            edgeToProcess = env.getEdgesToProcess()[0]

            action, epsilon = agent.get_action(state, edgeToProcess, episode)

            '''if not action:
                break'''

            #print('edgeToProcess: {}-{}'.format(edgeToProcess[0], edgeToProcess[1]))
            #print('action chosen: {}'.format(env.getNodeIdFromPosAttr( action[4], action[5])))

            # Order matters here: reward and next state are queried for the
            # edge before it is removed from the environment's list, and the
            # terminal check runs after the removal.
            reward = env.getReward(edgeToProcess, action)
            next_state = env.getNextState(edgeToProcess, action)
            env.removeProcessedEdge(edgeToProcess)
            terminal_state = env.isTerminalState()

            # save the sample <s, a, r, s'> to the replay memory
            agent.append_sample(state, action, reward, next_state, terminal_state)

            # every time step do the training
            agent.train_model()
            tot_reward += reward
            state = next_state
            step_count += 1
            if terminal_state:
                # every episode update the target model to be same with model
                agent.update_target_model()
                print('generated triangles: {}'.format(env.getGeneratedTriangles()))

            # Register up to 3 positively rewarded pairs per configuration for
            # Q-value tracking.
            # NOTE(review): `state` was already advanced to next_state above, so
            # the tracked key pairs the successor state with the action just
            # taken - confirm this is intended.
            if reward > 0 and num_states_tracked < 3:
                initialise_tracking_states(state, action)
                save_tracking_states(agent)
                num_states_tracked += 1

            # Store the rewards
            # (recorded only at the end of every 5th episode)
            if terminal_state and episode % 5 ==0:
                avg_reward.append(tot_reward/step_count)
                total_reward.append(tot_reward)
                print("episode:", episode, "  score:", tot_reward, "  memory length:",
                          len(agent.memory), "  epsilon:", epsilon)

        # Sample the tracked Q-values every 20th episode.
        if episode % 20 == 0:
            save_tracking_states(agent)

        # Checkpoint the model every 500th episode (including episode 0).
        if episode % 500 == 0:
            agent.save("./DDQN_Delaunay.h5")

        # Persist the reward history and tracked Q-values on the same cadence.
        if episode % 500 == 0:
            save_obj(avg_reward,'DDQN_Rewards')
            save_obj(states_track,'DDQN_States_tracked')

        # Show the average-reward curve every 1000th episode, skipping episode 0.
        if episode % 1000 ==0 and episode !=0:
            plt.plot(list(range(len(avg_reward))), avg_reward)
            plt.show()

        # NOTE(review): with episodeStart = 0 and numEpisodes = 5000 the loop
        # ends at episode 4999, so this condition is never true for the
        # current settings.
        if (episode % 5000 == 0 and episode !=0):
            currentEpisode = episode + 1
            print('current episode: {}'.format(currentEpisode))
            break
    

current point data index 0 : episode : 0
generated triangles: [[1, 5, 10], [1, 8, 10], [1, 4, 8], [8, 9, 10], [1, 3, 4], [4, 8, 9], [3, 4, 10], [4, 9, 10], [1, 3, 10], [2, 4, 10], [2, 3, 4]]
episode: 0   score: -9.299999999999999   memory length: 14   epsilon: 1.0
current point data index 0 : episode : 1
generated triangles: [[1, 5, 10], [1, 6, 10], [1, 3, 6], [4, 6, 10], [3, 6, 8], [2, 4, 6], [4, 7, 10], [3, 8, 9], [5, 6, 8], [2, 3, 4], [2, 5, 6], [3, 4, 7], [7, 8, 10], [3, 7, 9], [1, 5, 6], [3, 5, 8], [2, 3, 7], [1, 3, 4], [2, 4, 5], [6, 8, 10], [3, 5, 10], [2, 5, 7], [1, 4, 10], [4, 5, 7], [2, 3, 10], [1, 5, 7]]
current point data index 0 : episode : 2
generated triangles: [[1, 5, 9], [1, 7, 9], [5, 9, 10], [1, 4, 7], [1, 3, 4], [3, 4, 7], [2, 3, 4], [1, 2, 3], [2, 4, 5], [1, 2, 8], [2, 5, 6], [1, 6, 8], [2, 4, 8], [5, 6, 7], [1, 2, 6], [4, 7, 8], [2, 5, 7], [3, 6, 7], [7, 8, 10], [2, 6, 7], [3, 6, 8], [5, 7, 10], [8, 9, 10], [3, 8, 9], [2, 8, 9], [3, 4, 9], [2, 7, 9], [2, 4, 9]]
cu

generated triangles: [[6, 9, 10], [4, 6, 9], [2, 6, 10], [4, 5, 6], [3, 4, 9], [1, 2, 6], [2, 5, 10], [2, 4, 5], [5, 6, 7], [2, 3, 4], [3, 7, 9], [1, 2, 5], [1, 6, 7], [4, 5, 7], [3, 6, 7], [2, 3, 6], [2, 4, 7], [2, 3, 7]]
current point data index 0 : episode : 27
generated triangles: [[7, 9, 10], [1, 7, 10], [1, 3, 7], [1, 2, 10], [1, 2, 5], [2, 5, 10], [2, 5, 7], [2, 7, 8], [5, 7, 9], [2, 5, 8], [6, 7, 8], [1, 5, 9], [5, 8, 10], [4, 6, 7], [5, 6, 8], [1, 6, 9], [6, 8, 10], [2, 4, 6], [5, 6, 7], [1, 6, 7], [6, 9, 10], [2, 4, 8], [2, 6, 7], [3, 4, 8], [3, 4, 7], [3, 8, 10], [3, 5, 10], [3, 5, 6], [2, 3, 6], [2, 3, 4]]
current point data index 0 : episode : 28
generated triangles: [[3, 5, 10], [2, 3, 5], [3, 7, 10], [1, 2, 3], [1, 2, 5], [7, 8, 10], [1, 2, 4], [7, 8, 9], [8, 9, 10], [1, 3, 4], [2, 4, 7], [3, 4, 6], [1, 2, 7], [4, 7, 10], [3, 6, 7], [4, 6, 8], [1, 3, 7], [4, 9, 10], [6, 7, 10], [4, 7, 8], [3, 6, 8], [2, 4, 9], [3, 4, 8], [2, 8, 9], [2, 8, 10], [2, 7, 10]]
current point d

In [None]:
# Dump the whole tracking structure (every tracked state, action and Q-value
# history) as plain text.
print(states_track)

In [None]:
# Plot the recorded Q-value history of one tracked (state, action) pair.
plt.figure(0, figsize=(16,7))

# Keys of the pair to plot, exactly as they are stored in states_track.
tracked_state = (0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0)
tracked_action = (0.92012139, 0.30775196, 0.96525845, 0.48032701, 0.39967654, 0.44965282, 0.5394429496807445, 0.5664131026480621, 0.17838021770349452, 1.260794998552376, 0.3203645975030877, 1.5604330575343297, 0.2078553015555619)

plt.subplot(243)

# Only the most recent 500 recorded values are plotted.
q_history = np.asarray(states_track[tracked_state][tracked_action])[-500:]

# The x-axis is sized from the data. A hard-coded length (previously 70) makes
# plt.plot raise as soon as the history holds a different number of values.
xaxis = np.asarray(range(len(q_history)))
plt.plot(xaxis, q_history)
plt.ylabel("Q-value")
# plt.show has to be called; the bare attribute reference only echoed the
# function object without rendering the figure explicitly.
plt.show()


In [None]:
# Print the recorded Q-value history of a single tracked (state, action) pair.
inspected_state = (0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0)
inspected_action = (0.01843548, 0.67035792, 0.23844991, 0.16435803, 0.20708608, 0.10072192, 0.600061847959325, 0.07094536156931612, 0.5517628458751431, 2.2735202084796864, 0.7777300507851074, 0.09034239432499701, 0.6695466770829864)
inspected_history = states_track[inspected_state][inspected_action]
print(inspected_history)

In [None]:
# Average reward per recorded episode, plotted against its position in the
# avg_reward list.
record_positions = list(range(len(avg_reward)))
plt.plot(record_positions, avg_reward)
plt.show()