In [None]:
import pandas as pd
import scipy
import scipy.spatial
import networkx as nx
import numpy as np
from collections import deque
import collections
import pickle

# for plotting graphs
import matplotlib.pyplot as plt

from CommonDefs import Point, Edge 
from TwoDimConvexHull import TwoDimConvexHull, PrintTwoDimConvexHull
from Utils import nearestKNeighboursOfEdgeMidPt, checkTriangleForDelaunayCriteria
from Graph import Graph
from DTEnv import DTEnv
from DTDQNAgent import DTDQNAgent

In [None]:
# Show every column when a DataFrame is rendered (None removes the column cap).
pd.set_option("display.max_columns", None)

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
import random as rn
from keras import backend as K
import tensorflow as tf

# Single source of truth for the seed so the three generators below
# (NumPy, stdlib `random`, TensorFlow) can never be seeded inconsistently.
SEED = 30

np.random.seed(SEED)
rn.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Load the space-separated data file; header=None means columns get integer labels.
# The path is a raw string: the original non-raw literal contained "\T" and "\D",
# which are invalid escape sequences (a warning today, an error in future Python
# versions). The runtime value of the path is unchanged.
df = pd.read_csv(r"D:\Thesis_Experiments\Data\DT_5_sorted.txt", sep=" ", header=None)
# NOTE(review): with IPython's default settings only the last expression of a cell
# is rendered, so this head() preview is presumably not displayed — confirm.
df.head()
df.shape

In [None]:
# Build {row index -> list of Point} from the raw frame.
# Each row is read as consecutive (x, y) column pairs until a cell equal to
# "output" is met in an x position; the column right after that marker is
# remembered in dtStartIdx.
pointData = {}
for i in range(df.shape[0]):
    points = []
    for j in range(0, df.shape[1], 2):
        if df.loc[i, j] == "output":
            dtStartIdx = j + 1
            break
        # Point ids are 1-based and follow the order of appearance in the row.
        pointId = len(points) + 1
        points.append(Point(pointId, df.loc[i, j], df.loc[i, j + 1]))
    pointData[i] = points

# Debug aid: uncomment to print the parsed points of every row.
#for key, value in pointData.items():
    #print('key: {}, value: {}'.format(key, value))


In [None]:
# Create the environment and the DQN agent that acts on it.
env = DTEnv()
agent = DTDQNAgent(env)

# Reward histories filled by the training loop:
#   avg_reward   - total reward of an episode divided by its number of steps
#   total_reward - total reward of an episode
avg_reward, total_reward = [], []

In [None]:
# state (as tuple) -> {action (as tuple) -> list of recorded Q-values}
states_track = collections.defaultdict(dict)


def initialise_tracking_states(state, action):
    """Start (or restart) Q-value tracking for one state-action pair.

    Both arguments are converted to tuples so they can be used as dictionary
    keys. The pair's history is set to a fresh empty list, discarding any
    values previously recorded for it.
    """
    per_state = states_track[tuple(state)]
    per_state[tuple(action)] = []

In [None]:
def save_tracking_states(agent):
    """Record the agent's current prediction for every tracked state-action pair.

    For each pair registered in ``states_track`` this calls
    ``agent.prediction(state, [action])`` and extends that pair's history
    list with the returned values.
    """
    for state, per_action in states_track.items():
        for action, history in per_action.items():
            history.extend(agent.prediction(state, [action]))

In [None]:
def save_obj(obj, name):
    """Pickle ``obj`` into the file ``name + '.pkl'``.

    The file is opened in binary write mode (an existing file is overwritten)
    and the object is serialised with ``pickle.HIGHEST_PROTOCOL``.
    """
    target = name + '.pkl'
    with open(target, 'wb') as handle:
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)

In [None]:
# ---------------------------------------------------------------------------
# Training loop
#
# Each of the first half of the rows in `df` (one point configuration per row)
# is trained on for a run of consecutive episodes. The episode counter is
# shared across configurations: a configuration ends on the first non-zero
# episode index divisible by EPISODES_PER_CONFIG, and the next configuration
# resumes at the following index.
# ---------------------------------------------------------------------------

EPISODES_PER_CONFIG = 150   # episode index multiple that ends a configuration (and plots)
CHECKPOINT_INTERVAL = 1000  # episode index multiple that triggers model / pickle saves
REWARD_LOG_INTERVAL = 10    # episode index multiple whose rewards are recorded
MAX_TRACKED_STATES = 2      # state-action pairs registered for tracking per configuration

# The network is built lazily on the first episode, once the environment
# has been initialised with real point data.
initializeModel = True

numData = df.shape[0] // 2

episodeStart = 0
numEpisodes = df.shape[0] * EPISODES_PER_CONFIG
currentEpisode = episodeStart

for i in range(numData):
    num_states_tracked = 0

    # Original author's note: resets the epsilon start value and the replay
    # memory for each new configuration while keeping the learned model
    # parameters. (Behaviour of agent.reset() is defined in DTDQNAgent.)
    agent.reset()

    for episode in range(currentEpisode, numEpisodes):

        # Per-episode accumulators.
        tot_reward = 0
        step_count = 0

        # Clear the previous episode's state and load this configuration's points.
        env.reset()
        env.initialize(pointData[i])
        print('current point data index {} : episode : {}'.format(i, episode))

        if initializeModel:
            agent.initializeModel(env)
            initializeModel = False

        _, state = env.getStartState()
        terminal_state = False

        while not terminal_state:

            # Always work on the first edge still waiting to be processed.
            edgeToProcess = env.getEdgesToProcess()[0]

            action, epsilon = agent.get_action(state, edgeToProcess, episode)

            reward = env.getReward(edgeToProcess, action)
            next_state = env.getNextState(edgeToProcess, action)
            env.removeProcessedEdge(edgeToProcess)
            terminal_state = env.isTerminalState()

            # Save the sample <s, a, r, s', done> to the replay memory,
            # then train on every time step.
            agent.append_sample(state, action, reward, next_state, terminal_state)
            agent.train_model()

            tot_reward += reward
            state = next_state
            step_count += 1
            if terminal_state:
                print('generated triangles: {}'.format(env.getGeneratedTriangles()))

            # Register up to MAX_TRACKED_STATES rewarded pairs per configuration.
            # NOTE(review): `state` has already been advanced to next_state here,
            # so the tracked pair is (next_state, action) — confirm this is intended.
            if reward > 0 and num_states_tracked < MAX_TRACKED_STATES:
                initialise_tracking_states(state, action)
                save_tracking_states(agent)
                num_states_tracked += 1

            # Record the episode's rewards once it has finished.
            if terminal_state and episode % REWARD_LOG_INTERVAL == 0:
                avg_reward.append(tot_reward/step_count)
                total_reward.append(tot_reward)
                print("episode:", episode, "  score:", tot_reward, "  memory length:",
                          len(agent.memory), "  epsilon:", epsilon)

        # Periodic checkpoint: tracked Q-values, model weights, pickled histories.
        if episode % CHECKPOINT_INTERVAL == 0:
            save_tracking_states(agent)
            agent.save("./Delaunay.h5")
            save_obj(avg_reward,'Rewards')
            save_obj(states_track,'States_tracked')

        # End of this configuration's run: plot progress and hand the episode
        # counter over to the next configuration.
        if episode % EPISODES_PER_CONFIG == 0 and episode != 0:
            plt.plot(list(range(len(avg_reward))), avg_reward)
            plt.show()
            currentEpisode = episode + 1
            print('current episode: {}'.format(currentEpisode))
            break
    

In [None]:
# Number of distinct state keys currently held in states_track.
print(len(states_track))

In [None]:
# Dump the full tracking structure: state -> action -> recorded values.
print(states_track)