Done with adding the Neural Network, testing needs to be done #15

Open · wants to merge 3 commits into base: 2-add-deep-q-brain-for-the-agents
72 changes: 63 additions & 9 deletions mnest/Entities.py
@@ -2,6 +2,11 @@
from scipy.signal import convolve2d
from .Laws import *
import os
import random
# import tensorflow as tf
# from .perceptron import *
from .NeuralNetwork import Perceptron
import TestNeuralNetwork


data_file_path = os.path.join(os.path.dirname(__file__), 'data', 'random_seed.txt')
@@ -11,7 +16,7 @@

class Agent:
def __init__(self, world, layer_name, child, position: Vector2 = Vector2(0, 0), direction=E,
brain_type='Q-Table', action_list=('move', 'stay')):
brain_type='Deep-Q', action_list=('move', 'stay')):

self.world = world
self.layer_name = layer_name
@@ -23,7 +28,7 @@ def __init__(self, world, layer_name, child, position: Vector2 = Vector2(0, 0),

# Fun Fact.
# As We are storing the position of the element inside the layer. and not a copy of the values.
# It is stored as a reference and hence we do not have to update the world values everytime.
# It is stored as a reference and hence we do not have to update the world values every time.
# Frankly for all those who are reading this. I was about to write code to update the values when I accidentally
# ran the simulation to test and saw them updating automatically.
# I jumped* up and down around the room (* Literally.)
@@ -79,7 +84,7 @@ def move(self):
self.direction *= -1 # Flip direction.
# print('Reflect down')

# Right
# Down
if self.position.y >= self.world.r_length:
if self.world.periodic_boundary:
self.position = 0
@@ -134,7 +139,7 @@ def __init__(self, world, layer_name,
def disperse(self):
"""
Uses matrices and convolutions to disperse the essence
In general the rule of thumb is that the dispersion matrix con have values where the total of the values is 1.
In general the rule of thumb is that the dispersion matrix can have values where the total of the values is 1.
Also, the sum of all values that is dispersed(Total - Center_Value) = 1 - Center_Value. of the matrix
# we need the original layer
:return:
@@ -155,6 +160,21 @@ def decay(self, decay_type):
mask = self.world.layers[self.layer_name] < 0
self.world.layers[self.layer_name][mask] = 0
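
As a side note on disperse(): its docstring says the dispersion matrix should have values summing to 1 so the total amount of essence is conserved. A minimal sketch of that idea, using an assumed 3x3 kernel and a toy layer (the values here are illustrative, not the ones Essence actually uses):

# Illustrative sketch only: a 3x3 kernel that keeps 0.6 of the essence in place
# and spreads the remaining 0.4 equally to the four neighbours (entries sum to 1).
import numpy as np
from scipy.signal import convolve2d

kernel = np.array([[0.0, 0.1, 0.0],
                   [0.1, 0.6, 0.1],
                   [0.0, 0.1, 0.0]])
layer = np.zeros((5, 5))
layer[2, 2] = 1.0                      # a single cell of essence
dispersed = convolve2d(layer, kernel, mode='same', boundary='fill')
print(dispersed.sum())                 # ~1.0: total essence is conserved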

'''
# Defining The Q-Network Architecture
class QNetwork(tf.keras.Model):
def __init__(self, state_dim, action_dim):
super(QNetwork, self).__init__()
self.FirstLayer = tf.keras.layers.Dense(64, activation='relu')
self.SecondLayer = tf.keras.layers.Dense(64, activation='relu')
self.OutputLayer = tf.keras.layers.Dense(action_dim)

def call(self, state):
x = self.FirstLayer(state)
x = self.SecondLayer(x)
output = self.OutputLayer(x)
return output
'''
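
For readers without TensorFlow installed, here is a dependency-free sketch of the forward pass described by the commented-out QNetwork above (two 64-unit ReLU layers followed by a linear output layer). All dimensions and the random initialisation below are assumed purely for illustration:

import numpy as np

def relu(x):
    return np.maximum(x, 0.0)

state_dim, action_dim, hidden = 4, 2, 64                  # assumed dimensions
rng = np.random.default_rng(0)
W1, b1 = rng.normal(size=(state_dim, hidden)), np.zeros(hidden)
W2, b2 = rng.normal(size=(hidden, hidden)), np.zeros(hidden)
W3, b3 = rng.normal(size=(hidden, action_dim)), np.zeros(action_dim)

def q_forward(state):
    x = relu(state @ W1 + b1)          # FirstLayer
    x = relu(x @ W2 + b2)              # SecondLayer
    return x @ W3 + b3                 # OutputLayer: one Q-value per action

print(q_forward(np.ones(state_dim)))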

# AI for the Entities.
class Brain:
@@ -171,13 +191,19 @@ def __init__(self, brain_type: str, action_list: list, learning_rate=0.2,
self.min_exploration = min_exploration
self.discounted_return = discounted_return # Gamma or Lambda

# Neural Network variables
'''self.q_network = QNetwork(sta)'''

if self.brain_type == 'Q-Table':
self.q_table = {}
elif self.brain_type == 'Deep-Q':
pass
print("Deep-Q Started")
self.q_table = TestNeuralNetwork.Perceptron()
# pass
else:
print('There seems to be some mistake on the brain type.')

# state of add_state becomes the i/p for the perceptron
def add_state(self, state: str):
"""
This function is applicable to the Q-Table type Brain.
@@ -208,8 +234,10 @@ def predict_action(self, state: str):
self.add_state(state)

else:
''' Changes in predict_action '''
# Exploit
if state in self.q_table:
# q_values is the array of outputs.
q_values = self.q_table[state] # q_values for that state
predict_list = np.where(q_values == max(q_values))[0] # list of all indices with max q_values
action = np.random.choice(predict_list)
@@ -225,8 +253,32 @@ def predict_action(self, state: str):
self.exploration_rate -= self.exploration_decay

elif self.brain_type == 'Deep-Q':
action = 0
pass
# action = 0
# print("Hello There 1")
# pass

'''# Calling Perceptron.predict()'''

# training = Perceptron(state)[0]
# action = np.argmax(training)

# Checking Exploration or Exploitation
if np.random.random() < self.exploration_rate:
# Explore
action = np.random.randint(len(self.action_list))

else:
# Exploit
action = TestNeuralNetwork.Perceptron.predict(state)

if self.exploration_rate > self.min_exploration:
self.exploration_rate -= self.exploration_decay

'''Exploration => learning part: call predict() for prediction and fit() for learning. So fit() should run only during training, while predict() runs every time an action is needed.

# Decaying exploration_rate
if self.exploration_rate > self.min_exploration:
self.exploration_rate -= self.exploration_decay '''

else:
action = None
@@ -237,7 +289,7 @@ def predict_action(self, state: str):

def learn(self, state_observed: str, action_taken: int, next_state: str, reward_earned: float):
"""

:param next_state:
:param state_observed:
:param action_taken:
@@ -258,7 +310,9 @@ def learn(self, state_observed: str, action_taken: int, next_state: str, reward_earned: float):
values_next_state[action_taken] = new_value

elif self.brain_type == 'Deep-Q':
pass
TestNeuralNetwork.main()
# pass

else:
print('There seems to be some mistake on the brain type.')
pass
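
The body of the 'Q-Table' branch of learn() is collapsed in this diff. For context, the standard tabular Q-learning update it presumably applies is sketched below; the function and variable names are assumed, and only the learning rate mirrors the Brain default of learning_rate=0.2 shown above (gamma is illustrative):

import numpy as np

def q_update(q_table, state, action, next_state, reward, lr=0.2, gamma=0.9):
    # Q(s, a) <- Q(s, a) + lr * (reward + gamma * max_a' Q(s', a') - Q(s, a))
    old_value = q_table[state][action]
    best_next = np.max(q_table[next_state])
    q_table[state][action] = old_value + lr * (reward + gamma * best_next - old_value)

q = {'s0': np.zeros(2), 's1': np.zeros(2)}
q_update(q, 's0', 1, 's1', reward=10.0)
print(q['s0'])                         # -> [0. 2.]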
210 changes: 210 additions & 0 deletions mnest/NeuralNetwork.py
@@ -0,0 +1,210 @@
import random
from typing import Any
import numpy as np
import sys
import h5py
import os
import pickle
# from .perceptron import *

# class NeuralNetwork(Perceptron.fit):
# def __init__(self, state_dim, action_dim):
# super(NeuralNetwork, self).__init__()
# self.FirstLayer = Perceptron.predict
# self.SecondLayer = Perceptron.predict


'''
1. Adam Optimizer
2. MeanSquareError
3. QNetwork
4. DenseLayer
5. Agent
6. Neural Network

'''

global reward


class Optimizer:
pass



class MyMeanSquareError:
def __call__(self, y_true, y_pred):
return np.mean(np.square(y_true - y_pred))



class ModelCheckpoint:
# def save_model(self, model):
# # Save the model architecture
# model.save

def __init__(self, model):
self.model = model
self.step_count = 0

def saveModelWeights(self, step_count, save_path):
# Save Model Weights
with open(save_path + "_weights.pkl", "wb") as f:
pickle.dump(self.model.get_weights(), f)

# Save Step Count
with open(save_path + "_stepCounts.pkl", "wb") as f:
pickle.dump(self.step_count, f)

def loadModelWeights(self, save_path):
# Load Model Weights
with open(save_path + "_weights.pkl", "rb") as f:
weights = pickle.load(f)
self.model.set_weights(weights)

# Load Step Count
with open(save_path + "_stepCounts.pkl", "rb") as f:
self.step_count = pickle.load(f)

def incrementStepCount(self):
self.step_count += 1
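
A short usage sketch for ModelCheckpoint, assuming a model object that exposes get_weights()/set_weights() the way the class expects (the DummyModel and the "run1" path are illustrative):

class DummyModel:
    def __init__(self):
        self._w = [0.0, 0.0]
    def get_weights(self):
        return self._w
    def set_weights(self, w):
        self._w = w

model = DummyModel()
ckpt = ModelCheckpoint(model)
ckpt.incrementStepCount()
ckpt.saveModelWeights(ckpt.step_count, "run1")   # writes run1_weights.pkl and run1_stepCounts.pkl
ckpt.loadModelWeights("run1")                    # restores the weights and the step count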

# This is a Neural Network with 1 hidden layer
class Perceptron:
def __init__(self, data, weights = None):
print("Perceptron Init Ran")
random.shuffle(data)

# state would be 1 step at a time
inputs = np.array([[float(x) for x in row[0:-1]] for row in data])
self.inputs = np.hstack((inputs, [[1]] * len(inputs))) # Append 1 to each input row, for the bias weight
self.outputs = np.array([float(row[-1]) for row in data]) # Change no. of o/p to no. of actions
self.numInputs = len(self.inputs[0])

if weights is None:
weights = np.array([random.uniform(0, 100) \
for x in range(self.numInputs)])
weights[-1] = -1 # Set initial value of bias weights
self.weights = weights
self.error = float(sys.maxsize) # Initialise error to some very high value
self.smallestError = self.error
self.bestWeights = self.weights
self.fitHistory = []

def predict(self, x_i):
# The activation input is the dot product of the input vector and the weight vector
y = np.dot(x_i, self.weights)
# TODO: modify this so that it gives an array output (one value per action)
if y > 0:
return y
else:
return 0.01*y
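
The TODO in predict() above asks for an array output, one value per action. One way that could look, assuming the single weight vector is replaced by a weight matrix with one column per action (the names and sizes here are illustrative):

import numpy as np

def predict_all_actions(x_i, weight_matrix):
    # weight_matrix has shape (num_inputs, num_actions); the result is one value per action.
    y = np.dot(x_i, weight_matrix)
    # Same leaky-ReLU-style response as predict(), applied element-wise.
    return np.where(y > 0, y, 0.01 * y)

x = np.array([0.5, -1.0, 1.0])                       # example input, bias already appended
W = np.random.default_rng(0).uniform(0, 1, (3, 4))   # 3 inputs, 4 actions (assumed)
print(predict_all_actions(x, W))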

'''
def fit(self, state_dim, action_dim, learning_rate=0.001, gamma=0.99, epsilon=1.0, epsilon_decay=0.999, epsilon_min=0.01, numIters = 100, breakSoon = True):
self.state_dim = state_dim
self.action_dim = action_dim
self.learning_rate = learning_rate
self.gamma = gamma
self.epsilon = epsilon
self.epsilon_decay = epsilon_decay
self.epsilon_min = epsilon_min
self.loss_fn = MyMeanSquareError(1, 0.83)
'''

'''
def nextMove(location, action):
row, col = location
if action == "UP":
return (row - 1, col)
elif action == "DOWN":
return (row + 1, col)
elif action == "LEFT":
return(row, col - 1)
elif action == "RIGHT":
return(row, col + 1)
else:
return location
'''

# Deciding Rewards & Punishment
def Reward(self, location):

self.location = location
if location == 'Target':
print("Reached Target")
return 100

elif location == "Home":
print("Reached Home")
return 200

# elif location != "Home" and location != "Target":
elif location != "Home" or location != "Target":
return -300

else:
return -1


def fit(self, state, lr = 0.5, numIters = 100, breakSoon = True):
errorList = []
for iter in range(numIters):
totalError = 0.0
for i in range(len(self.outputs)):

# Checking The Difference Between The Actual & Predicted Output
pred = self.predict(self.inputs[i])
# Error is the difference between true and predicted class
error = self.outputs[i] - pred


'''# Rewards Function'''
# Calling Reward Function
self.state = state
reward = self.Reward(state)
reward += reward
print("Total rewards: " + str(reward))


# Multiplying with the error yields a positive or negative adjustment depending on a positive or negative prediction error
self.weights = self.weights + \
lr * error * self.inputs[i]
# totalError += abs(error)**2
totalError += abs(error)

self.saveBestFit(self.weights, totalError)
if breakSoon:
if totalError == 0.0:
break
self.printWeights()
errorList.append(totalError)

# Store error history for convenient plotting
self.fitHistory = errorList
self.error = totalError



# Store the best performing weights for reuse
def saveBestFit(self, w, e):
if e < self.smallestError:
self.smallestError = e
self.bestWeights = w

def printWeights(self):
print("\t".join(map(str, self.weights)), file=sys.stderr)

# Ideally we should split data into train/test sets to feed this method. For now, just use the data passed during initialization.
def test(self):
e = 0.0
for i in range(len(self.inputs)):
pred = self.predict(self.inputs[i])
e += self.outputs[i] - pred
print(e, file=sys.stdout)

def __str__(self) -> str:
s = "Inputs (1 sample): {}\n".format(self.inputs[0])
s += "weights: {}\n".format(self.weights)
s += "Error: {}\n".format(self.error)
return s
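
Finally, a hedged end-to-end sketch of how this Perceptron might be driven: the constructor expects rows whose last element is the target value (it appends the bias input itself), fit() expects a location label it can hand to Reward(), and predict() expects an input row that already includes the trailing bias term. The toy data, labels, and sizes below are purely illustrative:

data = [
    [0.0, 0.0, 0.0],
    [0.0, 1.0, 1.0],
    [1.0, 0.0, 1.0],
    [1.0, 1.0, 1.0],
]

p = Perceptron(data)
p.fit(state='Target', numIters=10)     # 'Target' is an illustrative location label for Reward()
print(p.predict([1.0, 0.0, 1.0]))      # features plus the trailing bias input
p.test()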