Done with adding the Neural Network, testing needs to be done #15

Open · wants to merge 3 commits into base: 2-add-deep-q-brain-for-the-agents
72 changes: 63 additions & 9 deletions mnest/Entities.py
@@ -2,6 +2,11 @@
from scipy.signal import convolve2d
from .Laws import *
import os
import random
# import tensorflow as tf
# from .perceptron import *
from .NeuralNetwork import Perceptron
import TestNeuralNetwork


data_file_path = os.path.join(os.path.dirname(__file__), 'data', 'random_seed.txt')
@@ -11,7 +16,7 @@

class Agent:
def __init__(self, world, layer_name, child, position: Vector2 = Vector2(0, 0), direction=E,
brain_type='Q-Table', action_list=('move', 'stay')):
brain_type='Deep-Q', action_list=('move', 'stay')):

self.world = world
self.layer_name = layer_name
@@ -23,7 +28,7 @@ def __init__(self, world, layer_name, child, position: Vector2 = Vector2(0, 0),

# Fun Fact.
# As We are storing the position of the element inside the layer. and not a copy of the values.
# It is stored as a reference and hence we do not have to update the world values everytime.
# It is stored as a reference and hence we do not have to update the world values every time.
# Frankly for all those who are reading this. I was about to write code to update the values when I accidentally
# ran the simulation to test and saw them updating automatically.
# I jumped* up and down around the room (* Literally.)
@@ -79,7 +84,7 @@ def move(self):
self.direction *= -1 # Flip direction.
# print('Reflect down')

# Right
# Down
if self.position.y >= self.world.r_length:
if self.world.periodic_boundary:
self.position = 0
@@ -134,7 +139,7 @@ def __init__(self, world, layer_name,
def disperse(self):
"""
Uses matrices and convolutions to disperse the essence
In general the rule of thumb is that the dispersion matrix con have values where the total of the values is 1.
In general the rule of thumb is that the dispersion matrix can have values where the total of the values is 1.
Also, the sum of all values that is dispersed(Total - Center_Value) = 1 - Center_Value. of the matrix
# we need the original layer
:return:
@@ -155,6 +160,21 @@ def decay(self, decay_type):
mask = self.world.layers[self.layer_name] < 0
self.world.layers[self.layer_name][mask] = 0
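
As a side note on disperse(): its docstring says the dispersion matrix should have values summing to 1 so the total amount of essence is conserved. A minimal sketch of that idea, using an assumed 3x3 kernel and a toy layer (the values here are illustrative, not the ones Essence actually uses):

# Illustrative sketch only: a 3x3 kernel that keeps 0.6 of the essence in place
# and spreads the remaining 0.4 equally to the four neighbours (entries sum to 1).
import numpy as np
from scipy.signal import convolve2d

kernel = np.array([[0.0, 0.1, 0.0],
                   [0.1, 0.6, 0.1],
                   [0.0, 0.1, 0.0]])
layer = np.zeros((5, 5))
layer[2, 2] = 1.0                      # a single cell of essence
dispersed = convolve2d(layer, kernel, mode='same', boundary='fill')
print(dispersed.sum())                 # ~1.0: total essence is conserved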

'''
# Defining The Q-Network Architecture
class QNetwork(tf.keras.Model):
def __init__(self, state_dim, action_dim):
super(QNetwork, self).__init__()
self.FirstLayer = tf.keras.layers.Dense(64, activation='relu')
self.SecondLayer = tf.keras.layers.Dense(64, activation='relu')
self.OutputLayer = tf.keras.layers.Dense(action_dim)

def call(self, state):
x = self.FirstLayer(state)
x = self.SecondLayer(x)
output = self.OutputLayer(x)
return output
'''
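
For readers without TensorFlow installed, here is a dependency-free sketch of the forward pass described by the commented-out QNetwork above (two 64-unit ReLU layers followed by a linear output layer). All dimensions and the random initialisation below are assumed purely for illustration:

import numpy as np

def relu(x):
    return np.maximum(x, 0.0)

state_dim, action_dim, hidden = 4, 2, 64                  # assumed dimensions
rng = np.random.default_rng(0)
W1, b1 = rng.normal(size=(state_dim, hidden)), np.zeros(hidden)
W2, b2 = rng.normal(size=(hidden, hidden)), np.zeros(hidden)
W3, b3 = rng.normal(size=(hidden, action_dim)), np.zeros(action_dim)

def q_forward(state):
    x = relu(state @ W1 + b1)          # FirstLayer
    x = relu(x @ W2 + b2)              # SecondLayer
    return x @ W3 + b3                 # OutputLayer: one Q-value per action

print(q_forward(np.ones(state_dim)))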

# AI for the Entities.
class Brain:
@@ -171,13 +191,19 @@ def __init__(self, brain_type: str, action_list: list, learning_rate=0.2,
self.min_exploration = min_exploration
self.discounted_return = discounted_return # Gamma or Lambda

# Neural Network variables
'''self.q_network = QNetwork(sta)'''

if self.brain_type == 'Q-Table':
self.q_table = {}
elif self.brain_type == 'Deep-Q':
pass
print("Deep-Q Started")
self.q_table = TestNeuralNetwork.Perceptron()
# pass
else:
print('There seems to be some mistake on the brain type.')

# state of add_state becomes the i/p for the perceptron
def add_state(self, state: str):
"""
This function is applicable to the Q-Table type Brain.
@@ -208,8 +234,10 @@ def predict_action(self, state: str):
self.add_state(state)

else:
''' Changes in predict_action '''
# Exploit
if state in self.q_table:
# q_values is the array of outputs.
q_values = self.q_table[state] # q_values for that state
predict_list = np.where(q_values == max(q_values))[0] # list of all indices with max q_values
action = np.random.choice(predict_list)
@@ -225,8 +253,32 @@ def predict_action(self, state: str):
self.exploration_rate -= self.exploration_decay

elif self.brain_type == 'Deep-Q':
action = 0
pass
# action = 0
# print("Hello There 1")
# pass

'''# Calling Perceptron.predict()'''

# training = Perceptron(state)[0]
# action = np.argmax(training)

# Checking Exploration or Exploitation
if np.random.random() < self.exploration_rate:
# Explore
action = np.random.randint(len(self.action_list))

else:
# Exploit
action = TestNeuralNetwork.Perceptron.predict(state)

if self.exploration_rate > self.min_exploration:
self.exploration_rate -= self.exploration_decay

'''Exploration => learning part: call predict() for prediction and fit() for learning. So fit() should run only during training, while predict() runs every time an action is needed.

# Decaying exploration_rate
if self.exploration_rate > self.min_exploration:
self.exploration_rate -= self.exploration_decay '''

else:
action = None
@@ -237,7 +289,7 @@ def predict_action(self, state: str):

def learn(self, state_observed: str, action_taken: int, next_state: str, reward_earned: float):
"""

:param next_state:
:param state_observed:
:param action_taken:
@@ -258,7 +310,9 @@ def learn(self, state_observed: str, action_taken: int, next_state: str, reward_earned: float):
values_next_state[action_taken] = new_value

elif self.brain_type == 'Deep-Q':
pass
TestNeuralNetwork.main()
# pass

else:
print('There seems to be some mistake on the brain type.')
pass
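
The body of the 'Q-Table' branch of learn() is collapsed in this diff. For context, the standard tabular Q-learning update it presumably applies is sketched below; the function and variable names are assumed, and only the learning rate mirrors the Brain default of learning_rate=0.2 shown above (gamma is illustrative):

import numpy as np

def q_update(q_table, state, action, next_state, reward, lr=0.2, gamma=0.9):
    # Q(s, a) <- Q(s, a) + lr * (reward + gamma * max_a' Q(s', a') - Q(s, a))
    old_value = q_table[state][action]
    best_next = np.max(q_table[next_state])
    q_table[state][action] = old_value + lr * (reward + gamma * best_next - old_value)

q = {'s0': np.zeros(2), 's1': np.zeros(2)}
q_update(q, 's0', 1, 's1', reward=10.0)
print(q['s0'])                         # -> [0. 2.]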
210 changes: 210 additions & 0 deletions mnest/NeuralNetwork.py
@@ -0,0 +1,210 @@
import random
from typing import Any
import numpy as np
import sys
import h5py
import os
import pickle
# from .perceptron import *

# class NeuralNetwork(Perceptron.fit):
# def __init__(self, state_dim, action_dim):
# super(NeuralNetwork, self).__init__()
# self.FirstLayer = Perceptron.predict
# self.SecondLayer = Perceptron.predict


'''
1. Adam Optimizer
2. MeanSquareError
3. QNetwork
4. DenseLayer
5. Agent
6. Neural Network

'''

global reward


class Optimizer:
pass



class MyMeanSquareError:
def __call__(self, y_true, y_pred):
return np.mean(np.square(y_true - y_pred))



class ModelCheckpoint:
# def save_model(self, model):
# # Save the model architecture
# model.save

def __init__(self, model):
self.model = model
self.step_count = 0

def saveModelWeights(self, step_count, save_path):
# Save Model Weights
with open(save_path + "_weights.pkl", "wb") as f:
pickle.dump(self.model.get_weights(), f)

# Save Step Count
with open(save_path + "_stepCounts.pkl", "wb") as f:
pickle.dump(self.step_count, f)

def loadModelWeights(self, save_path):
# Load Model Weights
with open(save_path + "_weights.pkl", "rb") as f:
weights = pickle.load(f)
self.model.set_weights(weights)

# Load Step Count
with open(save_path + "_stepCounts.pkl", "rb") as f:
self.step_count = pickle.load(f)

def incrementStepCount(self):
self.step_count += 1
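
A short usage sketch for ModelCheckpoint, assuming a model object that exposes get_weights()/set_weights() the way the class expects (the DummyModel and the "run1" path are illustrative):

class DummyModel:
    def __init__(self):
        self._w = [0.0, 0.0]
    def get_weights(self):
        return self._w
    def set_weights(self, w):
        self._w = w

model = DummyModel()
ckpt = ModelCheckpoint(model)
ckpt.incrementStepCount()
ckpt.saveModelWeights(ckpt.step_count, "run1")   # writes run1_weights.pkl and run1_stepCounts.pkl
ckpt.loadModelWeights("run1")                    # restores the weights and the step count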

# This is a Neural Network with 1 hidden layer
class Perceptron:
def __init__(self, data, weights = None):
print("Perceptron Init Ran")
random.shuffle(data)

# state would be 1 step at a time
inputs = np.array([[float(x) for x in row[0:-1]] for row in data])
self.inputs = np.hstack((inputs, [[1]] * len(inputs))) # Append 1 to each input row, for the bias weight
self.outputs = np.array([float(row[-1]) for row in data]) # Change no. of o/p to no. of actions
self.numInputs = len(self.inputs[0])

if weights is None:
weights = np.array([random.uniform(0, 100) \
for x in range(self.numInputs)])
weights[-1] = -1 # Set initial value of bias weights
self.weights = weights
self.error = float(sys.maxsize) # Initialise error to some very high value
self.smallestError = self.error
self.bestWeights = self.weights
self.fitHistory = []

def predict(self, x_i):
# The activation input is the dot product of the input vector and the weight vector
y = np.dot(x_i, self.weights)
# TODO: modify this so that it gives an array output (one value per action)
if y > 0:
return y
else:
return 0.01*y
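
The TODO in predict() above asks for an array output, one value per action. One way that could look, assuming the single weight vector is replaced by a weight matrix with one column per action (the names and sizes here are illustrative):

import numpy as np

def predict_all_actions(x_i, weight_matrix):
    # weight_matrix has shape (num_inputs, num_actions); the result is one value per action.
    y = np.dot(x_i, weight_matrix)
    # Same leaky-ReLU-style response as predict(), applied element-wise.
    return np.where(y > 0, y, 0.01 * y)

x = np.array([0.5, -1.0, 1.0])                       # example input, bias already appended
W = np.random.default_rng(0).uniform(0, 1, (3, 4))   # 3 inputs, 4 actions (assumed)
print(predict_all_actions(x, W))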

'''
def fit(self, state_dim, action_dim, learning_rate=0.001, gamma=0.99, epsilon=1.0, epsilon_decay=0.999, epsilon_min=0.01, numIters = 100, breakSoon = True):
self.state_dim = state_dim
self.action_dim = action_dim
self.learning_rate = learning_rate
self.gamma = gamma
self.epsilon = epsilon
self.epsilon_decay = epsilon_decay
self.epsilon_min = epsilon_min
self.loss_fn = MyMeanSquareError(1, 0.83)
'''

'''
def nextMove(location, action):
row, col = location
if action == "UP":
return (row - 1, col)
elif action == "DOWN":
return (row + 1, col)
elif action == "LEFT":
return(row, col - 1)
elif action == "RIGHT":
return(row, col + 1)
else:
return location
'''

# Deciding Rewards & Punishment
def Reward(self, location):

self.location = location
if location == 'Target':
print("Reached Target")
return 100

elif location == "Home":
print("Reached Home")
return 200

# elif location != "Home" and location != "Target":
elif location != "Home" or location != "Target":
return -300

else:
return -1


def fit(self, state, lr = 0.5, numIters = 100, breakSoon = True):
errorList = []
for iter in range(numIters):
totalError = 0.0
for i in range(len(self.outputs)):

# Checking The Difference Between The Actual & Predicted Output
pred = self.predict(self.inputs[i])
# Error is the difference between true and predicted class
error = self.outputs[i] - pred


'''# Rewards Function'''
# Calling Reward Function
self.state = state
reward = self.Reward(state)
reward += reward
print("Total rewards: " + str(reward))


# Multiplying with the error yields a positive or negative adjustment depending on a positive or negative prediction error
self.weights = self.weights + \
lr * error * self.inputs[i]
# totalError += abs(error)**2
totalError += abs(error)

self.saveBestFit(self.weights, totalError)
if breakSoon:
if totalError == 0.0:
break
self.printWeights()
errorList.append(totalError)

# Store error history for convenient plotting
self.fitHistory = errorList
self.error = totalError



# Store the best performing weights for reuse
def saveBestFit(self, w, e):
if e < self.smallestError:
self.smallestError = e
self.bestWeights = w

def printWeights(self):
print("\t".join(map(str, self.weights)), file=sys.stderr)

# Ideally we should split data into train/test sets to feed this method. For now, just use the data passed during initialization.
def test(self):
e = 0.0
for i in range(len(self.inputs)):
pred = self.predict(self.inputs[i])
e += self.outputs[i] - pred
print(e, file=sys.stdout)

def __str__(self) -> str:
s = "Inputs (1 sample): {}\n".format(self.inputs[0])
s += "weights: {}\n".format(self.weights)
s += "Error: {}\n".format(self.error)
return s
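
Finally, a hedged end-to-end sketch of how this Perceptron might be driven: the constructor expects rows whose last element is the target value (it appends the bias input itself), fit() expects a location label it can hand to Reward(), and predict() expects an input row that already includes the trailing bias term. The toy data, labels, and sizes below are purely illustrative:

data = [
    [0.0, 0.0, 0.0],
    [0.0, 1.0, 1.0],
    [1.0, 0.0, 1.0],
    [1.0, 1.0, 1.0],
]

p = Perceptron(data)
p.fit(state='Target', numIters=10)     # 'Target' is an illustrative location label for Reward()
print(p.predict([1.0, 0.0, 1.0]))      # features plus the trailing bias input
p.test()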