# Library Imports

In [4]:
import numpy as np              # Numpy numerical library
import pandas as pd             # Pandas for dataframes manipulation
import tensorflow as tf         # TensorFlow for neural networks and deep learning APIs

# Dataset Preparation

In [5]:
class DataLoader:
    """Load a CSV of samples and split it into state features and action labels.

    Column layout assumed by the slicing below: the first column is skipped
    (treated as a row ID), the last column is the action label, and every
    column in between is part of the state feature vector.
    """

    def __init__(self, csv_filepath):
        """Read ``csv_filepath`` with pandas and slice out features and labels.

        Args:
            csv_filepath: anything ``pd.read_csv`` accepts (path or file-like).
        """
        frame = pd.read_csv(csv_filepath)
        samples = frame.to_numpy()

        self.df_samples = frame                  # Original pandas dataframe
        self.numpy_samples = samples             # Same data as a numpy array

        # Drop the leading ID column and the trailing label column.
        self.states_features = samples[:, 1:-1]
        self.feature_dim = self.states_features.shape[1]
        # Last column holds the action label of each sample.
        self.action_labels = samples[:, -1]

    def get_unique_rows(self):
        """Return the distinct state-feature rows (also cached on ``self.unique_rows``)."""
        deduplicated = np.unique(self.states_features, axis=0)
        self.unique_rows = deduplicated
        return deduplicated

    def get_dataframe(self):
        """Return the samples as the pandas dataframe read from the CSV."""
        return self.df_samples

    def get_numpy(self):
        """Return the samples as a numpy array (all columns, including ID and label)."""
        return self.numpy_samples
    
# Load the samples and report the shape and count of the distinct state-feature rows.
data = DataLoader("new_data.csv")
unique_rows = data.get_unique_rows()
print(unique_rows.shape, len(unique_rows), sep="\n")

(97019, 26)
97019


# Model and Classes Definition

In [15]:
# Hyper-parameters for the DQN defined below.
Q_VALUE_DIM = 1            # Network output width: one scalar value per datapoint
LEARNING_RATE = 1e-3       # Step size handed to the gradient-descent optimizer
REPLAY_MEMORY_SIZE = 50    # Capacity of the RL replay memory

# Creating our main class for our DQN
class DeepQNet:
    """Deep Q-network wrapper holding a trainable model and a target model.

    Both networks share the same architecture (see ``create_model``) and start
    from identical weights. The class also provides the reward function and
    the action-selection rule used with the networks.
    """

    def __init__(self, dataset):
        """Build the main and target networks sized from ``dataset``.

        Args:
            dataset: object exposing a ``feature_dim`` attribute (number of
                state features per sample), e.g. a ``DataLoader``.

        Side effects:
            Prints the Keras summary of the main model.
        """
        self.input_dim = dataset.feature_dim + 1                    # State feature dim + 1 (for ground truth actions)

        self.model = self.create_model()                            # Main model that gets trained every step
        self.model.summary()                                        # Printing model details
        self.target_model = self.create_model()                     # Target model we predict against each step
        self.target_model.set_weights(self.model.get_weights())     # To make all the initial weights the same

        # Logging or TensorBoard here
        # # # # # # # # # # # # # # #

        # Used to count when to update target network with main network's weights
        self.target_update_counter = 0

    def create_model(self):
        """Create and compile the feed-forward value network.

        Architecture: input of width ``self.input_dim`` -> Dense(128, relu)
        -> Dense(128, relu) -> Dense(Q_VALUE_DIM, relu), trained with MSE loss
        and Adam at ``LEARNING_RATE``.

        Returns:
            A compiled ``tf.keras.Sequential`` model.
        """
        model = tf.keras.Sequential()
        # Pass the shape explicitly as a tuple keyword argument: a bare positional
        # int is only tolerated by some Keras versions.
        model.add(tf.keras.layers.Input(shape=(self.input_dim,)))               # Input dimension of the state-vector
        model.add(tf.keras.layers.Dense(128, activation="relu"))
        model.add(tf.keras.layers.Dense(128, activation="relu"))
        # NOTE(review): a ReLU output can only produce non-negative values; that
        # matches the 0/1 rewards from get_reward, but confirm it is intended.
        model.add(tf.keras.layers.Dense(Q_VALUE_DIM, activation="relu"))        # Output is value function
        # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
        # NOTE(review): 'accuracy' is of limited meaning for an MSE regression
        # target; kept so the reported metrics stay unchanged.
        model.compile(
            loss="mse",
            optimizer=tf.optimizers.Adam(learning_rate=LEARNING_RATE),
            metrics=['accuracy'],
        )
        return model

    def get_reward(self, a_predicted, a_label):
        """Return 1 when the predicted action equals the label, otherwise 0."""
        return 1 if a_predicted == a_label else 0

    # Function to implement the epsilon-greedy policy selection
    def greedy(self, epsilon, action_values):
        """Pick an action index from ``action_values``.

        With probability ``epsilon`` the index of the largest value is returned
        (greedy action); otherwise a uniformly random index is returned.

        NOTE(review): in the conventional epsilon-greedy formulation ``epsilon``
        is the *exploration* probability. Here it is the probability of acting
        greedily. Behaviour is kept as-is because callers may rely on it;
        confirm which meaning is intended.

        Args:
            epsilon: probability in [0, 1] of taking the greedy action.
            action_values: non-empty sequence of values, one per action.

        Returns:
            Integer index into ``action_values``.
        """
        p = np.random.uniform(low=0.0, high=1.0)    # Sample from [0, 1)

        if p < epsilon:             # Take the greedy action
            return np.argmax(action_values)

        # Take an exploration action
        return np.random.randint(low=0, high=len(action_values))
              

# Build the DQN from the loaded dataset; the constructor prints the model summary.
dq_net = DeepQNet(dataset=data)

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_24 (Dense)             (None, 128)               3584      
_________________________________________________________________
dense_25 (Dense)             (None, 128)               16512     
_________________________________________________________________
dense_26 (Dense)             (None, 1)                 129       
Total params: 20,225
Trainable params: 20,225
Non-trainable params: 0
_________________________________________________________________


In [16]:
# NOTE(review): range(1, -1) is empty (start > stop with the default step of +1),
# so this loop body never executes and nothing is printed. If a countdown was
# intended, range(1, -1, -1) would yield 1 then 0 — confirm intent, or remove
# this scratch cell.
for i in range(1, -1):
    print(i)