In [1]:
import numpy as np
import torch
from torch._C import device
#from torch._C import long

#own imports
from eggwr_plus import EGGWR_Plus

In [2]:
import argparse

In [5]:
# added Parse command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--a_t', type=float, help='Activation threshold', default=.95)
#parser.add_argument('--batch_size', type=int, help='Batch size for training', default=8)
parser.add_argument('--batch_size_ft', type=int, help='Batch size for fine tuning of feature extractors. E.g. BERT for NLP', default=32)
parser.add_argument('--datasets', '-d', nargs='+', type=str, help='Datasets used for training', default=['IRIS'])
parser.add_argument('--sem_a_t', type=float, help='Activation threshold for semantic memory in GDM model', default=.35)
parser.add_argument('--del_freq', type=int, help='Deletion frequency in SOINN network', default=10)
parser.add_argument('--delta_plus', type=float, help='Positive Label change rate', default=1)
parser.add_argument('--delta_minus', type=float, help='Negative Label change rate', default=0.1)
parser.add_argument('--dim', type=int, help='Dimension of growing memory and language model output', default=17)
parser.add_argument('--eps_b', type=float, help='Learning rate for weight adaption (BMU)', default=.1)
parser.add_argument('--eps_n', type=float, help='Learning rate for weight adaption of (sample)', default=.001)
parser.add_argument('--h_t', type=float, help='Habituation/Firing threshold', default=.3)
parser.add_argument('--gamma', type=float, help='Learning rate for label weight adaptation', default=.5)
parser.add_argument('--kappa', type=float, help='Habituation controlling parameter', default=1.05)
parser.add_argument('--learner', type=str, help='Learner method', default='SOINNPLUS')
parser.add_argument('--load_latest', type=bool, help='Load latest pretrained model from file', default=False)
parser.add_argument('--log_freq', type=int, help='Logging frequency of learning metrics', default=1)
parser.add_argument('--lr', type=float, help='Learning rate (language model/feature extractor)', default=3e-5)
parser.add_argument('--l_t', type=float, help='Label propagation threshold', default=.5)
parser.add_argument('--max_age', type=int, help='Maximum age of a node connecting edge', default=5)
parser.add_argument('--max_len', type=int, help='Maximum sequence length for the transformer input', default=20)
parser.add_argument('--m_t', type=int, help='Misclassification threshold', default=0)
parser.add_argument('--reduce', type=int, help='Maximum number of train/test samples per dataset', default=300)
parser.add_argument('--reduce_test', type=int, help='Maximum number of test samples per dataset, if -1 reduce is used as maximum number of samples', default=-1)
#parser.add_argument('--seed', type=int, help='Random state for reproducible output', default=42)
parser.add_argument('--tau_b', type=float, help='Constant habituation controlling rate (BMU)', default=.3)
parser.add_argument('--tau_n', type=float, help='Constant habituation controlling rate (sample)', default=.1)
parser.add_argument('--beta', type=float, help='Regulate influence of context on merge vector', default=0.5)
parser.add_argument('--n_context', type=int, help='Window size / number of context', default=4)
parser.add_argument('--class_list', type=list, help='List of classes', default=None)
parser.add_argument('--num_labels', type=int, help='Number of label classes', default=1)
parser.add_argument('--num_sentences', type=int, help='Number of Sentences to Split TextClassificationDatasets into, if -1 Dataset is not split into Sentences ', default=5)
parser.add_argument('--tuning_share', type=float, help='Share of training data used during fine tuning',default=0.1)
parser.add_argument('--bert_finetune', type=bool, help='finetune bert model on data ', default=False)
#args = parser.parse_args()

#original
#parser = argparse.ArgumentParser()
parser.add_argument("--policy", default="DDPG")                  # Policy name (TD3, DDPG or OurDDPG)
parser.add_argument("--env", default="HalfCheetah-v2")          # OpenAI gym environment name
parser.add_argument("--seed", default=0, type=int)              # Sets Gym, PyTorch and Numpy seeds
parser.add_argument("--start_timesteps", default=25e3, type=int)# Time steps initial random policy is used
parser.add_argument("--eval_freq", default=5e3, type=int)       # How often (time steps) we evaluate
parser.add_argument("--max_timesteps", default=1e6, type=int)   # Max time steps to run environment
parser.add_argument("--expl_noise", default=0.1)                # Std of Gaussian exploration noise
parser.add_argument("--batch_size", default=256, type=int)      # Batch size for both actor and critic
parser.add_argument("--discount", default=0.99)                 # Discount factor
parser.add_argument("--tau", default=0.005)                     # Target network update rate
parser.add_argument("--policy_noise", default=0.2)              # Noise added to target policy during critic update
parser.add_argument("--noise_clip", default=0.5)                # Range to clip target policy noise
parser.add_argument("--policy_freq", default=2, type=int)       # Frequency of delayed policy updates
parser.add_argument("--save_model", action="store_true")        # Save model and optimizer parameters
parser.add_argument("--load_model", default="")                 # Model load file name, "" doesn't load, "default" uses file_name

#my own arguments
parser.add_argument("--replay_memory", default="gwr_replay")    # Choose the replay memory to use
args = parser.parse_args("")

In [6]:
class GWR_replay(EGGWR_Plus):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        #Reward Matrix for every node j
        self.R = torch.zeros(self.size)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def add(self, state, action, next_state, reward, done):
        state = torch.from_numpy(state)
        next_state = torch.from_numpy(next_state)
        EGGWR_Plus.forward(self,0,[[state,1],[next_state,1]], action, reward)


    def sample(self):
        s_ind = np.random.randint(0, self.size)
        s_ind = torch.tensor(s_ind, dtype=torch.int64)
        temporal_temp = self.temporal[s_ind].to_dense()
        next_s_ind = torch.argmax(temporal_temp[:,0])
        #next_s_ind= torch.argmax(self.temporal[s_ind, :,0])
        temporal_edge = self.temporal[s_ind,next_s_ind].to_dense()
        print(temporal_edge)
        return (
            self.V[s_ind].float().to(self.device),
            self.V[next_s_ind].float().to(self.device),
            temporal_edge[1:7].to(self.device),
            temporal_edge[7].to(self.device),
            #never done state, because for nodes useless: 
            1
        )

    def get_bmu(self, state):
        state = torch.from_numpy(state)
        bmu, _, _ = self.activate_bmu(state)
        return self.V[bmu]

In [7]:
test = GWR_replay(**vars(args))

In [37]:
state = np.random.rand(17)
next_state = np.random.rand(17)
action = np.array([0,1,2,3,4,5])
test.add(state,action,next_state, 5, 0 )

test.size

15

In [56]:
a,b,c,d,e = test.sample()

tensor([2., 0., 1., 2., 3., 4., 5., 5.])


In [None]:
print(a, b, c, d, e)