<a href="https://colab.research.google.com/github/nerdk312/Model-based-RL/blob/master/RNN_model_Cartpole.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import gym
import numpy as np
from sklearn.preprocessing import StandardScaler
from torch.nn.utils import clip_grad_norm_
torch.manual_seed(1)
# Helpful links
# https://github.com/omerbsezer/Fast-Pytorch
# https://towardsdatascience.com/pytorch-basics-how-to-train-your-neural-net-intro-to-rnn-cb6ebc594677


<torch._C.Generator at 0x7fccdd89f230>

In [5]:
class General_functions():
    def __init__(self, ENV_NAME,n_actions):
        self.ENV_NAME = ENV_NAME
        self.env = gym.make(self.ENV_NAME)
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.n_actions = n_actions

    def one_hot(self,i):
        a = np.zeros(self.n_actions, 'uint8')
        a[i-1] = 1
        return a

class Data_collection(General_functions):
    def __init__(self,ENV_NAME,n_actions):
        super(Data_collection,self).__init__(ENV_NAME,n_actions)
        self.dataset_random = [] # Where the data is saved each time
        

    def collate_data(self,random_dataset, rl_dataset):
        rand_data = np.array(random_dataset)
        num_rand_examples = len(rand_data)
        D_train = rand_data[:int(-num_rand_examples*1/5)] 
        D_valid = rand_data[int(-num_rand_examples*1/5):]
        print("number random examples:",num_rand_examples, 'len(D_train_rand)', len(D_train),'len(D_valid_rand)', len(D_valid))
        if len(rl_dataset) > 0:
            # Adds the rl dataset to the random dataset if there is any present
            rl_data = np.array(rl_dataset)
            num_rl_examples = len(rl_data)
            D_rl_train = rl_data[:int(-num_rl_examples*1/5)] 
            D_rl_valid = rl_data[int(-num_rl_examples*1/5):]
                        
            D_train = np.concatenate([D_train, D_rl_train], axis = 0)
            D_valid = np.concatenate([D_valid, D_rl_valid], axis = 0)
            print("number rl examples:",num_rl_examples, 'len(D_rl_train)', len(D_rl_train),'len(D_valid_rand)', len(D_rl_valid))
            
        #print("len(D_train):", len(D_train), 'len(D_valid)', len(D_valid))

        # Shuffle the dataset
        '''        
        sff = np.arange(len(D_train))
        np.random.shuffle(sff)
        D_train = D_train[sff]
        '''
        #print('D_train shape',D_train.shape)

        # Create the input and output for the train
        X_train_obs = np.array([obs for obs,_,_,_,_ in D_train]) # Takes obs and action
        X_train_obs = X_train_obs.astype(np.float32)
        #X_train_obs = X_train_obs.astype(np.int16) # Need to change it to a int16 so it is signed ( so negative values can be calculated)
        X_train_act = np.array([act for _,_,_,_,act in D_train])
        
        # Env output
        y_env_train = np.array([no for _,no,_,_,_ in D_train])
        y_env_train = y_env_train.astype(np.float32)
        #y_env_train = y_env_train.astype(np.int16) # Need to change it to a int16 so it is signed ( so negative values can be calculated)
        y_env_train = y_env_train - X_train_obs 
        
        # Next state output
        X_val_obs = np.array([obs for obs,_,_,_,_ in D_valid]) # Takes obs and action
        X_val_obs = X_val_obs.astype(np.float32)
        #X_val_obs = X_val_obs.astype(np.int16) # Need to change it to a int16 so it is signed ( so negative values can be calculated)        
        X_val_act = np.array([act for _,_,_,_,act in D_valid])

        y_env_val = np.array([no for _,no,_,_,_ in D_valid])
        y_env_val = y_env_val.astype(np.float32)
        #y_env_val = y_env_val.astype(np.int16)
        y_env_val = y_env_val - X_val_obs 

        env_train_data, env_val_data = (X_train_obs, X_train_act, y_env_train), (X_val_obs, X_val_act, y_env_val)
        return env_train_data, env_val_data
    
    def normalise(self,train_data, val_data, scaler = None): # Nawid - Used to normalise each dimension individually
        if scaler is None:
            scaler = StandardScaler()
            train_data = scaler.fit_transform(train_data)
        else: 
            train_data = scaler.transform(train_data)
        
        val_data = scaler.transform(val_data)
        return train_data, val_data, scaler
    
    def normalise_dataset(self,env_train_data, env_val_data, X_env_obs_scaler = None,y_env_scaler=None):
        (X_env_train_obs, X_env_train_act, y_env_train), (X_env_val_obs, X_env_val_act, y_env_val) = env_train_data, env_val_data
        
        X_env_train_obs,X_env_val_obs, X_env_obs_scaler =  self.normalise(X_env_train_obs, X_env_val_obs, X_env_obs_scaler)
        y_env_train, y_env_val, y_env_scaler = self.normalise(y_env_train, y_env_val, y_env_scaler)
    
    
        X_env_train = np.concatenate((X_env_train_obs,X_env_train_act),axis=1)
        X_env_val = np.concatenate((X_env_val_obs,X_env_val_act),axis=1)
        env_train_data, env_val_data = (X_env_train, y_env_train),(X_env_val, y_env_val)    

        return env_train_data, env_val_data,X_env_obs_scaler, y_env_scaler

    
    def gather_random_trajectories(self,num_traj):
        for n in range(num_traj):
            if n % 10 ==0:
                print('trajectory number:',n)
                # Initial set up
            #self.env.seed(0)
            self.env = gym.make(self.ENV_NAME)
            obs = self.env.reset()
            
            while True:
                sampled_action = np.random.randint(0,2)
                 
                sampled_action_one_hot = self.one_hot(sampled_action)
                next_obs, reward, done, next_info = self.env.step(sampled_action)
                    
                self.dataset_random.append([obs, next_obs, reward, done,sampled_action_one_hot])

                obs =  next_obs
                if done:
                    break 
        return self.dataset_random
    
ENV_NAME = 'CartPole-v0'
n_actions = 2 
data_collector = Data_collection(ENV_NAME, n_actions)   
rand_dataset = data_collector.gather_random_trajectories(10)
rl_dataset = []
env_train,env_val = data_collector.collate_data(rand_dataset, rl_dataset)
full_env_train,full_env_val,X_env_obs_scaler, y_env_scaler = data_collector.normalise_dataset(env_train,env_val)

trajectory number: 0
number random examples: 175 len(D_train_rand) 140 len(D_valid_rand) 35


In [4]:
x_train, y_train = full_env_train
print(x_train.dtype)

float32


# Sequence prediction with batches

In [0]:
class state_predictor(nn.Module):
  def __init__(self,input_size, hidden_state_size,output_size):
    super(state_predictor, self).__init__()    
    self.lstm=nn.LSTM(input_size,hidden_state_size)
    self.linear=nn.Linear(hidden_state_size, output_size)

  def forward(self,x,h):
    # h: hidden_state, c=output
    # x= x.view(batch_size,timesteps,embed_size)
    #print(h[0].shape)
    #print('x shape:',x.shape)
    lstm_out,(h,c)=self.lstm(x,h)
    #print(out.size())
    #(batch_size*timesteps, hidden_size)
    #out.size(0):batch_size; out.size(1):timesteps, out.size(2): hidden_size
    fc_input=lstm_out.reshape(lstm_out.size(0)*lstm_out.size(1),lstm_out.size(2)) # flattens the matrix
    #print('post reshape',out.size())
    # decode hidden states of all time steps
    out= self.linear(fc_input)
    out = out.reshape(lstm_out.size(0), lstm_out.size(1), out.size(1)) # dimensions timesteps, batch, output size
    #print(out.size())
    # Should reshape the network to make it have timesteps, batchsize and state_size
    return out, (h,c)

In [0]:
state_input = 6
state_output = 4
hidden_size = 1024
learning_rate = 0.002
num_layers = 1
num_directions = 1

model = state_predictor(state_input,hidden_size,state_output)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
batch_size = 5
timesteps = 5

In [35]:
for it in range(10):
    for i in range(0,len(x_train), timesteps*batch_size):
        if len(x_train) > i + (timesteps*batch_size):
            x = x_train[i:i+(timesteps*batch_size),:]
            y = y_train[i:i+(timesteps*batch_size),:]

            inputs = torch.tensor(x)
            inputs = inputs.view(timesteps,batch_size,-1)

            targets = torch.tensor(y)
            targets = targets.view(timesteps,batch_size,-1)

            states = (torch.zeros(num_layers*num_directions,batch_size,hidden_size),
                    torch.zeros(num_layers*num_directions,batch_size,hidden_size)) # Dim ((num_layers * num_direction,batch_size, hidden_size)

            outputs,_ = model(inputs, states)
            loss = loss_fn(outputs, targets) 

            model.zero_grad()
            loss.backward()
        
            clip_grad_norm_(model.parameters(),0.1)
            optimizer.step()
        
            print("Epoch [{}/{}], Loss: {:.4f}".format(i, 10, loss.item()))       

Epoch [0/10], Loss: 0.6620
Epoch [25/10], Loss: 1.0300
Epoch [50/10], Loss: 0.8499
Epoch [75/10], Loss: 0.9297
Epoch [100/10], Loss: 0.6890
Epoch [0/10], Loss: 0.5675
Epoch [25/10], Loss: 0.7744
Epoch [50/10], Loss: 0.6840
Epoch [75/10], Loss: 0.7381
Epoch [100/10], Loss: 0.5243
Epoch [0/10], Loss: 0.4915
Epoch [25/10], Loss: 0.4961
Epoch [50/10], Loss: 0.5328
Epoch [75/10], Loss: 0.5148
Epoch [100/10], Loss: 0.3825
Epoch [0/10], Loss: 0.4250
Epoch [25/10], Loss: 0.3722
Epoch [50/10], Loss: 0.3765
Epoch [75/10], Loss: 0.3035
Epoch [100/10], Loss: 0.2819
Epoch [0/10], Loss: 0.2472
Epoch [25/10], Loss: 0.2683
Epoch [50/10], Loss: 0.2382
Epoch [75/10], Loss: 0.1558
Epoch [100/10], Loss: 0.1612
Epoch [0/10], Loss: 0.1456
Epoch [25/10], Loss: 0.1117
Epoch [50/10], Loss: 0.0879
Epoch [75/10], Loss: 0.0709
Epoch [100/10], Loss: 0.0718
Epoch [0/10], Loss: 0.0371
Epoch [25/10], Loss: 0.0656
Epoch [50/10], Loss: 0.0759
Epoch [75/10], Loss: 0.0381
Epoch [100/10], Loss: 0.0319
Epoch [0/10], Loss: 

In [36]:
batch_size = 2
test_inputs = torch.tensor(x_train[0:timesteps*batch_size,:])
test_inputs = test_inputs.view(timesteps,batch_size,-1)
print(test_inputs.size())

targets = torch.tensor(y_train[0:timesteps*batch_size,:])
targets = targets.view(timesteps,batch_size,-1)
print(targets.size())
    
new_states = (torch.zeros(num_layers*num_directions,batch_size,1024),
          torch.zeros(num_directions*num_layers,batch_size,1024))


outputs,_ = model(test_inputs, new_states)
print('outputs:',outputs)
print('targets:',targets)

torch.Size([5, 2, 6])
torch.Size([5, 2, 4])
outputs: tensor([[[ 0.2367, -0.9234, -0.1610,  0.9299],
         [-0.0368,  1.0571,  0.1748, -1.0366]],

        [[ 0.2056,  1.0955, -0.0589, -1.0945],
         [ 0.4798, -0.9640, -0.3664,  0.9844]],

        [[ 0.3393, -0.8912, -0.1151,  0.9273],
         [ 0.0212, -0.8334,  0.2355,  0.8607]],

        [[-0.2129,  1.0500,  0.5570, -1.0057],
         [ 0.0255, -1.0316,  0.2056,  1.0573]],

        [[-0.2341,  0.8849,  0.6128, -0.8092],
         [ 0.0287, -1.0813,  0.2453,  1.1211]]], grad_fn=<ViewBackward>)
targets: tensor([[[ 0.2264, -0.8743, -0.0813,  0.9267],
         [-0.0452,  1.1466,  0.2553, -1.0886]],

        [[ 0.2259,  1.1461, -0.0721, -1.0817],
         [ 0.4969, -0.8748, -0.3972,  0.9342]],

        [[ 0.2252, -0.8745, -0.0582,  0.9286],
         [-0.0465, -0.8745,  0.2790,  0.9293]],

        [[-0.3181,  1.1460,  0.6165, -1.0789],
         [-0.0472, -0.8759,  0.2923,  0.9497]],

        [[-0.3192,  1.1445,  0.6364, -1.0570],
   

# Sequence with a single sample

In [0]:
class state_predictor(nn.Module):
  def __init__(self,input_size, hidden_state_size,output_size):
    super(state_predictor, self).__init__()
    
    self.lstm=nn.LSTM(input_size,hidden_state_size)
    self.linear=nn.Linear(hidden_state_size, output_size)

  def forward(self,x,h):
    # h: hidden_state, c=output
    # x= x.view(batch_size,timesteps,embed_size)
    #print(h[0].shape)
    out,(h,c)=self.lstm(x,h)
    #print(out.size())
    #(batch_size*timesteps, hidden_size)
    #out.size(0):batch_size; out.size(1):timesteps, out.size(2): hidden_size
    out=out.reshape(out.size(0)*out.size(1),out.size(2)) # flattens the matrix
    #print('post reshape',out.size())
    # decode hidden states of all time steps
    out= self.linear(out)
    #print(out.size())
    return out, (h,c)

In [0]:
for it in range(10):
    for i in range(0,len(x_train), timesteps):
        if len(x_train) > i + timesteps:
            x = x_train[i:i+timesteps,:]
            y = y_train[i:i+timesteps,:]

            inputs = torch.tensor(x)
            inputs = inputs.view(timesteps,batch_size,-1)

            targets = torch.tensor(y)

            states = (torch.zeros(1,batch_size,hidden_size),
                  torch.zeros(1,batch_size,hidden_size))

            outputs,_ = model(inputs, states)
            loss = loss_fn(outputs, targets) 

            model.zero_grad()
            loss.backward()
        
            clip_grad_norm_(model.parameters(),0.1)
            optimizer.step()
        
            print("Epoch [{}/{}], Loss: {:.4f}".format(i, 10, loss.item()))       

In [0]:
test_input = torch.tensor(x_train[0:timesteps,:])
test_input = test_input.view(timesteps,1,-1)

print(test_input.size())
targets = torch.tensor(y_train[0:timesteps,:])
#targets = targets.view(1,1,-1)
print(targets.size())
    
new_states = (torch.zeros(1,1,1024),
          torch.zeros(1,1,1024))



outputs,_ = model(test_input, new_states)
print('outputs:',outputs)
print('targets:',targets)

torch.Size([5, 1, 6])
torch.Size([5, 4])
torch.Size([1, 1, 1024])
torch.Size([1, 1024])
outputs: tensor([[-0.1105, -1.0144,  0.0497,  0.9935],
        [-0.5606,  1.0792,  0.4549, -1.0844],
        [-0.2443,  0.9342,  0.1536, -0.9413],
        [ 0.3207,  1.0246, -0.3347, -1.0304],
        [ 0.9182,  1.0647, -0.8020, -1.0675]], grad_fn=<AddmmBackward>)
targets: tensor([[-0.1773, -1.0489, -0.0194,  1.0251],
        [-0.6346,  0.9543,  0.3945, -0.9710],
        [-0.1765,  0.9539, -0.0303, -0.9656],
        [ 0.2815,  0.9540, -0.4530, -0.9665],
        [ 0.7394,  0.9544, -0.8760, -0.9737]])


# Batch samples

In [0]:
for it in range(10):
    for mb in range(0,len(x_train), batch_size): # Nawid- Batch size is the step size
        if len(x_train) > mb + batch_size:
            x_mb = x_train[mb:mb+batch_size,:]
            y_mb = y_train[mb:mb+batch_size,:]

            inputs = torch.tensor(x_mb)
            inputs = inputs.view(1,batch_size,-1)

            targets = torch.tensor(y_mb)
            #targets = targets.view(1,batch_size,-1)

            states = (torch.zeros(1,batch_size,hidden_size),
                  torch.zeros(1,batch_size,hidden_size))
    
            outputs,_ = model(inputs, states)
            loss = loss_fn(outputs, targets)

            model.zero_grad()
            loss.backward()
        
            clip_grad_norm_(model.parameters(),0.1)
            optimizer.step()
        

            print("Epoch [{}/{}], Loss: {:.4f}".format(i, 10, loss.item()))





Epoch [160/10], Loss: 0.0048
Epoch [160/10], Loss: 0.0045
Epoch [160/10], Loss: 0.0032
Epoch [160/10], Loss: 0.0074
Epoch [160/10], Loss: 0.0265
Epoch [160/10], Loss: 0.0171
Epoch [160/10], Loss: 0.0080
Epoch [160/10], Loss: 0.0306
Epoch [160/10], Loss: 0.0026
Epoch [160/10], Loss: 0.0029
Epoch [160/10], Loss: 0.0091
Epoch [160/10], Loss: 0.0191
Epoch [160/10], Loss: 0.0045
Epoch [160/10], Loss: 0.0104
Epoch [160/10], Loss: 0.0123
Epoch [160/10], Loss: 0.0030
Epoch [160/10], Loss: 0.0051
Epoch [160/10], Loss: 0.0068
Epoch [160/10], Loss: 0.0064
Epoch [160/10], Loss: 0.0070
Epoch [160/10], Loss: 0.0062
Epoch [160/10], Loss: 0.0089
Epoch [160/10], Loss: 0.0103
Epoch [160/10], Loss: 0.0134
Epoch [160/10], Loss: 0.0015
Epoch [160/10], Loss: 0.0014
Epoch [160/10], Loss: 0.0078
Epoch [160/10], Loss: 0.0183
Epoch [160/10], Loss: 0.0019
Epoch [160/10], Loss: 0.0038
Epoch [160/10], Loss: 0.0033
Epoch [160/10], Loss: 0.0030
Epoch [160/10], Loss: 0.0045
Epoch [160/10], Loss: 0.0039
Epoch [160/10]

In [0]:
test_input = torch.tensor(x_train[1,:])
test_input = test_input.view(1,1,-1)

print(test_input.size())
targets = torch.tensor(y_train[1,:])
targets = targets.view(1,1,-1)
print(targets.size())
    
new_states = (torch.zeros(1,1,1024),
          torch.zeros(1,1,1024))



outputs,_ = model(test_input, new_states)
print('outputs:',outputs)
print('targets:',targets)
    

torch.Size([1, 1, 6])
torch.Size([1, 1, 4])
outputs: tensor([[-0.5882,  0.9145,  0.4274, -0.9325]], grad_fn=<AddmmBackward>)
targets: tensor([[[-0.6346,  0.9543,  0.3945, -0.9710]]])


# SINGLE SAMPLE

In [0]:
for i in range(len(x_train)):
    
    inputs = torch.tensor(x_train[i,:])
    #print(inputs.dtype)
    inputs = inputs.view(1,1,-1)
    
    targets = torch.tensor(y_train[i,:])
    targets = targets.view(1,1,-1)
    #print('targets:', targets.size())
    
    
    states = (torch.zeros(1,1,1024),
              torch.zeros(1,1,1024))

    outputs,_ = model(inputs, states)
    #print('outputs:',outputs.size())
    #print('targets:',targets.size())
    loss = loss_fn(outputs, targets.reshape(1,-1))

    model.zero_grad()
    loss.backward()
    optimizer.step()
        
    clip_grad_norm_(model.parameters(),0.1)
    step = (i+1)
    if step % 10 ==0:
        print("Epoch [{}/{}], Loss: {:.4f}".format(i, 10, loss.item()))
 

Epoch [9/10], Loss: 0.2790
Epoch [19/10], Loss: 0.0826
Epoch [29/10], Loss: 0.0048
Epoch [39/10], Loss: 0.0247
Epoch [49/10], Loss: 0.0596
Epoch [59/10], Loss: 0.0397
Epoch [69/10], Loss: 0.0165
Epoch [79/10], Loss: 0.0847


In [0]:
ENV_NAME = 'CartPole-v0'
env = gym.make(ENV_NAME)
obs = env.reset()
obs_list = []
print(obs.shape)
obs_list.append(obs)
obs_values = np.array([obs for obs in obs_list])
print(obs_values.shape)
action = env.action_space.sample()
action_one_hot = one_hot(action)
print(action_one_hot.shape)
action_list = []
action_list.append(action_one_hot)
action_values = np.array([act for act in action_list])
print(action_values.shape)
state =  np.concatenate((obs_values, action_values),axis =1 )
state.shape

(4,)
(1, 4)
(2,)
(1, 2)


(1, 6)

In [0]:




state = state.reshape(1,1,6)
state_input = torch.tensor(state)
lstm = nn.LSTM(6,6)
state_input = state_input.view(len(state_input),1,-1)
hidden = (torch.randn(1,1,6), torch.randn(1,1,6))
out, hidden = lstm(state_input.float(), hidden)



tensor([[[ 0.0386,  0.1876, -0.0047, -0.1450, -0.0882,  0.2597]]],
       grad_fn=<StackBackward>)


In [0]:
#print(obs) # shows 4 different values
inputs = obs.reshape(1,1,4) # sequence size, batch size, and feature size
inputs = torch.tensor(inputs)
lstm = nn.LSTM(4, 4)  # Input dim is 4, output dim is 4
inputs = inputs.view(len(inputs),1,-1)
#print(inputs.size())
lstm = lstm.float()
hidden = (torch.randn(1,1,4), torch.randn(1,1,4))
out, hidden = lstm(inputs.float(), hidden)
print(hidden)

# Initial practice

In [0]:
lstm = nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5

# initialize the hidden state.
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3)) # I believe there is 2 different sets of values since this is a lstm and so there are values for the hidden state as well as well as the cell state
for i in inputs:
    # Step through the sequence one element at a time.
    # after each step, hidden contains the hidden state.
    out, hidden = lstm(i.view(1, 1, -1), hidden)
    # alternatively we can do the entire sequence all at once
    # The first value returned by LSTM is all the hidden states throughout the sequence
    # the second isjust the most recent hidden state



inputs = torch.cat(inputs).view(len(inputs),1,-1)
hidden = (torch.randn(1,1,3), torch.randn(1,1,3)) # reset hidden state
out, hidden  = lstm(inputs, hidden)
print('out:',out)
print('hidden:',hidden)


out: tensor([[[ 3.3281e-04, -3.4894e-01,  2.0808e-01]],

        [[ 1.5697e-01, -6.1789e-02,  1.4116e-01]],

        [[ 1.4011e-01, -1.3284e-01,  7.2157e-02]],

        [[ 2.9385e-02, -1.7298e-01,  2.4809e-02]],

        [[ 1.7907e-01, -2.5394e-01,  3.5012e-02]]], grad_fn=<StackBackward>)
hidden: (tensor([[[ 0.1791, -0.2539,  0.0350]]], grad_fn=<StackBackward>), tensor([[[ 0.6383, -0.4234,  0.0539]]], grad_fn=<StackBackward>))


# Pytorch example

In [0]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)

training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]

word_to_ix = {}
for sent, tags in training_data:
    for word in sent: #  looks at the words in the sentence 
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix) #  This adds a new word into the dictionary as well as adding as add a value for it
print(word_to_ix)
tag_to_ix = {"DET": 0, "NN": 1, "V": 2}

# These will usually be more like 32 or 64 dimensional.
# We will keep them small, so we can see how the weights change as we train.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6


class LSTMTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger,self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs,and outputs hidden states
        # with dimensionality hidden  dim
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim. tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence) # changes the sentence to word embeddings
        lstm_out, _ = self.lstm(embeds.view(len(sentence),1,-1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence),-1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores        

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}


In [0]:
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.SGB(model.parameters(), lr = 0.1)

with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)
    print(tag_scores)

for epoch in range(300):
    for sentence, tags in training_data:
        # Step 1. Remember that Pytorch accumulates gradients
        # we need to clear them out before each instance
        model.zero_grad()

        # Step 2. get our inputs ready for the network, that is, turn them into tensors of word indices
        # Tensors of word indices
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)

        # Step 3. Run our forward pass
        tag_scores = model(sentence_in)

        #Step 4. Compute the loss, gradients, and update the parameters by calling optimizer.step()
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()

AttributeError: ignored