### 1. Import packages

In [1]:
import deepdish as dd
fix_memory = dd.io.load('fix_memory.h5')
small_memory=dd.io.load('small_memory.h5')

In [2]:
import wandb
wandb.login()

wandb: Currently logged in as: rant3. Use `wandb login --relogin` to force relogin


True

In [3]:
import numpy as np
import pandas as pd
import random
from IPython import display
from collections import namedtuple, deque
import time
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
""
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
device = torch.device("cpu")
# use cpu run
import gym

### 2. Helper functions

In [4]:
def dict2array(state):
    new_state = []
    for key  in state.keys():
        if key != 'sw':
            new_state.append(state[key])
        else:
            new_state += list(state['sw'])        
    state = np.asarray(new_state)
    return state

In [5]:
def dict2array_partial(state):
    new_state = []
    num_observable_states = 0
    for key  in state.keys():
        if key != 'sw':
            if key == 'cumsumfert':
                new_state.append(state[key])
            if key == 'dap':
                new_state.append(state[key])
            if key == 'dtt':
                new_state.append(state[key])
            if key == 'istage':
                new_state.append(state[key])
            if key == 'pltpop':
                new_state.append(state[key])
            if key == 'rain':
                new_state.append(state[key])
            if key == 'srad':
                new_state.append(state[key])
            if key == 'tmax':
                new_state.append(state[key])
            if key == 'tmin':
                new_state.append(state[key])
            if key == 'vstage':
                new_state.append(state[key])
            if key == 'xlai':
                new_state.append(state[key])
        else:
            new_state += list(state['sw'])
    state = np.asarray(new_state)
    return state

### 3. Initialize the environment

In [6]:
env_args = {
    'run_dssat_location': '/opt/dssat_pdi/run_dssat',  # assuming (modified) DSSAT has been installed in /opt/dssat_pdi
    'log_saving_path': './logs/dssat-pdi.log',  # if you want to save DSSAT outputs for inspection
    # 'mode': 'irrigation',  # you can choose one of those 3 modes
    # 'mode': 'fertilization',
    'mode': 'all',
    'seed': 123456,
    'random_weather': False,  # if you want stochastic weather
}
env = gym.make('gym_dssat_pdi:GymDssatPdi-v0', **env_args)
print('Observation:',env.observation,)
print(len(env.observation),len(env.observation['sw']))
ram_dimensions = 20
nb_actions = 25
print('\nRam information received from DASSAT will has %d dimensions.' % ram_dimensions)
print('There are %d possible actions at each step.' % nb_actions)
print('Discrete?',type(gym.spaces)== gym.spaces.Discrete)
# observation has 27 elements, 9 values in soil water
# state size = 27+8 dimension
# how to defind nb_action? why is 200?

Observation: {'cleach': 0.0, 'cnox': 0.0, 'cumsumfert': 0.0, 'dap': 0, 'dtt': 0.0, 'es': 0.0, 'grnwt': 0.0, 'istage': 7, 'nstres': 0.0, 'pcngrn': 0.0, 'pltpop': 7.199999809265137, 'rain': 0.0, 'rtdep': 0.0, 'runoff': 0.0, 'srad': 13.300000190734863, 'sw': array([0.086     , 0.086     , 0.086     , 0.086     , 0.086     ,
       0.076     , 0.076     , 0.13      , 0.25799999]), 'swfac': 0.0, 'tleachd': 0.0, 'tmax': 22.200000762939453, 'tmin': 3.299999952316284, 'tnoxd': 0.0, 'topwt': 0.0, 'trnu': 0.0, 'vstage': 0.0, 'wtdep': 0.0, 'wtnup': 0.0, 'xlai': 0.0}
27 9

Ram information received from DASSAT will has 20 dimensions.
There are 25 possible actions at each step.
Discrete? False


### 4. Define the network

In [7]:
class Full_QNetwork(nn.Module):
    """Agent (Policy) Model."""
    # given a state of 35 dim, Qnetwork will return 200 values for each possible action  

    def __init__(self, state_size, action_size, fc1_units=128*2,fc2_units=128*2,fc3_units=128*2):
        """Initialize parameters and build model.
        Params
        ======
            state_size (int): Dimension of each state
            action_size (int): Dimension of each action
            fc1_units (int): Number of nodes in first hidden layer
            why is it 256? randomly?
        """
        super(Full_QNetwork, self).__init__()
        self.fc1 = nn.Linear(state_size, fc1_units)
        self.fc2 = nn.Linear(fc1_units, fc2_units)
        self.fc3 = nn.Linear(fc2_units, fc3_units)
        self.fc4 = nn.Linear(fc3_units, action_size)
        # set a nn with 1 layer
        
    def forward(self, state):
        """Build a network that maps state -> action values."""
        x = F.relu(self.fc1(state))
        y = F.relu(self.fc2(x))
        z = F.relu(self.fc3(y))
        #Applies the rectified linear unit function element-wise. max(0,x)
        return self.fc4(z)

In [8]:
trained_model = Full_QNetwork(35, 25)
trained_model.load_state_dict(torch.load('/home/rant3/focal/Model/FL BOTH Full/1.pth'))

<All keys matched successfully>

In [9]:
class Full_Agent():
    """Interacts with and learns from the environment."""

    def __init__(self, state_size, action_size, trained_model):
        """Initialize an Agent object.
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
        """
        self.state_size = state_size
        self.action_size = action_size
        # Q-Network
        self.qnetwork = trained_model
        #self.qnetwork_local = QNetwork(state_size, action_size).to(device)

        # Replay memory

    def act(self, state, eps=0.):
        """Returns actions for given state as per current policy.
        Params
        ======
            state (array_like): current state
            eps (float): epsilon, for epsilon-greedy action selection
        """
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        self.qnetwork.eval()
        with torch.no_grad():
            action_values = self.qnetwork(state)
        self.qnetwork.train()

#         Epsilon-greedy action selection
        if random.random() > eps:
            return np.argmax(action_values.cpu().data.numpy())
        else:
            return random.choice(np.arange(self.action_size))

#         Epsilon-greedy action selection
#         if random.random() > eps:
#             return np.argmax(action_values.cpu().data.numpy())
#         else:
#             return random.choice(np.arange(self.action_size))
# #         return action_values.cpu().data.nump
    def get(self,state):
        self.qnetwork.eval()
        with torch.no_grad():
            output= self.qnetwork(state)
        return output

In [10]:
class Partial_QNetwork(nn.Module):
    """Agent (Policy) Model."""
    # given a state of 35 dim, Qnetwork will return 200 values for each possible action  

    def __init__(self, state_size, action_size, fc1_units=128*2,fc2_units=128*2,fc3_units=128*2):
        """Initialize parameters and build model.
        Params
        ======
            state_size (int): Dimension of each state
            action_size (int): Dimension of each action
            fc1_units (int): Number of nodes in first hidden layer
            why is it 256? randomly?
        """
        super(Partial_QNetwork, self).__init__()
        self.fc1 = nn.Linear(state_size, fc1_units)
        self.fc2 = nn.Linear(fc1_units, fc2_units)
        self.fc3 = nn.Linear(fc2_units, fc3_units)
        self.fc4 = nn.Linear(fc3_units, action_size)
        # set a nn with 1 layer
        
    def forward(self, state):
        """Build a network that maps state -> action values."""
        x = F.relu(self.fc1(state))
        y = F.relu(self.fc2(x))
        z = F.relu(self.fc3(y))
        #Applies the rectified linear unit function element-wise. max(0,x)
        return 4*torch.sigmoid(self.fc4(z))*torch.tensor([[float(40),float(6)]])

In [11]:
class Agent():
    """Interacts with and learns from the environment."""

    def __init__(self, state_size, action_size, LR):
        """Initialize an Agent object.
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
        """
        self.state_size = state_size
        self.action_size = action_size
        # Q-Network
        self.qnetwork = Partial_QNetwork(state_size, action_size).to(device)
        self.optimizer = optim.Adam(self.qnetwork.parameters(), lr=LR)
        self.targetwork = trained_model
    
    
    def learn_1(self,sp,input1):
        sp=torch.from_numpy(sp).float().unsqueeze(0).to(device)
        input2=self.qnetwork(sp)
        input2=input2.type(torch.float64)
        print('input1 is', input1)
        print('input2 is', input2)
        #if round(input2.item())!= 0:
        #    print('input1 is', input1.item())
        #    print('input2 is', round(input2.item()))
        #    print(episode)
        print('')
        loss = F.mse_loss(input1, input2)
        wandb.log({"loss": loss.item()})
        # Minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        # after this, the parameter of local network will change based on gradient descent
        for param in self.qnetwork.parameters():
            param.grad.data.clamp_(-1, 1)
        # stabilize traning to keep grad between (-1,1)
        self.optimizer.step()
        
    def learn_2(self,memory,batch_size):
        sample=random.sample(fix_memory,batch_size)
        input1=sample[0][2]
        #print(input1)
        state=torch.from_numpy(sample[0][1]).float().unsqueeze(0)
        input2=self.qnetwork(state)
        input2=input2.type(torch.float64)
        for i in range(1,batch_size):
            input1=torch.vstack((input1,sample[i][2]))
            state=torch.from_numpy(sample[i][1]).float().unsqueeze(0).to(device)
            v=self.qnetwork(state)
            v=v.type(torch.float64)
            input2=torch.vstack((input2,v))
        loss = F.mse_loss(input1, input2)
        wandb.log({"loss": loss.item()})
        # Minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        # after this, the parameter of local network will change based on gradient descent
        for param in self.qnetwork.parameters():
            param.grad.data.clamp_(-1, 1)
        # stabilize traning to keep grad between (-1,1)
        self.optimizer.step()
    
    def save(self,name):
        torch.save(self.qnetwork.state_dict(),'/home/rant3/focal/model'+name+'.pth')

In [12]:
partial_agent = Agent(state_size=ram_dimensions, action_size=2, LR = 1e-5)

In [13]:
def super_partial(episodes,memory,batch_size,exp=1):
    wandb.init(
      # Set the project where this run will be logged
      project="Super Partial two output new", 
      # We pass a run name (otherwise it’ll be randomly assigned, like sunshine-lollypop-10)
      name=f"experiment_{exp}",)
    loss_window=deque(maxlen=1000)
    loss_list=[]
    for i in range(1,episodes+1):
        #sp = random.choice(memory)
        #print(sp)
        #par_state=sp[1]
        #full_sp = torch.from_numpy(full_sp).float().unsqueeze(0).to(device)
        #input1=sp[2]
        #input1=torch.tensor([[float(0)]])
        partial_agent.learn_2(memory,batch_size)
        #print('loss is',loss)
        #loss_list.append(loss.item())
        #loss_window.append(loss.item())
        #if i % 1000 == 0:
            #print('Average loss is', np.mean(loss_window))
        if i>80000:
            if i%1000==0:
                partial_agent.save(str(i))
            #partial_agent.save(str(i))

In [14]:
super_partial(90000,fix_memory,10)

In [15]:
a=torch.tensor([[float(1),float(2)]])
b=torch.tensor([[float(3),float(4)]])
d=torch.tensor(np.vstack((a,b)))n
print(c)

SyntaxError: invalid syntax (899350283.py, line 3)

In [None]:
a=torch.tensor([float(5),float(2)])
b=torch.tensor([float(8),float(4)])
f=torch.tensor(np.vstack((a,b)))
print(f)

In [None]:
loss=F.mse_loss(d,f)
print(loss)

In [26]:
a=torch.tensor([float(1),float(1)])
f=torch.tensor(torch.vstack((a,a)))
for i in range(9):
    f=torch.tensor(torch.vstack((f,a)))
b=torch.tensor([float(-1),float(-1)])
g=torch.tensor(torch.vstack((b,b)))
for i in range(9):
    g=torch.tensor(torch.vstack((g,b)))
print(f)
print(g)
print(F.mse_loss(f,g))

tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[-1., -1.],
        [-1., -1.],
        [-1., -1.],
        [-1., -1.],
        [-1., -1.],
        [-1., -1.],
        [-1., -1.],
        [-1., -1.],
        [-1., -1.],
        [-1., -1.],
        [-1., -1.]])
tensor(4.)


  f=torch.tensor(torch.vstack((a,a)))
  f=torch.tensor(torch.vstack((f,a)))
  g=torch.tensor(torch.vstack((b,b)))
  g=torch.tensor(torch.vstack((g,b)))
