In [1]:
import numpy as np
import pandas as pd
import random
from IPython import display
from collections import namedtuple, deque
import time
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
""
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
device = torch.device("cpu")
# Everything in this notebook runs on the CPU; `device` is what Full_Agent.act
# passes to `.to(...)` when it moves the state tensor.
import gym

In [2]:
def dict2array(state):
    """Flatten an observation dict into a 1-D numpy array.

    Values are emitted in the dict's iteration order. Every entry contributes
    one element, except the 'sw' entry, whose items are spliced in one by one.

    Params
    ======
        state (dict): observation mapping; 'sw' (if present) must be iterable

    Returns
    =======
        numpy.ndarray: the flattened observation
    """
    flat = []
    for name, value in state.items():
        if name == 'sw':
            # 'sw' is a sequence: unroll it instead of nesting it.
            flat.extend(list(value))
        else:
            flat.append(value)
    return np.asarray(flat)

In [3]:
def dict2array_partial(state, observable_keys=(
        'cumsumfert', 'dap', 'dtt', 'istage', 'pltpop', 'rain',
        'srad', 'tmax', 'tmin', 'vstage', 'xlai')):
    """Flatten only the observable part of an observation dict.

    Entries are emitted in the dict's iteration order (not in the order of
    ``observable_keys``). A key contributes one element when it is listed in
    ``observable_keys``; the 'sw' entry is always included and is unrolled
    element by element. Every other key is dropped.

    Params
    ======
        state (dict): observation mapping; 'sw' (if present) must be iterable
        observable_keys (iterable of str): scalar keys to keep. The default is
            the whitelist this notebook has always used.

    Returns
    =======
        numpy.ndarray: the flattened partial observation
    """
    observable = frozenset(observable_keys)
    new_state = []
    for key, value in state.items():
        if key == 'sw':
            # 'sw' is a sequence: splice its values in rather than nesting it.
            new_state += list(value)
        elif key in observable:
            new_state.append(value)
    return np.asarray(new_state)

In [4]:
# Configuration handed to the gym-dssat environment constructor.
env_args = {
    'run_dssat_location': '/opt/dssat_pdi/run_dssat',  # assuming (modified) DSSAT has been installed in /opt/dssat_pdi
    'log_saving_path': './logs/dssat-pdi.log',  # if you want to save DSSAT outputs for inspection
    # 'mode': 'irrigation',  # you can choose one of those 3 modes
    # 'mode': 'fertilization',
    'mode': 'all',
    'seed': 123456,
    'random_weather': False,  # if you want stochastic weather
}
env = gym.make('gym_dssat_pdi:GymDssatPdi-v0', **env_args)
print('Observation:',env.observation,)
print(len(env.observation),len(env.observation['sw']))
# Length of the partial state produced by dict2array_partial for this
# observation: 11 whitelisted scalar entries + 9 soil-water ('sw') values.
ram_dimensions = 20
# Size of the discrete action grid decoded in get_memory:
# action % 5 selects the fertilizer level, action // 5 the irrigation level (5 x 5).
nb_actions = 25
print('\nRam information received from DASSAT will has %d dimensions.' % ram_dimensions)
print('There are %d possible actions at each step.' % nb_actions)
# Inspect the environment's own action space. The previous check compared the
# type of the `gym.spaces` module with a class, which is False for every env.
print('Discrete?', isinstance(env.action_space, gym.spaces.Discrete))
# The observation dict has 27 entries; one of them ('sw') holds 9 values, so the
# fully flattened state has 26 + 9 = 35 dimensions (the input size given to Full_QNetwork).

Observation: {'cleach': 0.0, 'cnox': 0.0, 'cumsumfert': 0.0, 'dap': 0, 'dtt': 0.0, 'es': 0.0, 'grnwt': 0.0, 'istage': 7, 'nstres': 0.0, 'pcngrn': 0.0, 'pltpop': 7.199999809265137, 'rain': 0.0, 'rtdep': 0.0, 'runoff': 0.0, 'srad': 13.300000190734863, 'sw': array([0.086     , 0.086     , 0.086     , 0.086     , 0.086     ,
       0.076     , 0.076     , 0.13      , 0.25799999]), 'swfac': 0.0, 'tleachd': 0.0, 'tmax': 22.200000762939453, 'tmin': 3.299999952316284, 'tnoxd': 0.0, 'topwt': 0.0, 'trnu': 0.0, 'vstage': 0.0, 'wtdep': 0.0, 'wtnup': 0.0, 'xlai': 0.0}
27 9

Ram information received from DASSAT will has 20 dimensions.
There are 25 possible actions at each step.
Discrete? False


In [5]:
class Full_QNetwork(nn.Module):
    """Fully connected Q-network.

    Maps a state vector to one Q-value per discrete action through three
    ReLU hidden layers (``fc1``-``fc3``) followed by a linear output layer
    (``fc4``). The layer attribute names are part of the checkpoint format
    (they become the ``state_dict`` keys) and must not be renamed.
    """

    def __init__(self, state_size, action_size, fc1_units=128*2,fc2_units=128*2,fc3_units=128*2):
        """Create the four linear layers.
        Params
        ======
            state_size (int): Dimension of each state
            action_size (int): Number of Q-values produced (one per action)
            fc1_units (int): Number of nodes in the first hidden layer
            fc2_units (int): Number of nodes in the second hidden layer
            fc3_units (int): Number of nodes in the third hidden layer
        """
        super().__init__()
        # Layers are created in input -> output order.
        self.fc1 = nn.Linear(state_size, fc1_units)
        self.fc2 = nn.Linear(fc1_units, fc2_units)
        self.fc3 = nn.Linear(fc2_units, fc3_units)
        self.fc4 = nn.Linear(fc3_units, action_size)

    def forward(self, state):
        """Return the Q-values for ``state`` (no activation on the output layer)."""
        hidden = state
        for layer in (self.fc1, self.fc2, self.fc3):
            # ReLU is applied element-wise: max(0, x).
            hidden = F.relu(layer(hidden))
        return self.fc4(hidden)

In [6]:
# Rebuild the architecture (35-dim state, 25 actions) and load the trained weights.
# map_location=device remaps the stored tensors onto the CPU device defined above,
# so a checkpoint that was saved from a GPU still loads on this machine.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
trained_model = Full_QNetwork(35, 25)
trained_model.load_state_dict(torch.load('/home/rant3/focal/Model/FL Economic/1.pth', map_location=device))

<All keys matched successfully>

In [7]:
class Full_Agent():
    """Action selector built around an already-trained Q-network."""

    def __init__(self, state_size, action_size, trained_model):
        """Store the problem dimensions and the network to query.
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): number of discrete actions to choose from
            trained_model: callable network returning one value per action
        """
        self.state_size = state_size
        self.action_size = action_size
        # Q-Network (used as given; no copy is made)
        self.qnetwork = trained_model

    def act(self, state, eps=0.):
        """Pick an action for ``state`` with an epsilon-greedy rule.
        Params
        ======
            state (numpy.ndarray): current state
            eps (float): epsilon, for epsilon-greedy action selection
        Returns the index of the highest-valued action when the random draw
        exceeds ``eps``, otherwise a uniformly drawn index below ``action_size``.
        """
        # Add a leading batch dimension before querying the network.
        batch = torch.from_numpy(state).float().unsqueeze(0).to(device)
        self.qnetwork.eval()
        with torch.no_grad():
            action_values = self.qnetwork(batch)
        # The network is put back into training mode after the query.
        self.qnetwork.train()

        greedy = random.random() > eps
        if not greedy:
            return random.choice(np.arange(self.action_size))
        return np.argmax(action_values.cpu().data.numpy())

    def get(self,state):
        """Return the raw network output for ``state`` without tracking gradients.

        Unlike ``act``, the input is passed to the network as-is and the
        network is left in evaluation mode.
        """
        self.qnetwork.eval()
        with torch.no_grad():
            return self.qnetwork(state)

In [8]:
# Wrap the loaded network: 35-dimensional full state, 25 discrete actions.
full_agent = Full_Agent(state_size=35, action_size=25, trained_model=trained_model)

In [9]:
import deepdish as dd
def get_memory(episodes, max_steps=500):
    """Roll out the greedy policy of ``full_agent`` in ``env`` and record transitions.

    Uses the module-level ``env``, ``full_agent``, ``dict2array`` and
    ``dict2array_partial``. At every step the agent's discrete action index is
    decoded into a fertilizer amount (``(index % 5) * 40``) and an irrigation
    amount (``(index // 5) * 6``), the tuple
    ``(full_state, par_state, action_tensor)`` is stored, and the environment
    is stepped with the decoded action. Steps where the agent picks index 5 or
    6 are stored 5 and 15 times respectively, so they are over-represented in
    the returned list.

    Params
    ======
        episodes (int): number of episodes to roll out
        max_steps (int): upper bound on steps per episode (default 500)

    Returns
    =======
        list of tuples ``(full_state, par_state, action_tensor)`` where
        ``action_tensor`` is a 1x2 float64 tensor ``[[anfer, amir]]``
    """
    # Number of stored copies per discrete action index; any other index is stored once.
    oversample = {5: 5, 6: 15}
    memory = []
    for _episode in range(episodes):
        env.reset()
        state = env.observation
        full_state = dict2array(state)
        par_state = dict2array_partial(state)
        for _step in range(max_steps):
            # eps=0: the agent takes its highest-valued action.
            action2 = full_agent.act(full_state, 0)
            # Decode the flat index once and reuse it for both the stored
            # tensor and the environment action.
            anfer = (action2 % 5) * 40
            amir = int(action2 // 5) * 6
            a = np.array([[float(anfer), float(amir)]])
            action = {
                'anfer': anfer,  # if mode == fertilization or mode == all ; nitrogen to fertilize in kg/ha
                'amir': amir,  # if mode == irrigation or mode == all ; water to irrigate (original note: L/ha -- TODO confirm units)
            }
            repeats = oversample.get(int(action2), 1)
            if repeats > 1:
                # Kept from the original: log each oversampled action index.
                print(action2)
            for _copy in range(repeats):
                # A fresh tensor per stored copy, as before.
                memory.append((full_state, par_state, torch.tensor(a)))
            next_state, reward, done, _ = env.step(action)
            if done:
                break
            state = next_state
            full_state = dict2array(state)
            par_state = dict2array_partial(state)
    return memory

In [10]:
# Roll out a single episode and write the collected transitions to an HDF5 file.
memory = get_memory(episodes=1)
dd.io.save('fix_memory_2.h5', memory)

5
6
6
6
5
6
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
6
6


In [11]:
# Read the saved transitions back from disk and report how many there are.
memory = dd.io.load('fix_memory_2.h5')
n_transitions = len(memory)
print(n_transitions)

319


In [12]:
# Spot-check: draw 20 stored transitions at random and print their action tensors.
for draw in range(20):
    sampled = random.choice(memory)
    action_tensor = sampled[2]
    print(action_tensor)

tensor([[40.,  6.]], dtype=torch.float64)
tensor([[0., 0.]], dtype=torch.float64)
tensor([[0., 0.]], dtype=torch.float64)
tensor([[0., 0.]], dtype=torch.float64)
tensor([[0., 6.]], dtype=torch.float64)
tensor([[0., 0.]], dtype=torch.float64)
tensor([[40.,  6.]], dtype=torch.float64)
tensor([[40.,  6.]], dtype=torch.float64)
tensor([[0., 0.]], dtype=torch.float64)
tensor([[0., 0.]], dtype=torch.float64)
tensor([[0., 0.]], dtype=torch.float64)
tensor([[0., 0.]], dtype=torch.float64)
tensor([[0., 0.]], dtype=torch.float64)
tensor([[40.,  6.]], dtype=torch.float64)
tensor([[0., 0.]], dtype=torch.float64)
tensor([[40.,  6.]], dtype=torch.float64)
tensor([[0., 0.]], dtype=torch.float64)
tensor([[40.,  6.]], dtype=torch.float64)
tensor([[0., 6.]], dtype=torch.float64)
tensor([[0., 0.]], dtype=torch.float64)
