In [1]:
%matplotlib inline

# Nästa gång:
# Välja ut passande input
# Exportera model
# importera model
# träna på mer data

In [133]:
import os
import json
from collections import namedtuple

import numpy as np
from scipy import sparse

from tqdm import tqdm

class BatchEnv(object):
    def __init__(self):
        pass

    def init(self, path, root, race, enemy_race, step_mul=8, n_replays=4, n_steps=5, epochs=10, seed=None):
        np.random.seed(seed)

        with open(path) as f:
            replays = json.load(f)

        self.replays = self.__generate_replay_list__(replays, root, race)

        self.race = race
        self.enemy_race = enemy_race

        self.step_mul = step_mul
        self.n_replays = n_replays
        self.n_steps = n_steps

        self.epochs = epochs
        self.epoch = -1
        self.steps = 0

        self.replay_idx = -1
        self.replay_list = [None for _ in range(self.n_replays)]
        
        """
        ## Display Progress Bar
        self.epoch_pbar = tqdm(total=self.epochs, desc='Epoch')
        self.replay_pbar = None
        """

        self.__post_init__()

    def __generate_replay_list__(self, replays, race):
        raise NotImplementedError

    def __init_epoch__(self):
        self.epoch += 1
        """
        if self.epoch > 0:
            self.epoch_pbar.update(1)
        """
        if self.epoch == self.epochs:
            return False

        np.random.shuffle(self.replays)
        """
        ## Display Progress Bar
        if self.replay_pbar is not None:
            self.replay_pbar.close()
        self.replay_pbar = tqdm(total=len(self.replays), desc='  Replays')
        """
        return True

    def __reset__(self):
        self.replay_idx += 1
        if self.replay_idx % len(self.replays) == 0:
            has_more = self.__init_epoch__()
            if not has_more:
                return None

        path = self.replays[self.replay_idx%len(self.replays)]

        return self.__load_replay__(path)

    def __load_replay__(self, path):
        raise NotImplementedError

    def step(self, **kwargs):
        require_init = [False for _ in range(self.n_replays)]
        for i in range(self.n_replays):
            if self.replay_list[i] is None or self.replay_list[i]['done']:
                if self.replay_list[i] is not None:
                    keys = set(self.replay_list[i].keys())
                    for k in keys:
                        del self.replay_list[i][k]
                self.replay_list[i] = self.__reset__()
                require_init[i] = True
            if self.replay_list[i] is None:
                return None

        result = []
        for step in range(self.n_steps):
            result_per_step = []
            for i in range(self.n_replays):
                replay_dict = self.replay_list[i]

                features = self.__one_step__(replay_dict, replay_dict['done'])

                result_per_step.append(features)

            result.append(result_per_step)

        return self.__post_process__(result, **kwargs), require_init

    def __one_step__(self, replay_dict, done):
        raise NotImplementedError

    def __post_process__(self, result, **kwargs):
        raise NotImplementedError

    def step_count(self):
        return self.steps

    def close(self):
        """
        if self.epoch_pbar is not None:
            self.epoch_pbar.close()
        if self.replay_pbar is not None:
            self.replay_pbar.close()
        """
class BatchGlobalFeatureEnv(BatchEnv):
    n_features_dic = {'Terran':  {'Terran': 738,  'Protoss': 648,  'Zerg': 1116},
                      'Protoss': {'Terran': 638,  'Protoss': 548,  'Zerg': 1016},
                      'Zerg':    {'Terran': 1106, 'Protoss': 1016, 'Zerg': 1484}}
    n_actions_dic = {'Terran': 75, 'Protoss': 61, 'Zerg': 74}

    def __post_init__(self):
        self.n_features = self.n_features_dic[self.race][self.enemy_race]
        self.n_actions = self.n_actions_dic[self.race]

    def __generate_replay_list__(self, replays, root, race):
        result = []
        for path_dict in replays:
            for player_path in path_dict[race]:
                result.append(os.path.join(root, player_path['global_path']))

        return result

    def __load_replay__(self, path):
        replay_dict = {}
        replay_dict['ptr'] = 0
        replay_dict['done'] = False
        replay_dict['states'] = np.asarray(sparse.load_npz(path).todense())

        return replay_dict

    def __one_step__(self, replay_dict, done):
        states = replay_dict['states']
        feature_shape = states.shape[1:]
        if done:
            return np.zeros(feature_shape)

        self.steps += 1
        state = states[replay_dict['ptr']]
        replay_dict['ptr'] += 1
        if replay_dict['ptr'] == states.shape[0]:
            #self.replay_pbar.update(1)
            replay_dict['done'] = True

        return state

    def __post_process__(self, result, reward=True, action=False, score=False):
        result = np.asarray(result)

        result_return = [result[:, :, 15:]]
        if reward:
            result_return.append(result[:, :, 0:1])
        if action:
            result_return.append(result[:, :, 1:2])
        if score:
            result_return.append(result[:, :, 2:15])

        return result_return



In [28]:
import torch
import torch.nn as nn
import torch.nn.functional as F

#Input
#

class Net(nn.Module):
    def __init__(self, ):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(1, 2)
        self.fc2 = nn.Linear(2, 78)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

net = Net()
print(net)

Net(
  (fc1): Linear(in_features=1, out_features=2, bias=True)
  (fc2): Linear(in_features=2, out_features=78, bias=True)
)


In [10]:
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [163]:
import torch
# Require grad?!?
# weight ?!?!?

def train(model, env):
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    env_return = env.step(reward=True, action=True)
    
    if env_return is not None:
        (states, reward, actions_gt), require_init = env_return
        
    states = torch.from_numpy(states).float()
    actions_gt = torch.from_numpy(actions_gt).long().view(1)
    reward = torch.from_numpy(reward).float().view(-1, 1)
    actions = model(reward)
    
    print(actions_gt.size())
    
    
    loss = 0
    loss += F.cross_entropy(actions, actions_gt)
    print(loss.grad_fn)

    
    optimizer.zero_grad()
    
    loss.backward()
    
    #print(model.fc1.weight)
    
    optimizer.step()
    
    #print(model.fc1.weight)
    
model = Net()
replay_path = ''
dataset_path = ''
race = 'Terran'
enemy_race = 'Terran'
#steps = 20 # ?
n_replays = 1
epochs = 1

path = 'train_val_test/Terran_vs_Terran/train.json'
phrase = 'train'

env = BatchGlobalFeatureEnv()
env.init(path, './', race, enemy_race, n_replays=1, n_steps=1)

train(model, env)

238
torch.Size([1])
<AddBackward0 object at 0x0000024040573588>
Parameter containing:
tensor([[0.5886],
        [0.0722]], requires_grad=True)
Parameter containing:
tensor([[0.6886],
        [0.1722]], requires_grad=True)
