In [26]:
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
from tensorboardX import SummaryWriter
import tqdm
import os
import uuid
import random

# Pick the legacy typed-tensor classes for whichever backend is available so
# later cells can write `.type(FloatTensor)` without checking for a GPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    FloatTensor = torch.cuda.FloatTensor
    LongTensor = torch.cuda.LongTensor
    IntTensor = torch.cuda.IntTensor
    ByteTensor = torch.cuda.ByteTensor
else:
    FloatTensor = torch.FloatTensor
    LongTensor = torch.LongTensor
    IntTensor = torch.IntTensor
    ByteTensor = torch.ByteTensor
Tensor = FloatTensor

# Random identifier created once per kernel session; it is embedded in the
# file name of every checkpoint written by TransModel.save().
unique_id = str(uuid.uuid4())

def weights_initialize(module):
    """Initialise an ``nn.Linear`` layer in place; ignore every other module type.

    Intended to be passed to ``nn.Module.apply``. For a module whose type is
    exactly ``nn.Linear`` the weight is drawn with Xavier-uniform using the
    ReLU gain, and the bias (when the layer has one) is filled with 0.01.

    Args:
        module: any ``nn.Module``; non-Linear modules are left untouched.
    """
    if type(module) is not nn.Linear:
        return

    nn.init.xavier_uniform_(module.weight, gain=nn.init.calculate_gain('relu'))

    # nn.Linear(..., bias=False) stores bias as None; there is nothing to fill.
    if module.bias is not None:
        module.bias.data.fill_(0.01)
        
class _TransModel(nn.Module):
    """Fully connected network: input -> 512 -> 128 -> output.

    ``fc1`` and ``fc2`` are Linear + BatchNorm1d + ReLU stages and
    ``output_layer`` is a single Linear layer. Each stage is run through
    ``weights_initialize`` as soon as it is built.
    """

    def __init__(self, input_len, output_len):
        super().__init__()

        # Stages are created and initialised one after another, in this order,
        # so the sequence of random draws matches the layer order.
        self.fc1 = self._hidden_stage(input_len, 512)
        self.fc2 = self._hidden_stage(512, 128)

        head = nn.Sequential(nn.Linear(128, output_len))
        head.apply(weights_initialize)
        self.output_layer = head

    @staticmethod
    def _hidden_stage(n_in, n_out):
        """Build one initialised Linear -> BatchNorm1d -> ReLU stage."""
        stage = nn.Sequential(
            nn.Linear(n_in, n_out),
            nn.BatchNorm1d(n_out),
            nn.ReLU(),
        )
        stage.apply(weights_initialize)
        return stage

    def forward(self, input):
        """Run ``input`` through both hidden stages and the output layer."""
        hidden = self.fc2(self.fc1(input))
        return self.output_layer(hidden)

class TransModel():
    """Training wrapper that owns a ``_TransModel``, its optimizer and its loss.

    Attributes:
        model: the underlying ``_TransModel`` (moved to the GPU when ``use_cuda``).
        optimizer: Adam over ``model.parameters()``.
        loss_fn: mean-reduced ``nn.MSELoss``.
        steps: number of optimizer updates performed through ``fit``.
    """

    def __init__(self, input_len, ouput_len, learning_rate = 0.0001):
        """Build the network and its optimizer.

        Args:
            input_len: size of one input vector.
            ouput_len: size of one output vector (parameter name kept as-is
                for backward compatibility with existing callers).
            learning_rate: Adam learning rate.
        """
        self.model = _TransModel(input_len, ouput_len)

        if use_cuda:
            print("Using GPU")
            self.model = self.model.cuda()
        else:
            print("Using CPU")
        # self.model = nn.DataParallel(self.model)
        self.optimizer = Adam(self.model.parameters(), lr = learning_rate)
        self.loss_fn = nn.MSELoss(reduction='mean')

        self.steps = 0

    def predict(self, input, steps, learning):
        """Run the network on ``input`` and drop dimension 1 if it has size 1.

        ``steps`` and ``learning`` are accepted but not used.
        """
        return self.model(input).squeeze(1)

    def predict_batch(self, input):
        """Run the network on ``input`` and return the raw output."""
        return self.model(input)

    def fit(self, state, target_state):
        """Take one optimizer step on the MSE between ``state`` and ``target_state``.

        Args:
            state: network output that is still attached to the autograd graph.
            target_state: tensor to regress towards.

        Returns:
            The loss as a detached 0-d tensor. Detaching lets callers sum the
            per-batch losses without keeping autograd nodes alive.
        """
        loss = self.loss_fn(state, target_state)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        self.steps += 1
        return loss.detach()

    def save(self):
        """Write the model's ``state_dict`` to ``<cwd>/models/NEXUS_<unique_id>.pt``.

        Returns:
            The path of the file that was written.
        """
        path = os.path.join(os.getcwd(), 'models')
        # exist_ok=True already covers the "directory is present" case.
        os.makedirs(path, exist_ok=True)
        file_path = os.path.join(path, 'NEXUS_' + unique_id + '.pt')
        torch.save(self.model.state_dict(), file_path)
        return file_path

In [42]:
# Column indices, numbered consecutively from 0. Later cells in this notebook
# normalise columns 0-3 as P1 buildings, 4 as P1 Nexus HP, 5-8 as P2 buildings,
# 9 as P2 Nexus HP, 10 as P1 minerals and 11+ as both players' units.
(
    a_b_m, a_b_v, a_b_c, a_b_p,   # 0-3
    a_nex,                        # 4
    e_b_m, e_b_v, e_b_c, e_b_p,   # 5-8
    e_nex,                        # 9
    a_mnrl,                       # 10
    a_u_m, a_u_v, a_u_c,          # 11-13
    e_u_m, e_u_v, e_u_c,          # 14-16
    a_rwd,                        # 17: only in second column of data so data[i][1], NOT data[i][0]
) = range(18)

# NOTE(review): torch.load unpickles the file, so only load dataset files that
# come from a trusted source.
data_1 = torch.load('100000_random_v_random.pt')

data_2 = torch.load('60000_sadq_v_random.pt')
print(len(data_1[0][0]))
print(len(data_2[0][0]))
print(len(data_1[0][1]))
print(len(data_2[0][1]))

# Columns of record[0] whose sum is used to detect where one game ends.
BUILDING_COLUMNS = (a_b_m, a_b_v, a_b_c, a_b_p, e_b_m, e_b_v, e_b_c, e_b_p)


def find_game_starts(records):
    """Return the indices in ``records`` at which a new game begins.

    Index 0 always starts a game. Any later index ``i`` starts one when the sum
    of the building columns of ``records[i][0]`` is strictly smaller than the
    same sum for ``records[i - 1][0]``.
    (Presumably building counts never shrink inside a single game, so a drop
    marks a reset -- TODO confirm against the data generator.)

    Args:
        records: sequence of records whose element ``[0]`` is indexable by the
            building column indices.

    Returns:
        Sorted list of start indices, beginning with 0.
    """
    starts = [0]
    previous_total = None
    for position, record in enumerate(records):
        total = sum(record[0][column] for column in BUILDING_COLUMNS)
        # The first record has no predecessor; it is never compared against
        # the last record of the dataset.
        if previous_total is not None and total < previous_total:
            starts.append(position)
        previous_total = total
    return starts


# Each dataset is scanned on its own so the start indices are relative to that
# dataset, which is how the train/test split cell consumes them.
data_1_games = find_game_starts(data_1)
data_2_games = find_game_starts(data_2)


data = data_1 + data_2

17
17
18
18


In [43]:
np.set_printoptions(suppress=True)

# Divisors applied to each feature group.
BUILDING_SCALE = 30
NEXUS_HP_SCALE = 2000
MINERAL_SCALE = 1500
UNIT_SCALE = 60


def normalize_record(record):
    """Return a normalised ``[state, target]`` pair without touching ``record``.

    Args:
        record: ``[state, next_state]`` where ``state`` is array-like and
            ``next_state`` is indexable at positions 4 and 9.

    Returns:
        ``[state, target]`` where ``state`` is a scaled copy of ``record[0]``
        and ``target`` is ``[next_state[4], next_state[9]]`` divided by the
        Nexus HP scale.
    """
    raw_state, raw_next = record[0], record[1]

    # np.array copies and keeps the dtype, so the raw record stays intact.
    state = np.array(raw_state)
    state[0:4] = state[0:4] / BUILDING_SCALE     # Normalize P1 buildings
    state[5:9] = state[5:9] / BUILDING_SCALE     # Normalize P2 buildings
    state[4] = state[4] / NEXUS_HP_SCALE         # Normalize P1 Nexus HP
    state[9] = state[9] / NEXUS_HP_SCALE         # Normalize P2 Nexus HP
    state[10] = state[10] / MINERAL_SCALE        # Normalize P1 Minerals
    state[11:] = state[11:] / UNIT_SCALE         # Normalize both Player's units on the field

    target = [raw_next[4] / NEXUS_HP_SCALE, raw_next[9] / NEXUS_HP_SCALE]
    return [state, target]


# Always rebuild from the raw datasets: re-running this cell then yields the
# same result instead of dividing already-normalised values a second time.
data = [normalize_record(record) for record in data_1 + data_2]
l = len(data)


print(data[0][0], data[0][1])

[0.         0.         0.         0.03333333 1.         0.
 0.         0.         0.03333333 1.         0.15       0.
 0.         0.         0.         0.         0.        ] [1.0, 1.0]


In [45]:
import copy

print(len(data))
oneTenthData1 = len(data_1) // 10
oneTenthData2 = len(data_2) // 10

# Random visiting order of the games of each dataset.
idx_1 = random.sample(range(len(data_1_games)), len(data_1_games))
idx_2 = random.sample(range(len(data_2_games)), len(data_2_games))


def split_by_game(records, game_order, game_starts, n_records, offset, test_quota):
    """Walk games in ``game_order`` and deal their records into test / train.

    Records are appended to the test list until it holds ``test_quota`` items;
    every record after that goes to the train list. The switch happens at
    record granularity, so the game being visited when the quota fills up is
    divided between both lists.

    Args:
        records: combined record list that is indexed with ``offset + position``.
        game_order: iterable of indices into ``game_starts``.
        game_starts: start position of every game, relative to its own dataset.
        n_records: number of records in that dataset (end of the last game).
        offset: position of the dataset's first record inside ``records``.
        test_quota: number of records to place in the test list.

    Returns:
        ``(test_records, train_records)`` as lists of deep copies.
    """
    test_part, train_part = [], []
    for game in game_order:
        first = game_starts[game]
        if game + 1 < len(game_starts):
            stop = game_starts[game + 1]
        else:
            stop = n_records
        for position in range(first, stop):
            bucket = test_part if len(test_part) < test_quota else train_part
            bucket.append(copy.deepcopy(records[offset + position]))
    return test_part, train_part


def to_object_array(records):
    """Pack ``[state, target]`` records into an ``(N, 2)`` object array.

    The two columns have different lengths, so the array is built explicitly
    with ``dtype=object``; recent NumPy versions refuse to infer a ragged
    array from nested sequences.
    """
    packed = np.empty((len(records), 2), dtype=object)
    for row, (state, target) in enumerate(records):
        packed[row, 0] = state
        packed[row, 1] = target
    return packed


test_data_1, train_data_1 = split_by_game(
    data, idx_1, data_1_games, len(data_1), 0, oneTenthData1)
test_data_2, train_data_2 = split_by_game(
    data, idx_2, data_2_games, len(data_2), len(data_1), oneTenthData2)

test_data = to_object_array(test_data_1 + test_data_2)
train_data = to_object_array(train_data_1 + train_data_2)

np.random.shuffle(train_data)
np.random.shuffle(test_data)

print(train_data.shape, test_data.shape)

batch_size = 64
summary_test = SummaryWriter(log_dir = 'nexus-HP-transition-model-report/')

167900
(151110, 2) (16790, 2)


In [None]:
# Baseline error: compare columns 4 and 9 of each test input against the
# corresponding test target, i.e. the error of predicting "no change".
baseline = np.stack(test_data[:, 0])
idx = [4, 9]
baseline_hp = np.take(baseline, idx, axis=1)

bl_next_state_reward = np.stack(test_data[:, 1])

squared_error = np.square(baseline_hp - bl_next_state_reward)
mse_baseline = np.mean(squared_error)
print(mse_baseline)

In [None]:
# Score a previously saved checkpoint on the held-out set.
# NOTE(review): `evaluation` is defined in a later cell of this notebook; that
# cell has to be executed before this one, otherwise this raises NameError.
check_model = TransModel(len(data[0][0]), len(data[0][1]))

# map_location='cpu' lets a checkpoint that was written on a GPU machine load
# on a CPU-only one; load_state_dict then copies the values into the model's
# own parameters, wherever they live.
check_model.model.load_state_dict(
    torch.load('./models/NEXUS_7a4d0dff-6035-49c9-bf82-afa77d8cf1eb.pt',
               map_location='cpu')
)

evaluation(check_model, test_data, 0)

In [None]:
# Size the network from the first record: input width = length of its state
# vector, output width = length of its target.
first_state, first_target = data[0][0], data[0][1]
trans_model = TransModel(len(first_state), len(first_target))

In [None]:
def evaluation(model, data, epoch):
    """Return the mean-squared error of ``model`` on ``data``.

    The network is switched to eval mode for the forward pass, which runs with
    autograd disabled so no graph is built over the whole dataset. Afterwards
    the network is put back into the mode it was in before the call.

    Args:
        model: wrapper exposing ``.model`` (an ``nn.Module`` with at least one
            parameter) and ``predict_batch(tensor)``.
        data: 2-column array; ``data[:, 0]`` holds the input vectors and
            ``data[:, 1]`` the targets. Each column is combined with ``np.stack``.
        epoch: not used by the computation; kept so existing callers and any
            step-indexed logging keep working.

    Returns:
        The MSE over all outputs as a Python float.
    """
    # Follow the device and dtype of the network itself rather than a global
    # tensor-type alias, so inputs always match the weights.
    reference = next(model.model.parameters())
    state_action = torch.as_tensor(
        np.stack(data[:, 0]), dtype=reference.dtype, device=reference.device)
    next_state_reward = torch.as_tensor(
        np.stack(data[:, 1]), dtype=reference.dtype, device=reference.device)

    criterion = nn.MSELoss(reduction='mean')

    was_training = model.model.training
    model.model.eval()
    try:
        with torch.no_grad():
            outputs = model.predict_batch(state_action)
            mse = criterion(outputs, next_state_reward)
    finally:
        # Restore the caller's mode even if the forward pass raised.
        model.model.train(was_training)

    # TODO(review): TensorBoard / text-file logging of this value used to live
    # here but was commented out; callers now receive the value and can log it.
    return mse.item()

In [None]:
state_action = torch.from_numpy(np.stack(train_data[:, 0])).type(FloatTensor)
next_state_reward = torch.from_numpy(np.stack(train_data[:, 1])).type(FloatTensor)
print(state_action.size(), next_state_reward.size())

N_EPOCHS = 10000
SAVE_EVERY = 1000   # checkpoint interval, in epochs
n_samples = state_action.shape[0]

for epoch in tqdm.tqdm(range(N_EPOCHS)):
    epoch_loss = 0.0
    n_batches = 0

    # Fresh random order of the training set for every epoch.
    s = np.arange(n_samples)
    np.random.shuffle(s)
    train_x = state_action[s]
    train_y = next_state_reward[s]

    for start in range(0, n_samples, batch_size):
        inputs = train_x[start : start + batch_size, :]
        ground_true = train_y[start : start + batch_size, :]
        # BatchNorm1d in training mode needs more than one sample per batch,
        # so a trailing batch of size 0 or 1 is skipped.
        if inputs.shape[0] < 2:
            continue
        outputs = trans_model.predict_batch(inputs)
        # Accumulate a plain float so no tensors are kept alive across batches.
        epoch_loss += float(trans_model.fit(outputs, ground_true))
        n_batches += 1

    summary_test.add_scalars("MSE", {'Train MSE': epoch_loss / max(n_batches, 1)}, epoch)

    # Log the held-out error next to the training error instead of discarding it.
    test_mse = evaluation(trans_model, test_data, epoch)
    summary_test.add_scalars("MSE", {'Test MSE': float(test_mse)}, epoch)

    if epoch % SAVE_EVERY == 0 and epoch != 0:
        print(epoch)
        trans_model.save()

# The periodic checkpoint above never fires on the last epoch; save the final weights.
trans_model.save()