In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader
from torch.optim import Adam
from tensorboardX import SummaryWriter
import tqdm
import os
import uuid
import collections

# Pick the namespace holding the legacy tensor constructors once: torch.cuda
# when a GPU is available, plain torch otherwise.
use_cuda = torch.cuda.is_available()
_tensor_ns = torch.cuda if use_cuda else torch

FloatTensor = _tensor_ns.FloatTensor
LongTensor = _tensor_ns.LongTensor
IntTensor = _tensor_ns.IntTensor
ByteTensor = _tensor_ns.ByteTensor
Tensor = FloatTensor  # default tensor alias

# Random identifier regenerated every time this cell runs; TransModel.save
# embeds it in the checkpoint file name.
unique_id = str(uuid.uuid4())

In [None]:
def weights_initialize(module):
    """Initialise an ``nn.Linear`` layer in place; leave any other module untouched.

    Meant to be handed to ``nn.Module.apply``. Weights receive Xavier-uniform
    initialisation scaled by the ReLU gain; the bias, when the layer has one,
    is filled with the constant 0.01.

    Args:
        module: Any ``nn.Module``. Only modules whose exact type is
            ``nn.Linear`` are modified.
    """
    # Exact type match (rather than isinstance) is kept on purpose so that
    # subclasses of nn.Linear are skipped exactly as before.
    if type(module) is not nn.Linear:
        return

    nn.init.xavier_uniform_(module.weight, gain=nn.init.calculate_gain('relu'))

    # Layers created with bias=False carry ``bias is None``; filling it
    # unconditionally used to raise AttributeError.
    if module.bias is not None:
        module.bias.data.fill_(0.01)
        
class _TransModel(nn.Module):
    """Fully connected network wrapped by ``TransModel``.

    Layer stack:
        Linear(input_len, 512) -> ReLU -> BatchNorm1d(512)
        -> Linear(512, 128)    -> ReLU -> BatchNorm1d(128)
        -> Linear(128, output_len)
    """

    def __init__(self, input_len, output_len):
        super().__init__()

        # The keys below become state_dict prefixes, so they are kept as-is.
        layers = collections.OrderedDict()
        layers['fc1'] = nn.Linear(input_len, 512)
        layers['rl1'] = nn.ReLU()
        layers['bn1'] = nn.BatchNorm1d(512)
        layers['fc2'] = nn.Linear(512, 128)
        layers['rl2'] = nn.ReLU()
        layers['bn2'] = nn.BatchNorm1d(128)
        layers['fc3'] = nn.Linear(128, output_len)
        self.model = nn.Sequential(layers)

        # Re-initialise every nn.Linear in the stack; weights_initialize
        # ignores the other layer types.
        self.model.apply(weights_initialize)

    def forward(self, input):
        """Feed ``input`` through the sequential stack and return the result."""
        return self.model(input)

class TransModel():
    """Training wrapper bundling the network, its optimizer and its loss.

    Attributes set by ``__init__``:
        model: ``_TransModel`` wrapped in ``nn.DataParallel``.
        steps: number of optimizer steps performed by ``fit``.
        optimizer: ``Adam`` over the model's parameters.
        loss_fn: ``nn.MSELoss`` with mean reduction.
    """

    def __init__(self, input_len, ouput_len, learning_rate = 0.0001, device_ids = None):
        """Build the network, wrap it in ``nn.DataParallel`` and create the optimizer.

        Args:
            input_len: Number of input features.
            ouput_len: Number of output features. (The misspelling is kept so
                existing keyword callers continue to work.)
            learning_rate: Learning rate handed to ``Adam``.
            device_ids: Forwarded to ``nn.DataParallel``. ``None`` (default)
                lets DataParallel choose among the visible CUDA devices.
                This used to be hard-coded to ``[5]``, which fails on hosts
                without a GPU 5 and conflicts with placing the model on the
                default CUDA device; pass ``device_ids=[5]`` explicitly to
                get the old setting back.
        """
        self.model = _TransModel(input_len, ouput_len)

        self.steps = 0
        self.model = nn.DataParallel(self.model, device_ids=device_ids)
        self.optimizer = Adam(self.model.parameters(), lr = learning_rate)
        self.loss_fn = nn.MSELoss(reduction='mean')

    def predict(self, input, steps=None, learning=None):
        """Run the model on ``input`` and drop dimension 1 if it has size 1.

        Args:
            input: Tensor fed to the model.
            steps: Unused; accepted for compatibility with existing callers.
            learning: Unused; accepted for compatibility with existing callers.

        Returns:
            The model output after ``squeeze(1)``.
        """
        output = self.model(input).squeeze(1)
        return output

    def predict_batch(self, input):
        """Run the model on ``input`` and return the raw output tensor."""
        output = self.model(input)
        return output

    def fit(self, state, target_state):
        """Take one optimizer step on the MSE between ``state`` and ``target_state``.

        Args:
            state: Predictions produced by this model (they must still be
                attached to the autograd graph for ``backward`` to work).
            target_state: Ground-truth tensor of matching shape.

        Returns:
            The scalar loss as a detached tensor, so callers can accumulate
            it without keeping autograd history alive.
        """
        loss = self.loss_fn(state, target_state)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        self.steps += 1
        return loss.detach()

    def save(self):
        """Write the model's ``state_dict`` to ``<cwd>/models/NEXUS_<unique_id>.pt``.

        The directory is created when missing. The state dict is taken from
        the DataParallel wrapper, so its keys carry the ``module.`` prefix.
        """
        models_dir = os.path.join(os.getcwd(), 'models')
        # exist_ok=True already covers the "directory exists" case.
        os.makedirs(models_dir, exist_ok=True)
        file_path = os.path.join(models_dir, 'NEXUS_' + unique_id + '.pt')
        torch.save(self.model.state_dict(), file_path)

In [None]:
# Load the two saved sample files.
# NOTE(review): torch.load unpickles its input, so only load files that come
# from a trusted source.
data_1 = torch.load('100000_random_v_random.pt')
data_2 = torch.load('60000_sadq_v_random.pt')

# Report the length of the first sample's element 0, then element 1, for each
# file in turn (data_1 before data_2).
for _part in (0, 1):
    for _source in (data_1, data_2):
        print(len(_source[0][_part]))

# Containers for per-game boundary indices. The code that would fill them is
# disabled below; it refers to names (`data`, `a_b_m`, ...) that are not
# defined at this point in the visible notebook.
data_1_games, data_2_games = [], []

# for i in range(len(data_1)):
#     if (((data[i][0][a_b_m]) + (data[i][0][a_b_v]) + (data[i][0][a_b_c]) + (data[i][0][a_b_p]) + 
#          (data[i][0][e_b_m]) + (data[i][0][e_b_v]) + (data[i][0][e_b_c]) + (data[i][0][e_b_p])) < 
#         ((data[i-1][0][a_b_m]) + (data[i-1][0][a_b_v]) + (data[i-1][0][a_b_c]) + (data[i-1][0][a_b_p]) + 
#          (data[i-1][0][e_b_m]) + (data[i-1][0][e_b_v]) + (data[i-1][0][e_b_c]) + (data[i-1][0][e_b_p]))):
#         data_1_games.append(i)

# data_2_games.append(len(data_1))
# for i in range(len(data_1)):
#     if (((data[i][0][a_b_m]) + (data[i][0][a_b_v]) + (data[i][0][a_b_c]) + (data[i][0][a_b_p]) + 
#          (data[i][0][e_b_m]) + (data[i][0][e_b_v]) + (data[i][0][e_b_c]) + (data[i][0][e_b_p])) < 
#         ((data[i-1][0][a_b_m]) + (data[i-1][0][a_b_v]) + (data[i-1][0][a_b_c]) + (data[i-1][0][a_b_p]) + 
#          (data[i-1][0][e_b_m]) + (data[i-1][0][e_b_v]) + (data[i-1][0][e_b_c]) + (data[i-1][0][e_b_p]))):
#         data_2_games.append(len(data_1) + i)

# Combined sample collection used by every later cell.
data = data_1 + data_2

In [None]:
def split_data(dataset, val_pct, seed=None):
    """Randomly partition sample indices into a training and a validation set.

    Args:
        dataset: Either the number of samples (an int, which is what existing
            callers pass) or any sized collection, in which case
            ``len(dataset)`` is used.
        val_pct: Fraction of the samples reserved for validation. The
            validation size is ``int(val_pct * n)``.
        seed: Optional seed for a reproducible split. With ``None`` (default)
            the permutation is drawn from NumPy's global random state, exactly
            as before.

    Returns:
        ``(train_indices, val_indices)``: two NumPy index arrays that together
        contain every index in ``range(n)`` exactly once.
    """
    # Accept a plain count or anything with a length.
    if isinstance(dataset, (int, np.integer)):
        n_samples = dataset
    else:
        n_samples = len(dataset)

    n_val = int(val_pct * n_samples)

    if seed is None:
        shuffled = np.random.permutation(n_samples)
    else:
        # A private generator keeps the global random state untouched.
        shuffled = np.random.RandomState(seed).permutation(n_samples)

    # The first n_val shuffled indices form the validation set.
    return shuffled[n_val:], shuffled[:n_val]

In [None]:
np.set_printoptions(suppress=True)
l = len(data)  # retained for backward compatibility; not used in this cell

# NOTE: the samples inside `data` are rescaled in place. Re-running this cell
# without reloading `data` first would scale (and truncate) them a second time.
# NOTE(review): inputs are divided by 60 from index 11 onward while the targets
# start at index 12 -- confirm which feature sits at input index 11.
for sample in data:
    state = sample[0]
    sample[1] = np.true_divide(sample[1][12:], 60)    # Ground truth split into only next-state's units

    state[0:4] = np.true_divide(state[0:4], 30) # Normalize P1 buildings
    state[5:9] = np.true_divide(state[5:9], 30) # Normalize P2 buildings
    state[4] = state[4] / 2000 # Normalize P1 Nexus HP
    state[9] = state[9] / 2000 # Normalize P2 Nexus HP
    state[10] = state[10] / 1500 # Normalize P1 Minerals
    state[11:] = np.true_divide(state[11:], 60) # Normalize both Player's units on the field

print(data[0][0], data[0][1][0:3], data[0][1][3:6])
print(data[1][0], data[1][1])
print(data[2][0], data[2][1])

# Pair inputs and targets in an (n_samples, 2) object array. It is filled
# element by element because np.array(data) raises ValueError on NumPy >= 1.24
# when the input and target vectors have different lengths (ragged nesting).
np_data = np.empty((len(data), 2), dtype=object)
for row, sample in enumerate(data):
    np_data[row, 0] = sample[0]
    np_data[row, 1] = sample[1]

my_x = np_data[:, 0]
my_y = np_data[:, 1]
tensor_x = torch.stack([torch.Tensor(i) for i in my_x])
tensor_y = torch.stack([torch.Tensor(i) for i in my_y])

tensor_dataset = torch.utils.data.TensorDataset(tensor_x, tensor_y)

# Hold out 10% of the sample indices for validation.
train_indices, val_indices = split_data(len(data), val_pct=0.1)

print(len(train_indices), len(val_indices))
print(val_indices[:10])

In [None]:
batch_size = 64

# TensorBoard writer; every scalar logged later goes to this directory.
summary_test = SummaryWriter(log_dir='nexus-HP-transition-model-report/')

# Each sampler draws, in random order, only from its own index subset.
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

# Both loaders read from the same TensorDataset; the sampler decides which
# rows each of them sees.
train_dl = DataLoader(tensor_dataset, batch_size=batch_size, sampler=train_sampler)
valid_dl = DataLoader(tensor_dataset, batch_size=batch_size, sampler=val_sampler)

In [None]:
# Validation samples, in the order given by val_indices.
test_data = [data[i] for i in val_indices]

# (n_val, 2) object array of [input, target] pairs. It is filled element by
# element because np.array(test_data) raises ValueError on NumPy >= 1.24 when
# the input and target vectors have different lengths (ragged nesting).
val_set = np.empty((len(test_data), 2), dtype=object)
for row, sample in enumerate(test_data):
    val_set[row, 0] = sample[0]
    val_set[row, 1] = sample[1]

# Columns 12 onward of the inputs are compared against the targets.
baseline = np.stack(val_set[:, 0])
baseline_hp = baseline[:, 12:]

bl_next_state_reward = np.stack(val_set[:, 1])

# Baseline error: use the input's own trailing columns as the prediction.
mse_baseline = ((baseline_hp - bl_next_state_reward)**2).mean(axis=None)
print(mse_baseline)

In [None]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

def to_device(data, device):
    """Move ``data`` to ``device``, recursing into lists and tuples.

    A list or tuple is returned as a new list (tuples are not preserved) whose
    items have each been moved; anything else is moved with
    ``data.to(device, non_blocking=True)``.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader():
    """Iterable wrapper that moves each batch of a dataloader to a device."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __iter__(self):
        """Iterate over the wrapped loader, yielding each batch after ``to_device``."""
        yield from (to_device(batch, self.device) for batch in self.dl)

    def __len__(self):
        """Length of the wrapped loader (its number of batches)."""
        return len(self.dl)

In [None]:
# Resolve the compute device once and show which one was picked.
device = get_default_device()
print(device)

# Re-bind both loaders to wrappers that move every batch to `device` as it is
# iterated.
train_dl, valid_dl = (DeviceDataLoader(loader, device) for loader in (train_dl, valid_dl))

In [None]:
# Size the network from the first sample: input length in, target length out.
trans_model = TransModel(len(data[0][0]), len(data[0][1]))

# nn.Module.to() moves the module's parameters in place, so the return value
# of to_device is not needed here.
to_device(trans_model.model, device)

print("Using GPU" if use_cuda else "Using CPU")
if use_cuda:
    trans_model.model = trans_model.model.cuda()

In [None]:
def evaluation(model, state_action, next_state_reward):
    """Score ``model`` on a single batch without updating it.

    The wrapped network is switched to eval mode for the forward pass and is
    always put back into train mode afterwards, even when an error occurs.

    Args:
        model: Object exposing ``.model`` (an ``nn.Module``) and
            ``predict_batch(inputs)``.
        state_action: Batch of model inputs.
        next_state_reward: Ground-truth tensor with the same shape as the
            model output; it needs at least two columns.

    Returns:
        A tuple ``(mse, mse_p1, mse_p2, accuracy, batch_size)`` where
        ``mse`` is the mean squared error over the whole batch, ``mse_p1`` /
        ``mse_p2`` are the errors on output columns 0 and 1, ``accuracy`` is
        the fraction of output elements exactly equal to the ground truth, and
        ``batch_size`` is ``len(state_action)``.
    """
    criterion = nn.MSELoss(reduction='mean')

    model.model.eval()
    try:
        # No gradients are needed for scoring.
        with torch.no_grad():
            outputs = model.predict_batch(state_action)
            mse = criterion(outputs, next_state_reward)
            mse_p1 = criterion(outputs[:, 0], next_state_reward[:, 0])
            mse_p2 = criterion(outputs[:, 1], next_state_reward[:, 1])

            # Divide by the real element count instead of a hard-coded
            # "2 columns per row"; identical for two-column outputs and
            # bounded by 1.0 for wider ones.
            exact_matches = torch.eq(outputs, next_state_reward).sum().item()
            accuracy = exact_matches / outputs.numel()
    finally:
        model.model.train()

    # The batch size comes from the argument; the previous code read the
    # unrelated global `xb` here.
    return mse.item(), mse_p1.item(), mse_p2.item(), accuracy, len(state_action)

def log_inputs(mse, mse_p1, mse_p2, accuracy, mse_baseline, output, ground_truth, epoch):
    """Log one epoch's metrics to TensorBoard and to the text report.

    Args:
        mse: Overall MSE, logged as the "MSE" scalar and written to the report.
        mse_p1: MSE logged under 'Player 1 Nexus HP MSE'.
        mse_p2: MSE logged under 'Player 2 Nexus HP  MSE'.
        accuracy: Accuracy value; written to the text report only.
        mse_baseline: Baseline MSE logged under 'Baseline Nexus HP MSE'.
        output: Model predictions; the first two rows are written to the
            report every 1000th epoch.
        ground_truth: Matching targets; the first two rows are written
            alongside ``output``.
        epoch: Global step for the scalars; also gates the sample dump.
    """
    summary_test.add_scalar("MSE", float(mse), epoch)
    summary_test.add_scalars("MSE",{'Player 1 Nexus HP MSE': float(mse_p1)}, epoch)
    summary_test.add_scalars("MSE",{'Player 2 Nexus HP  MSE': float(mse_p2)}, epoch)
    summary_test.add_scalars("MSE",{'Baseline Nexus HP MSE': float(mse_baseline)}, epoch)

    #summary_test.add_scalar(tag="Accuracy (Correct / Total)",
    #                        scalar_value=float(accuracy),
    #                        global_step=epoch)

    # The context manager closes the report even if a write fails.
    with open("nexus-HP-transition-model-report/test_loss.txt", "a+") as report:
        report.write("loss:" + str(mse) + ", ")
        report.write("acc:" + str(accuracy) + "\n")
        if epoch % 1000 == 0:
            # Use the `output` argument; the previous code read the global
            # `outputs` here and ignored the parameter.
            report.write("output:" + str(output[0:2]) + "\n")
            report.write("ground true:" + str(ground_truth[0:2]) + "\n")

In [None]:
# Sanity check: show the shape of one training batch.
for xb, yb in train_dl:
    print(xb.size(), yb.size())
    break

for epoch in tqdm.tqdm(range(10000)):
    loss = 0.0

    for xb, yb in train_dl:
        outputs = trans_model.predict_batch(xb)
        # Accumulate a plain float so no per-batch loss tensor is kept alive.
        loss += float(trans_model.fit(outputs, yb))

    # Average over the real number of batches. The previous
    # `len(train_indices) // batch_size + 1` counted one batch too many
    # whenever the sample count was a multiple of batch_size.
    n_train_batches = max(len(train_dl), 1)
    summary_test.add_scalars("MSE",{'Train MSE': loss / n_train_batches}, epoch)

    # The comprehension's loop variables are local to it, so `xb` / `yb` at
    # cell level still refer to the last training batch afterwards.
    results = [evaluation(trans_model, xb, yb) for xb, yb in valid_dl]

    # Batch-size-weighted averages over the validation batches.
    mse, mse_p1, mse_p2, accuracy, nums = zip(*results)
    total = np.sum(nums)
    avg_loss = np.sum(np.multiply(mse, nums)) / total
    avg_p1_loss = np.sum(np.multiply(mse_p1, nums)) / total
    avg_p2_loss = np.sum(np.multiply(mse_p2, nums)) / total
    avg_metric = np.sum(np.multiply(accuracy, nums)) / total

    # NOTE(review): `outputs` and `yb` are the last *training* batch's
    # predictions and targets, not validation ones -- confirm this is what the
    # report's sample dump is meant to show.
    log_inputs(avg_loss, avg_p1_loss, avg_p2_loss, avg_metric, mse_baseline, 
                outputs, yb, epoch)

    if epoch % 1000 == 0 and epoch != 0:
        print(epoch)
        trans_model.save()

# The periodic checkpoint last fires at epoch 9000; save once more so the
# final weights are not lost.
trans_model.save()