In [1]:
import torch
import numpy as np
import torchvision
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
import torch.nn as nn
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader
from torch.optim import Adam
from tensorboardX import SummaryWriter
import uuid
import tqdm
import os
import collections

# Runtime configuration shared by the later cells.
use_cuda = torch.cuda.is_available()

# Legacy typed-tensor classes, picked once so that calls such as
# `.type(FloatTensor)` follow whichever hardware is available.
if use_cuda:
    FloatTensor = torch.cuda.FloatTensor
    LongTensor = torch.cuda.LongTensor
    IntTensor = torch.cuda.IntTensor
    ByteTensor = torch.cuda.ByteTensor
else:
    FloatTensor = torch.FloatTensor
    LongTensor = torch.LongTensor
    IntTensor = torch.IntTensor
    ByteTensor = torch.ByteTensor
Tensor = FloatTensor

# Random identifier generated once per kernel session.
unique_id = str(uuid.uuid4())

In [2]:
def split_data(dataset, val_pct):
    """Randomly partition sample indices into training and validation sets.

    Args:
        dataset: Either the number of samples (an int, which is how this
            notebook calls it) or any sized collection, in which case
            ``len(dataset)`` is used.
        val_pct: Fraction of the samples reserved for validation; must lie
            in ``[0, 1]``.

    Returns:
        ``(train_idxs, val_idxs)``: two disjoint NumPy index arrays that
        together cover ``0 .. n-1``. The validation array holds
        ``int(val_pct * n)`` entries.

    Raises:
        ValueError: If ``val_pct`` is outside ``[0, 1]``.
    """
    if not 0 <= val_pct <= 1:
        raise ValueError("val_pct must be between 0 and 1, got {}".format(val_pct))

    # Accept a plain count as well as anything with a length.
    n = dataset if isinstance(dataset, (int, np.integer)) else len(dataset)

    # Determine size of validation set
    n_val = int(val_pct * n)
    # Random permutation of 0 .. n-1 (uses NumPy's global random state)
    idxs = np.random.permutation(n)
    # The first n_val shuffled indices form the validation set
    return idxs[n_val:], idxs[:n_val]

In [3]:
# Load the recorded samples.
# NOTE(review): torch.load unpickles the file, so only point it at a trusted
# source.
data = torch.load('100000_random_v_random.pt')
np.set_printoptions(suppress=True)  # print floats without scientific notation
l = len(data)  # NOTE(review): `l` is not referenced again in the visible cells

# Rescale every sample in place.
# NOTE(review): `data` itself is overwritten, so re-running this cell without
# reloading the file divides everything a second time.
for i in range(0, len(data)):
    # Target: keep only entries 4 and 9 of the sample's second element, divided
    # by 2000 (the same indices and scale as the Nexus HP inputs below).
    data[i][1] = [data[i][1][4] / 2000 , data[i][1][9] / 2000 ]
    
    data[i][0][0:4] = np.true_divide( data[i][0][0:4], 30) # Normalize P1 buildings
    data[i][0][5:9] = np.true_divide( data[i][0][5:9], 30) # Normalize P2 buildings
    data[i][0][4] = data[i][0][4] / 2000 # Normalize P1 Nexus HP
    data[i][0][9] = data[i][0][9] / 2000 # Normalize P2 Nexus HP
    data[i][0][10] = data[i][0][10] / 1500 # Normalize P1 Minerals
    data[i][0][11:] = np.true_divide(data[i][0][11:], 60) # Normalize both players' units on the field

# Sanity check: show the first normalised (input, target) pair.
print(data[0][0], data[0][1])

# NOTE(review): each sample pairs a longer input vector with a 2-element
# target (see the printed sample), so this presumably yields an object array
# with one row per sample - confirm on the NumPy version in use.
np_data = np.array(data)

# Column 0 holds the inputs, column 1 the targets.
my_x = np_data[:, 0]
my_y = np_data[:, 1]
# Convert each row to a float tensor and stack along a new leading dimension.
tensor_x = torch.stack([torch.Tensor(i) for i in my_x])
tensor_y = torch.stack([torch.Tensor(i) for i in my_y])

tensor_dataset = torch.utils.data.TensorDataset(tensor_x, tensor_y)

# Hold out 10% of the sample indices for validation.
train_indices, val_indices = split_data(len(data), val_pct=0.1)

print(len(train_indices), len(val_indices))
print(val_indices[:10])

[0.         0.         0.         0.03333333 1.         0.
 0.         0.         0.03333333 1.         0.15       0.
 0.         0.         0.         0.         0.        ] [1.0, 1.0]
94050 10450
[ 60248  98937  84965  13546  58398 100756  61149  77214  54019  48604]


In [4]:
# Mini-batch size shared by the training and validation loaders.
batch_size = 128
# TensorBoard writer; its event files go to the report directory.
summary_test = SummaryWriter(log_dir='nexus-HP-transition-model-report/')

# Both loaders read the same underlying dataset; each sampler restricts its
# loader to one index subset and draws from it in random order.
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

train_dl = DataLoader(tensor_dataset, batch_size=batch_size, sampler=train_sampler)
valid_dl = DataLoader(tensor_dataset, batch_size=batch_size, sampler=val_sampler)

In [5]:
def weights_initialize(module):
    """Initialise a linear layer in place; intended for ``nn.Module.apply``.

    ``nn.Linear`` weights receive Xavier-uniform values scaled by the ReLU
    gain, and the bias (when the layer has one) is set to 0.01. Any other
    module type is left untouched.

    Args:
        module: The module to initialise.
    """
    if not isinstance(module, nn.Linear):
        return
    nn.init.xavier_uniform_(module.weight, gain=nn.init.calculate_gain('relu'))
    # Layers built with bias=False have bias set to None; skip them instead of
    # raising AttributeError.
    if module.bias is not None:
        nn.init.constant_(module.bias, 0.01)
        
class TransModel(nn.Module):
    """Fully connected network mapping ``input_len`` features to ``output_len`` values.

    Layout: Linear(input_len, 512) -> ReLU -> BatchNorm1d(512)
    -> Linear(512, 128) -> ReLU -> BatchNorm1d(128) -> Linear(128, output_len).
    The layer names (fc1, rl1, bn1, ...) become part of the state-dict keys.
    """

    def __init__(self, input_len, output_len):
        super().__init__()

        # Insertion order defines the forward order of the Sequential.
        layers = collections.OrderedDict()
        layers['fc1'] = nn.Linear(input_len, 512)
        layers['rl1'] = nn.ReLU()
        layers['bn1'] = nn.BatchNorm1d(512)
        layers['fc2'] = nn.Linear(512, 128)
        layers['rl2'] = nn.ReLU()
        layers['bn2'] = nn.BatchNorm1d(128)
        layers['fc3'] = nn.Linear(128, output_len)
        self.model = nn.Sequential(layers)

        # Re-initialise the linear layers; other layers keep their defaults.
        self.model.apply(weights_initialize)

    def forward(self, xb):
        """Return the network output for the input batch ``xb``."""
        return self.model(xb)

# class TransModel():
# def __init__(self, input_len, ouput_len, learning_rate = 0.0001):
#     self.model = _TransModel(input_len, ouput_len)

#         if use_cuda:
#             print("Using GPU")
#             self.model = self.model.cuda()
#         else:
#             print("Using CPU")

#     self.steps = 0
#     # self.model = nn.DataParallel(self.model)


# def predict(self, input, steps, learning):
#     output = self.model(input).squeeze(1)
#     #reward, next_state = output[0], output[1:]
#     return output

# def predict_batch(self, input):
#     output = self.model(input)
#     #reward, next_state = output[:, 0], output[:, 1:]
#     return output

# def fit(self, state, target_state):
#     loss = self.loss_fn(state, target_state)

#     self.optimizer.zero_grad()
#     loss.backward()
#     self.optimizer.step()
#     self.steps += 1
#     return loss

def save(model):
    """Write ``model.state_dict()`` to ``<cwd>/models/NEXUS_<unique_id>.pt``.

    The ``models`` directory is created if it does not exist. The file name
    depends only on the module-level ``unique_id``, so repeated calls in one
    session overwrite the same checkpoint.

    Args:
        model: Object exposing ``state_dict()``.

    Returns:
        The path of the file that was written.
    """
    models_dir = os.path.join(os.getcwd(), 'models')
    # exist_ok=True already tolerates an existing directory, so no separate
    # (race-prone) existence check is needed.
    os.makedirs(models_dir, exist_ok=True)
    file_path = os.path.join(models_dir, 'NEXUS_' + unique_id + '.pt')
    torch.save(model.state_dict(), file_path)
    return file_path

#################################
def loss_batch(model, loss_func, xb, yb, opt=None, metric=None):
    """Process one batch and, when an optimizer is supplied, take one update step.

    Args:
        model: Callable producing predictions from ``xb``.
        loss_func: Callable ``(predictions, yb) -> loss tensor``.
        xb: Input batch.
        yb: Target batch.
        opt: Optional optimizer; if given, gradients are computed, applied
            and then cleared.
        metric: Optional callable ``(predictions, yb) -> value``.

    Returns:
        ``(loss as a Python number, len(xb), metric value or None)``.
    """
    predictions = model(xb)
    batch_loss = loss_func(predictions, yb)

    if opt is not None:
        # Backpropagate, apply the update, then reset gradients for the
        # next batch.
        batch_loss.backward()
        opt.step()
        opt.zero_grad()

    metric_value = metric(predictions, yb) if metric is not None else None

    return batch_loss.item(), len(xb), metric_value

def evaluate(model, loss_fn, valid_dl, metric=None):
    """Compute the batch-size-weighted average loss (and metric) over ``valid_dl``.

    The model is switched to eval mode for the pass and back to train mode
    afterwards; no gradients are tracked.

    Args:
        model: Model exposing ``eval()`` / ``train()`` and callable on a batch.
        loss_fn: Loss callable forwarded to ``loss_batch``.
        valid_dl: Iterable yielding ``(xb, yb)`` pairs.
        metric: Optional metric callable forwarded to ``loss_batch``.

    Returns:
        ``(avg_loss, total_samples, avg_metric)``; ``avg_metric`` is ``None``
        when no metric is given.
    """
    model.eval()
    with torch.no_grad():
        per_batch = [
            loss_batch(model, loss_fn, xb, yb, metric=metric)
            for xb, yb in valid_dl
        ]
        # Transpose [(loss, size, metric), ...] into three parallel tuples.
        batch_losses, batch_sizes, batch_metrics = zip(*per_batch)

        total = np.sum(batch_sizes)
        # Weight each batch by its size so a short final batch is not
        # over-represented.
        avg_loss = np.sum(np.multiply(batch_losses, batch_sizes)) / total

        if metric is None:
            avg_metric = None
        else:
            avg_metric = np.sum(np.multiply(batch_metrics, batch_sizes)) / total
    model.train()
    return avg_loss, total, avg_metric

def fit(epochs, lr, model, loss_fn, train_dl, valid_dl, metric=None, opt_fn=None):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        epochs: Number of passes over ``train_dl``.
        lr: Learning rate; used only when this function builds the optimizer.
        model: The network to train.
        loss_fn: Loss callable ``(predictions, targets) -> loss tensor``.
        train_dl: Iterable of ``(xb, yb)`` training batches.
        valid_dl: Iterable of ``(xb, yb)`` validation batches.
        metric: Optional callable ``(predictions, targets) -> number`` that
            is evaluated on the validation set.
        opt_fn: One of
            * ``None``: plain SGD with ``lr`` is used;
            * an optimizer instance: used as-is (``lr`` is ignored);
            * an optimizer class/factory: called as
              ``opt_fn(model.parameters(), lr=lr)``.

    Returns:
        ``(losses, metrics)``: per-epoch validation loss and metric values
        (metric entries are ``None`` when ``metric`` is ``None``).
    """
    losses, metrics = [], []

    # Resolve the optimizer. The previous code handed the SGD *class* to
    # loss_batch when opt_fn was None, which failed on the first step.
    if opt_fn is None:
        opt = torch.optim.SGD(model.parameters(), lr=lr)
    elif isinstance(opt_fn, torch.optim.Optimizer):
        opt = opt_fn
    else:
        opt = opt_fn(model.parameters(), lr=lr)

    for epoch in tqdm.tqdm(range(epochs)):
        # Training
        for xb, yb in train_dl:
            loss_batch(model, loss_fn, xb, yb, opt)

        # Evaluation
        val_loss, _, val_metric = evaluate(model, loss_fn, valid_dl, metric)

        # Record the loss & metric
        losses.append(val_loss)
        metrics.append(val_metric)

        # Print progress; with no metric there is no value to format.
        if metric is None:
            print('Epoch [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, epochs, val_loss))
        else:
            print('Epoch [{}/{}], Loss: {:.4f}, {}: {:.4f}'
                  .format(epoch+1, epochs, val_loss,
                          getattr(metric, '__name__', 'metric'), val_metric))

        # Checkpoint every 1000 epochs (never at epoch 0).
        if epoch % 1000 == 0 and epoch != 0:
            save(model)

    return losses, metrics

def accuracy(outputs, ground_true, tol=0.0):
    """Fraction of predicted elements that match their targets.

    The previous version divided the number of matching elements by the
    number of rows, so the result could exceed 1 for multi-column outputs.
    The count is now normalised by the total number of elements, giving a
    value in ``[0, 1]``.

    Args:
        outputs: Predicted tensor.
        ground_true: Target tensor, broadcastable against ``outputs``.
        tol: Absolute tolerance for a match. The default ``0.0`` requires
            exact equality; a positive value counts elements with
            ``|output - target| <= tol``, which is more meaningful for
            real-valued regression outputs.

    Returns:
        A Python float in ``[0, 1]``; ``0.0`` for an empty batch.
    """
    if tol == 0:
        matches = torch.eq(outputs, ground_true)
    else:
        matches = torch.abs(outputs - ground_true) <= tol

    n_elements = matches.numel()
    if n_elements == 0:
        return 0.0
    return torch.sum(matches).item() / n_elements

In [6]:
# Baseline: treat input entries 4 and 9 (the two Nexus HP values) as the
# prediction and measure their MSE against the stored targets of the
# validation samples.
test_data = [data[i] for i in val_indices]
val_set = np.array(test_data)

idx = [4, 9]
baseline = np.stack(val_set[:, 0])
baseline_hp = baseline[:, idx]

bl_next_state_reward = np.stack(val_set[:, 1])

mse_baseline = np.mean(np.square(baseline_hp - bl_next_state_reward))
print(mse_baseline)

0.015275486650717703


In [7]:
# Size the network from the first sample: one input per input entry and one
# output per target entry.
trans_model = TransModel(
    input_len=len(data[0][0]),
    output_len=len(data[0][1]),
)

In [8]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

def to_device(data, device):
    """Move ``data`` to ``device``.

    A list or tuple is processed element by element (recursively) and comes
    back as a list; anything else is moved with
    ``data.to(device, non_blocking=True)``.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader():
    """Thin wrapper that moves every batch of a data loader to one device."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __iter__(self):
        """Iterate over the wrapped loader, yielding each batch after the move."""
        yield from map(lambda batch: to_device(batch, self.device), self.dl)

    def __len__(self):
        """Return the length of the wrapped loader (its number of batches)."""
        return len(self.dl)

In [9]:
# Resolve the compute device once, then wrap both loaders so the batches
# they yield are already on that device.
device = get_default_device()

train_dl, valid_dl = (
    DeviceDataLoader(loader, device) for loader in (train_dl, valid_dl)
)

In [10]:
def evaluation(model, data, epoch):
    """Score ``model`` on ``data`` and log the results.

    Results go to the module-level TensorBoard writer ``summary_test`` and are
    appended to ``nexus-HP-transition-model-report/test_loss.txt``.

    Args:
        model: Either an ``nn.Module`` (called directly) or a wrapper object
            that keeps the network in ``.model`` and exposes
            ``predict_batch``.
        data: Two-column array indexed as ``data[:, 0]`` (inputs) and
            ``data[:, 1]`` (targets); each column is stacked into one array.
        epoch: Step index used for TensorBoard and for the every-1000-epochs
            sample dump.

    Returns:
        The overall MSE as a Python float.
    """
    state_action = torch.from_numpy(np.stack(data[:, 0])).type(FloatTensor)
    next_state_reward = torch.from_numpy(np.stack(data[:, 1])).type(FloatTensor)

    # The nn.Module version of the model has no predict_batch method, so call
    # the module directly in that case; wrappers keep using predict_batch.
    network = model if isinstance(model, nn.Module) else model.model
    predict = getattr(model, 'predict_batch', model)

    criterion = nn.MSELoss(reduction='mean')
    network.eval()
    try:
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            outputs = predict(state_action)
    finally:
        # Restore training mode even if the forward pass fails.
        network.train()

    mse = criterion(outputs, next_state_reward)
    mse_p1 = criterion(outputs[:, 0], next_state_reward[:, 0])
    mse_p2 = criterion(outputs[:, 1], next_state_reward[:, 1])

    # Share of output elements that equal their target exactly.
    n_exact = torch.sum(torch.eq(outputs, next_state_reward)).item()
    exact_match = n_exact / outputs.numel()

    summary_test.add_scalar("MSE", float(mse.item()), epoch)
    summary_test.add_scalars("MSE",{'Player 1 Nexus HP MSE': float(mse_p1.item())}, epoch)
    summary_test.add_scalars("MSE",{'Player 2 Nexus HP  MSE': float(mse_p2.item())}, epoch)
    summary_test.add_scalars("MSE",{'Baseline Nexus HP MSE': float(mse_baseline)}, epoch)

    # The context manager closes the log file even if a write raises.
    with open("nexus-HP-transition-model-report/test_loss.txt", "a+") as f:
        f.write("loss:" + str(mse.item()) + ", ")
        f.write("acc:" + str(exact_match) + "\n")
        if epoch % 1000 == 0:
            f.write("output:" + str(outputs[0:2]) + "\n")
            f.write("ground true:" + str(next_state_reward[0:2]) + "\n")
    return mse.item()

In [11]:
# state_action = torch.from_numpy(np.stack(train_data[:, 0])).type(FloatTensor)
# next_state_reward = torch.from_numpy(np.stack(train_data[:, 1])).type(FloatTensor)
# print(state_action.size(), next_state_reward.size())

# Move the network to the selected device before the optimizer is built from
# its parameters.
to_device(trans_model, device)

print("Using GPU" if use_cuda else "Using CPU")
if use_cuda:
    trans_model = trans_model.cuda()

# trans_model = nn.DataParallel(trans_model, device_ids=[0,1,2])

loss_fn = nn.MSELoss(reduction='mean')
optimizer = Adam(trans_model.parameters(), lr=0.0001)

# Long run: 10000 epochs, with the validation loss and metric printed after
# each one.
losses2, metrics2 = fit(
    epochs=10000,
    lr=0.0001,
    model=trans_model,
    loss_fn=loss_fn,
    train_dl=train_dl,
    valid_dl=valid_dl,
    metric=accuracy,
    opt_fn=optimizer,
)

# for epoch in tqdm.tqdm(range(10000)):   
#     loss = 0
#     # s = np.arange(state_action.shape[0])
#     # np.random.shuffle(s)
#     # train_x = state_action[s]
#     # train_y = next_state_reward[s]
# #     for i in range(state_action.shape[0] // batch_size + 1):
# #         if (i + 1) * batch_size <= state_action.shape[0]:
# #             start = i * batch_size
# #             end = (i + 1) * batch_size
# #         else:
# #             start = i * batch_size
# #             end = state_action.shape[0]
# #         #print(start, end)
# #         inputs, ground_true = train_x[start : end, :], train_y[start : end, :]
#     for inputs, ground_true in train_dl:
#         print(ground_true)
#         outputs = trans_model.predict_batch(inputs)
#         loss += trans_model.fit(outputs, ground_true)
# #     print(epoch)
#     summary_test.add_scalars("MSE",{'Train MSE': float(loss / (state_action.shape[0] // batch_size + 1) )}, epoch)
#     evaluation(trans_model, test_data, epoch)
#     #break
#     if epoch % 1000 == 0 and epoch != 0:
#         trans_model.save()

  0%|          | 0/10000 [00:00<?, ?it/s]

Using GPU


  0%|          | 1/10000 [00:03<10:05:55,  3.64s/it]

Epoch [1/10000], Loss: 0.0520, accuracy: 0.0000


  0%|          | 2/10000 [00:06<9:43:03,  3.50s/it] 

Epoch [2/10000], Loss: 0.0218, accuracy: 0.0000


  0%|          | 3/10000 [00:10<9:27:46,  3.41s/it]

Epoch [3/10000], Loss: 0.0133, accuracy: 0.0000


  0%|          | 4/10000 [00:13<9:16:27,  3.34s/it]

Epoch [4/10000], Loss: 0.0105, accuracy: 0.0000


  0%|          | 5/10000 [00:16<9:08:36,  3.29s/it]

Epoch [5/10000], Loss: 0.0083, accuracy: 0.0000


  0%|          | 6/10000 [00:19<8:58:19,  3.23s/it]

Epoch [6/10000], Loss: 0.0070, accuracy: 0.0000


  0%|          | 7/10000 [00:22<8:50:58,  3.19s/it]

Epoch [7/10000], Loss: 0.0056, accuracy: 0.0000


  0%|          | 8/10000 [00:25<8:44:21,  3.15s/it]

Epoch [8/10000], Loss: 0.0048, accuracy: 0.0000


  0%|          | 9/10000 [00:28<8:40:00,  3.12s/it]

Epoch [9/10000], Loss: 0.0042, accuracy: 0.0000


  0%|          | 10/10000 [00:31<8:38:04,  3.11s/it]

Epoch [10/10000], Loss: 0.0037, accuracy: 0.0000


  0%|          | 11/10000 [00:34<8:39:45,  3.12s/it]

Epoch [11/10000], Loss: 0.0034, accuracy: 0.0000


  0%|          | 12/10000 [00:38<8:38:44,  3.12s/it]

Epoch [12/10000], Loss: 0.0031, accuracy: 0.0001


  0%|          | 13/10000 [00:41<8:39:07,  3.12s/it]

Epoch [13/10000], Loss: 0.0026, accuracy: 0.0000


  0%|          | 14/10000 [00:44<8:38:09,  3.11s/it]

Epoch [14/10000], Loss: 0.0026, accuracy: 0.0000


  0%|          | 15/10000 [00:47<8:38:59,  3.12s/it]

Epoch [15/10000], Loss: 0.0023, accuracy: 0.0000


  0%|          | 16/10000 [00:50<8:39:22,  3.12s/it]

Epoch [16/10000], Loss: 0.0022, accuracy: 0.0000


  0%|          | 17/10000 [00:53<8:38:20,  3.12s/it]

Epoch [17/10000], Loss: 0.0021, accuracy: 0.0000


  0%|          | 18/10000 [00:56<8:37:53,  3.11s/it]

Epoch [18/10000], Loss: 0.0022, accuracy: 0.0000


  0%|          | 19/10000 [00:59<8:35:44,  3.10s/it]

Epoch [19/10000], Loss: 0.0021, accuracy: 0.0000


  0%|          | 20/10000 [01:02<8:34:21,  3.09s/it]

Epoch [20/10000], Loss: 0.0018, accuracy: 0.0000


  0%|          | 21/10000 [01:05<8:33:12,  3.09s/it]

Epoch [21/10000], Loss: 0.0018, accuracy: 0.0000


  0%|          | 22/10000 [01:09<8:33:28,  3.09s/it]

Epoch [22/10000], Loss: 0.0018, accuracy: 0.0000


  0%|          | 23/10000 [01:12<8:33:43,  3.09s/it]

Epoch [23/10000], Loss: 0.0016, accuracy: 0.0000


  0%|          | 24/10000 [01:15<8:33:54,  3.09s/it]

Epoch [24/10000], Loss: 0.0018, accuracy: 0.0000


  0%|          | 25/10000 [01:18<8:32:44,  3.08s/it]

Epoch [25/10000], Loss: 0.0016, accuracy: 0.0000


  0%|          | 26/10000 [01:21<8:41:59,  3.14s/it]

Epoch [26/10000], Loss: 0.0016, accuracy: 0.0000


  0%|          | 27/10000 [01:24<8:39:58,  3.13s/it]

Epoch [27/10000], Loss: 0.0015, accuracy: 0.0000


  0%|          | 28/10000 [01:27<8:45:31,  3.16s/it]

Epoch [28/10000], Loss: 0.0014, accuracy: 0.0001


  0%|          | 29/10000 [01:30<8:40:48,  3.13s/it]

Epoch [29/10000], Loss: 0.0014, accuracy: 0.0000


  0%|          | 30/10000 [01:34<8:37:28,  3.11s/it]

Epoch [30/10000], Loss: 0.0016, accuracy: 0.0000


  0%|          | 31/10000 [01:37<8:45:45,  3.16s/it]

Epoch [31/10000], Loss: 0.0014, accuracy: 0.0000


  0%|          | 32/10000 [01:40<8:41:23,  3.14s/it]

Epoch [32/10000], Loss: 0.0014, accuracy: 0.0000


  0%|          | 33/10000 [01:43<8:38:17,  3.12s/it]

Epoch [33/10000], Loss: 0.0013, accuracy: 0.0000


  0%|          | 34/10000 [01:46<8:38:24,  3.12s/it]

Epoch [34/10000], Loss: 0.0013, accuracy: 0.0000


  0%|          | 35/10000 [01:49<8:35:52,  3.11s/it]

Epoch [35/10000], Loss: 0.0013, accuracy: 0.0001


  0%|          | 36/10000 [01:52<8:34:17,  3.10s/it]

Epoch [36/10000], Loss: 0.0013, accuracy: 0.0000


KeyboardInterrupt: 