In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import itertools
import numpy as np
import math
import os
# Debugging aid: presumably set so that CUDA kernels run synchronously and errors
# surface at the failing call. NOTE(review): this is expected to slow GPU
# execution — confirm it is still needed before long training runs.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [2]:
# Pick the compute device once: CUDA when it is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Report the choice so the recorded output documents where the run happened.
if device.type == "cuda":
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("Using CPU")

Using CPU


In [29]:
class MatrixDataset(Dataset):
    """Map-style dataset that pairs each observation matrix with its label vector."""

    def __init__(self, matrices, labels):
        # Both sequences are stored as given and are indexed in parallel.
        self.matrices, self.labels = matrices, labels

    def __len__(self):
        # The dataset length is the number of stored matrices.
        return len(self.matrices)

    def __getitem__(self, idx):
        # The matrix is handed back untouched; the label is converted to a
        # float32 tensor on every access.
        sample = self.matrices[idx]
        target = torch.tensor(self.labels[idx], dtype=torch.float32)
        return sample, target
    
class Prey_Net(nn.Module):
    """Fully connected network: 9 flattened inputs -> 64 -> 64 -> 5 output logits."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Layers are created in the same order as they are applied in forward().
        self.fc1 = nn.Linear(9, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 5)

    def forward(self, x):
        """Return raw (un-normalised) logits, one row of 5 per batch element."""
        hidden = self.flatten(x)
        # ReLU follows every layer except the last one.
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.fc3(hidden)
    
class Predator_Net(nn.Module):
    """Fully connected network: 25 flattened inputs -> 128 -> 64 -> 64 -> 5 output logits."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Layers are created in the same order as they are applied in forward().
        self.fc1 = nn.Linear(25, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 5)

    def forward(self, x):
        """Return raw (un-normalised) logits, one row of 5 per batch element."""
        hidden = self.flatten(x)
        # ReLU follows every layer except the last one.
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = torch.relu(layer(hidden))
        return self.fc4(hidden)


def find_closest_cell(t, point, n):
    """
    Manhattan distance from `point` to the nearest cell of `t` holding `n`.

    t: 2-D torch tensor of cell codes (its extent is read through t.size())
    point: tuple (row, col) of the reference position
    n: cell value to search for

    Returns the smallest distance found, or math.inf when no cell equals `n`.
    Note that, despite the name, only the distance is returned, not the cell.
    """
    # Lazily enumerate the distance to every matching cell, scanning row by row.
    distances = (
        abs(point[0] - row) + abs(point[1] - col)
        for row in range(t.size()[0])
        for col in range(t.size()[1])
        if t[row][col] == n
    )
    # `default` covers the "no matching cell" case.
    return min(distances, default=math.inf)


def find_best_move(t, agent):
    """
    Label an observation with a uniform distribution over the agent's best moves.

    Moves are indexed: 0 = Stand, 1 = Top, 2 = Right, 3 = Bottom, 4 = Left.

    t: 2-D grid of cell codes, indexed as t[row][col]
    agent: 1 for the prey (taken to sit at (1, 1)), 2 for the predator
           (taken to sit at (2, 2))

    The prey may only step onto cells equal to 0 and prefers the step whose
    nearest 2 is farthest away (Manhattan distance). The predator may step onto
    any cell that is not 2 and prefers the step whose nearest 1 is closest.
    Ties share the probability equally. Standing still is returned only when no
    step is allowed (which is also the outcome for any other `agent` value).

    Returns a list of 5 numbers summing to 1.
    """
    is_prey = agent == 1
    is_predator = agent == 2

    target = 1 if is_predator else 2          # cell code the agent measures distance to
    row, col = (1, 1) if is_prey else (2, 2)  # the agent's own position inside t
    best_distance = -math.inf if is_prey else math.inf

    # (row, col, move index) for Top, Right, Bottom, Left — in that order.
    neighbours = (
        (row - 1, col, 1),
        (row, col + 1, 2),
        (row + 1, col, 3),
        (row, col - 1, 4),
    )

    best_moves = []
    for r, c, move_index in neighbours:
        if is_prey:
            allowed = t[r][c] == 0
        elif is_predator:
            allowed = t[r][c] != 2
        else:
            allowed = False
        if not allowed:
            continue

        distance = find_closest_cell(t, (r, c), target)

        if distance == best_distance:
            # Equally good as the current best: remember it as a tie.
            best_moves.append(move_index)
        elif (distance > best_distance) if is_prey else (distance < best_distance):
            # Strictly better: prey maximises the distance, predator minimises it.
            best_distance = distance
            best_moves = [move_index]

    if not best_moves:
        return [1, 0, 0, 0, 0]

    share = 1 / len(best_moves)
    return [share if index in best_moves else 0 for index in range(5)]


def infer(net, t):
    """
    Run a single observation through `net` and return its move probabilities.

    net: torch.nn.Module producing one row of logits per batch element
    t: tensor holding ONE observation (no batch dimension); a batch dimension
       of size 1 is added before the forward pass

    Returns a plain Python list of floats: the softmax of the logits for `t`.

    Side effect: `net` is switched to evaluation mode and left in that mode.
    """
    net.eval()

    # Send the input to the device the network actually lives on, instead of
    # relying on a notebook-level `device` variable that may not match it.
    first_param = next(net.parameters(), None)
    batch = t.unsqueeze(0)
    if first_param is not None:
        batch = batch.to(first_param.device)

    with torch.no_grad():  # Disable gradient calculation to save memory and computation
        logits = net(batch)
        probabilities = torch.softmax(logits, dim=1)

    return probabilities[0].tolist()

# Prey training

In [4]:
# Enumerate every assignment of the values 0, 1 and 2 to the 9 cells of a 3x3 grid
vals = [0, 1, 2]
combinations = list(itertools.product(vals, repeat=9))

# Keep only the grids whose middle cell (flat index 4) is 1, i.e. the prey itself
filtered_combinations = [cells for cells in combinations if cells[4] == 1]

# Turn each remaining combination into a float32 tensor of shape (3x3) and label it
prey_tensors = [
    torch.tensor(cells, dtype=torch.float32).reshape(3, 3)
    for cells in filtered_combinations
]
prey_labels = [find_best_move(grid, 1) for grid in prey_tensors]

In [5]:
# Data pipeline: shuffled mini-batches of (observation, move distribution) pairs
batch_size = 32
dataset = MatrixDataset(prey_tensors, prey_labels)
trainloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Model, loss and optimiser
prey_net = Prey_Net().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(prey_net.parameters(), lr=1e-3)

num_epochs = 50

for epoch in range(num_epochs):
    running_loss = 0.0  # sum of the per-batch losses seen in this epoch

    for i, data in enumerate(trainloader):
        inputs = data[0].to(device)
        labels = data[1].to(device)

        # Standard step: clear old gradients, forward, backward, update
        optimizer.zero_grad()
        outputs = prey_net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean batch loss of the epoch
    print(f"Epoch: {epoch + 1}, Loss: {running_loss / len(trainloader)}")

print("Finished training")

Epoch: 1, Loss: 0.8511202037334442
Epoch: 2, Loss: 0.32008368617773636
Epoch: 3, Loss: 0.28207455202937126
Epoch: 4, Loss: 0.27137385903421374
Epoch: 5, Loss: 0.2588146826234257
Epoch: 6, Loss: 0.25290158799356666
Epoch: 7, Loss: 0.24602344347891913
Epoch: 8, Loss: 0.2416641013335111
Epoch: 9, Loss: 0.23958488333138447
Epoch: 10, Loss: 0.23827663995325565
Epoch: 11, Loss: 0.23322225051640433
Epoch: 12, Loss: 0.2353466049885576
Epoch: 13, Loss: 0.23176268705175918
Epoch: 14, Loss: 0.23394610342180844
Epoch: 15, Loss: 0.232061336759887
Epoch: 16, Loss: 0.22589706044400548
Epoch: 17, Loss: 0.22563695394009062
Epoch: 18, Loss: 0.2285062879060047
Epoch: 19, Loss: 0.22348176635318187
Epoch: 20, Loss: 0.22256433415007723
Epoch: 21, Loss: 0.22280205260889913
Epoch: 22, Loss: 0.22287565176218743
Epoch: 23, Loss: 0.22205264640302913
Epoch: 24, Loss: 0.22104095187429815
Epoch: 25, Loss: 0.22055320069191522
Epoch: 26, Loss: 0.22619200620836424
Epoch: 27, Loss: 0.2252142978741706
Epoch: 28, Loss: 0

In [7]:
# Example of inference: show one prey observation, then the move probabilities
# the trained network assigns to it (Stand, Top, Right, Bottom, Left)
t = prey_tensors[123]
print(t, end="\n\n")
infer(prey_net, t)

tensor([[0., 0., 0.],
        [1., 1., 1.],
        [1., 2., 0.]])



[1.2701982541329926e-06,
 0.9999163150787354,
 2.3241022063302808e-05,
 2.8161408760496442e-09,
 5.915996734984219e-05]

# Predator training

In [None]:
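# NOTE: exhaustive enumeration is left disabled — 3**25 (about 8.5e11) grids is far too many to build;
# the predator observations are randomly sampled in the next cell instead.
# # Create a list of all possible combinations of 0, 1, and 2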
# vals = [0, 1, 2]
# combinations = list(itertools.product(vals, repeat=25))

# # Keep only the combinations whose middle element (flat index 12) is 2
# filtered_combinations = list(filter(lambda x: x[12] == 2, combinations))

# # Reshape the filtered combinations to tensors of shape (5x5)
# prey_tensors = list([torch.tensor(combination, dtype=torch.float32).reshape(5, 5) for combination in filtered_combinations])
# # prey_labels = [find_best_move(t, 1) for t in prey_tensors]  

In [39]:
num_tensors = 10000
tensor_shape = (5, 5)
# Sampling probabilities of the cell values 0, 1 and 2 respectively
probs = torch.tensor([0.9, 0.08, 0.02], dtype=torch.float32)

# Dedicated, seeded generator: re-running this cell (or Restart & Run All) now
# produces the same observations, without touching the global RNG state.
# The seed value itself is arbitrary.
predator_seed = 0
predator_rng = torch.Generator().manual_seed(predator_seed)

predator_tensors = []

for _ in range(num_tensors):
    # Draw one value (0, 1 or 2) per cell and arrange the draws as a 5x5 tensor
    random_tensor = torch.multinomial(
        probs,
        tensor_shape[0] * tensor_shape[1],
        replacement=True,
        generator=predator_rng,
    ).reshape(tensor_shape)

    # multinomial returns integer indices; the networks expect float32 inputs
    random_tensor = random_tensor.to(torch.float32)

    # Set the middle element to 2 (the observing predator itself)
    random_tensor[tensor_shape[0] // 2, tensor_shape[1] // 2] = 2

    # Append the generated tensor to the list
    predator_tensors.append(random_tensor)

# Label every observation with the distribution over the predator's best moves
predator_labels = [find_best_move(t, 2) for t in predator_tensors]

In [40]:
# Data pipeline: shuffled mini-batches of (observation, move distribution) pairs
batch_size = 32
dataset = MatrixDataset(predator_tensors, predator_labels)
trainloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Model, loss and optimiser
predator_net = Predator_Net().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(predator_net.parameters(), lr=1e-3)

num_epochs = 100

for epoch in range(num_epochs):
    running_loss = 0.0  # sum of the per-batch losses seen in this epoch

    for i, data in enumerate(trainloader):
        inputs = data[0].to(device)
        labels = data[1].to(device)

        # Standard step: clear old gradients, forward, backward, update
        optimizer.zero_grad()
        outputs = predator_net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean batch loss of the epoch
    print(f"Epoch: {epoch + 1}, Loss: {running_loss / len(trainloader)}")

print("Finished training")

Epoch: 1, Loss: 1.206555098390427
Epoch: 2, Loss: 0.9422098729557122
Epoch: 3, Loss: 0.7827305321495373
Epoch: 4, Loss: 0.7084587223994465
Epoch: 5, Loss: 0.6758047210903594
Epoch: 6, Loss: 0.6594369351483
Epoch: 7, Loss: 0.6484458401751595
Epoch: 8, Loss: 0.6403937352160676
Epoch: 9, Loss: 0.6342903632706347
Epoch: 10, Loss: 0.6322273817686989
Epoch: 11, Loss: 0.6274177721514108
Epoch: 12, Loss: 0.6236235407975536
Epoch: 13, Loss: 0.6200119822550887
Epoch: 14, Loss: 0.6193468249834384
Epoch: 15, Loss: 0.6164592199813063
Epoch: 16, Loss: 0.6127759946611362
Epoch: 17, Loss: 0.6132153153609925
Epoch: 18, Loss: 0.6113727307929018
Epoch: 19, Loss: 0.6095307493171753
Epoch: 20, Loss: 0.6091414669070381
Epoch: 21, Loss: 0.6063179397544922
Epoch: 22, Loss: 0.6055100421174265
Epoch: 23, Loss: 0.6033164357986694
Epoch: 24, Loss: 0.6022062848170344
Epoch: 25, Loss: 0.6038723571803242
Epoch: 26, Loss: 0.6021661540380301
Epoch: 27, Loss: 0.6010659440828208
Epoch: 28, Loss: 0.5999455869007415
Epoch

In [44]:
# Rule-based label (Stand, Top, Right, Bottom, Left) for one predator observation
find_best_move(t=predator_tensors[123], agent=2)

[0, 0, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333]

In [43]:
# Example of inference: show one predator observation, then the move probabilities
# the trained network assigns to it (Stand, Top, Right, Bottom, Left)
t = predator_tensors[123]
print(t, end="\n\n")
infer(predator_net, t)

tensor([[0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 2., 0., 1.],
        [0., 1., 0., 0., 1.],
        [0., 0., 0., 0., 0.]])



[6.18479741162567e-15,
 1.5383237041532993e-05,
 0.4776741564273834,
 0.23342560231685638,
 0.28888484835624695]