In [1]:
%matplotlib inline
import matplotlib
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import *

In [21]:
"""
Even the simple XOR system has 8 dimensions of evolutions:
- This model does not always learn
- Highly depends on the initialization
- The minibatch is not helping here (not enough inputs: no estimation of gradient)
"""

class MaximumModel(nn.Module):
    """Tiny 2-2-1 perceptron used to regress max(a, b) from a pair (a, b).

    The network is a single hidden layer of two ReLU units followed by a
    linear read-out producing one value per input row.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in forward order: hidden layer, activation, read-out.
        layers = [nn.Linear(2, 2), nn.ReLU(), nn.Linear(2, 1)]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Map a (..., 2) float tensor to a (..., 1) tensor of predictions."""
        return self.model(x)

def train(xs, ys, epochs=1000, batch_size=100, lr=1e-1, log_every=100, model=None):
    """Fit a regression model to (xs, ys) with Adam on a mean-squared-error loss.

    Args:
        xs: float tensor of inputs, one sample per row.
        ys: float tensor holding one scalar target per sample (shape ``(n,)``).
        epochs: number of full passes over the data.
        batch_size: mini-batch size handed to the ``DataLoader``.
        lr: Adam learning rate.
        log_every: print the loss summed over the epoch's batches every
            ``log_every`` epochs (starting with epoch 0); ``0`` or ``None``
            disables printing.
        model: optional ``nn.Module`` to train in place. It must emit one
            value per sample with shape ``(batch, 1)``. A fresh
            ``MaximumModel`` is created when omitted.

    Returns:
        The trained model.
    """
    data_set = TensorDataset(xs, ys)
    data_loader = DataLoader(data_set, batch_size=batch_size, shuffle=True)

    if model is None:
        model = MaximumModel()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        cumulative_loss = 0.
        for inputs, expected in data_loader:
            optimizer.zero_grad()
            got = model(inputs)
            # Targets are reshaped from (batch,) to (batch, 1) so that the loss
            # compares prediction and target element-wise instead of broadcasting.
            loss = criterion(got, expected.unsqueeze(-1))
            loss.backward()
            optimizer.step()
            cumulative_loss += loss.item()
        if log_every and epoch % log_every == 0:
            print(cumulative_loss)
    return model

n = 1000
# Draw n points uniformly from [-5, 5) x [-5, 5); the regression target is the
# larger coordinate of each point (vectorized row-wise maximum).
xs = np.random.uniform(-5, 5, size=(n, 2))
ys = xs.max(axis=-1)

# Convert to float32 tensors (plain data, no gradient tracking) and train.
xs = torch.tensor(xs, dtype=torch.float32)
ys = torch.tensor(ys, dtype=torch.float32)
model = train(xs, ys)

79.41660404205322
0.00031899770328891464
0.00012811376905119687
0.00011882758644787828
0.019658473684103228
0.8433895418420434
0.025898572814185172
0.003969318553572521
7.839351525262828e-06
0.0025544871932652313


In [22]:
def predict(model, xs):
    """Run ``model`` on ``xs`` and return its raw output as a NumPy array.

    Args:
        model: callable taking a float32 tensor and returning a tensor.
        xs: nested sequence (or array) of numbers, converted to float32.

    Returns:
        The model output, detached from the autograd graph, as ``numpy.ndarray``.
    """
    batch = torch.tensor(xs, dtype=torch.float32)
    return model(batch).detach().numpy()

# Points whose coordinates lie inside the training interval: the regression
# tracks the true maximum closely here.
inside_interval = [[1.5, -1.5], [-1.5, -2.5], [2, 3]]
print(predict(model, inside_interval))

# Points far outside the training interval: predictions are less reliable here.
outside_interval = [[-10, -11], [10, 11], [10, -10]]
print(predict(model, outside_interval))

[[ 1.5691013]
 [-1.4501865]
 [ 3.1062284]]
[[-4.6807637]
 [11.306477 ]
 [10.024773 ]]


In [25]:
"""
Same idea, but this time the classifier will just return the index of the maximum value
"""

class MaximumIndexModel(nn.Module):
    """Two-class classifier over a pair (a, b): which position holds the maximum.

    A 2-2-2 perceptron produces one logit per position; a softmax over the last
    dimension optionally turns the logits into probabilities.
    """

    def __init__(self):
        super().__init__()
        layers = [nn.Linear(2, 2), nn.ReLU(), nn.Linear(2, 2)]
        self.model = nn.Sequential(*layers)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x, with_softmax=True):
        """Return class probabilities, or raw logits when ``with_softmax`` is False.

        Raw logits are what ``nn.CrossEntropyLoss`` expects during training.
        """
        logits = self.model(x)
        return self.softmax(logits) if with_softmax else logits

def train(xs, ys, epochs=1000, batch_size=100, lr=1e-1, log_every=100, model=None):
    """Fit a classifier to (xs, ys) with Adam on a cross-entropy loss.

    Args:
        xs: float tensor of inputs, one sample per row.
        ys: long tensor of class indices, one per sample.
        epochs: number of full passes over the data.
        batch_size: mini-batch size handed to the ``DataLoader``.
        lr: Adam learning rate.
        log_every: print the loss summed over the epoch's batches every
            ``log_every`` epochs (starting with epoch 0); ``0`` or ``None``
            disables printing.
        model: optional ``nn.Module`` to train in place. Its ``forward`` must
            accept ``with_softmax=False`` and then return raw logits. A fresh
            ``MaximumIndexModel`` is created when omitted.

    Returns:
        The trained model.
    """
    data_set = TensorDataset(xs, ys)
    data_loader = DataLoader(data_set, batch_size=batch_size, shuffle=True)

    if model is None:
        model = MaximumIndexModel()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        cumulative_loss = 0.
        for inputs, expected in data_loader:
            optimizer.zero_grad()
            # CrossEntropyLoss applies log-softmax itself, so feed it raw logits.
            got = model(inputs, with_softmax=False)
            loss = criterion(got, expected)
            loss.backward()
            optimizer.step()
            cumulative_loss += loss.item()
        if log_every and epoch % log_every == 0:
            print(cumulative_loss)
    return model

n = 1000
# Draw n points uniformly from [-5, 5) x [-5, 5); the label is the position of
# the larger coordinate. argmax returns the first maximum, so a tie maps to
# class 0.
xs = np.random.uniform(-5, 5, size=(n, 2))
ys = np.argmax(xs, axis=-1)

# Inputs as float32, labels as int64 class indices for CrossEntropyLoss.
xs = torch.tensor(xs, dtype=torch.float32)
ys = torch.tensor(ys, dtype=torch.long)
model = train(xs, ys)

3.232817992568016
0.2702178319450468
0.18278778492094716
0.15379493188811466
0.11609094208688475
0.18232797240489163
0.06846127461176366
0.05554319115799444
0.42604836699320003
0.06338563692406751


In [38]:
def predict(model, inputs):
    """Return, for every row of ``inputs``, the element the model selects.

    The model scores each position of a row; the position with the highest
    score is looked up in the original (unconverted) row. The index comes from
    the model's scores, not from the raw input values.

    Args:
        model: callable mapping a float32 tensor of shape (batch, k) to
            per-position scores of shape (batch, k).
        inputs: sequence of rows (each a sequence of numbers).

    Returns:
        A list with one element of ``inputs[i]`` per row ``i``; an empty list
        when ``inputs`` is empty.
    """
    if len(inputs) == 0:
        return []
    xs = torch.tensor(inputs, dtype=torch.float32)
    with torch.no_grad():  # inference only, no autograd graph needed
        scores = model(xs)
    chosen = torch.argmax(scores, dim=-1).tolist()
    return [row[j] for row, j in zip(inputs, chosen)]

# predict returns one of the values already present in each input row, so the
# printed results are always actual inputs, both inside and outside the
# training interval.
inside_interval = [[1.5, -1.5], [-1.5, -2.5], [2, 3]]
outside_interval = [[-10, -11], [10, 11], [10, -10]]
print(predict(model, inside_interval))
print(predict(model, outside_interval))

[1.5, -1.5, 3]
[-10, 11, 10]


In [74]:
"""
Same idea, but this time the input is a sequence of arbitrary length (handled with an LSTM)
"""

class MaximumModelRange(nn.Module):
    """Regress the maximum of a variable-length sequence of scalars.

    Each scalar is fed to a single-layer LSTM as one time step; the LSTM output
    at the last time step is mapped to one value by a linear layer.
    """

    def __init__(self):
        super().__init__()
        self.hidden_size = 2
        self.lstm = nn.LSTM(input_size=1, hidden_size=self.hidden_size)
        self.fc = nn.Linear(self.hidden_size, 1)

    def forward(self, x):
        """Predict one value per sequence.

        Args:
            x: float tensor of shape (batch, seq_len); every row is a sequence.

        Returns:
            Tensor of shape (batch,) with one prediction per sequence.
        """
        # (batch, seq_len) -> (batch, seq_len, 1) -> (seq_len, batch, 1),
        # the time-major layout nn.LSTM expects by default.
        x = x.unsqueeze(dim=2)
        x = torch.transpose(x, dim0=0, dim1=1)

        # No explicit initial state: nn.LSTM then starts from zeros created
        # with the input's device and dtype, so the module keeps working after
        # .to(device) / .double().
        output, _ = self.lstm(x)

        ys = self.fc(output[-1])  # Last time step -> linear read-out
        ys = ys.squeeze(dim=-1)   # Drop the trailing size-1 dimension (regression)
        return ys

# Can be used like this
#   xs = torch.tensor([[1, 2, 3, 4], [2, 1, 4, 3], [-11, 11, 12, -12]], dtype = torch.float32, requires_grad=False)  
#   model = MaximumModelRange()
#   model(xs)
# Will output:
#   tensor([0.1261, 0.1411, 0.2904], grad_fn=<SqueezeBackward1>)

def train_lstm(xs, ys, epochs=2000, batch_size=100, lr=1e-3, log_every=100, model=None):
    """Fit a sequence-regression model to (xs, ys) with Adam on an MSE loss.

    Args:
        xs: float tensor of shape (n, seq_len); every row is one sequence.
        ys: float tensor of shape (n,) with one target per sequence.
        epochs: number of full passes over the data.
        batch_size: mini-batch size handed to the ``DataLoader``.
        lr: Adam learning rate.
        log_every: print the loss summed over the epoch's batches every
            ``log_every`` epochs (starting with epoch 0); ``0`` or ``None``
            disables printing.
        model: optional ``nn.Module`` to train in place. It must map a
            (batch, seq_len) tensor to a (batch,) tensor. A fresh
            ``MaximumModelRange`` is created when omitted.

    Returns:
        The trained model.
    """
    data_set = TensorDataset(xs, ys)
    data_loader = DataLoader(data_set, batch_size=batch_size, shuffle=True)

    if model is None:
        model = MaximumModelRange()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        cumulative_loss = 0.
        for inputs, expected in data_loader:
            optimizer.zero_grad()
            got = model(inputs)
            # Predictions and targets are both (batch,), so no reshaping is needed.
            loss = criterion(got, expected)
            loss.backward()
            optimizer.step()
            cumulative_loss += loss.item()
        if log_every and epoch % log_every == 0:
            print(cumulative_loss)
    return model


n = 1000
# Draw n sequences of 4 values uniformly from [-5, 5); the regression target is
# the maximum of each sequence (vectorized row-wise maximum).
xs = np.random.uniform(-5, 5, size=(n, 4))
ys = xs.max(axis=-1)

# Convert to float32 tensors (plain data, no gradient tracking) and train.
xs = torch.tensor(xs, dtype=torch.float32)
ys = torch.tensor(ys, dtype=torch.float32)
model = train_lstm(xs, ys)

126.64274597167969
19.927125096321106
7.24814248085022
2.0433948636054993
0.9851384460926056
0.5886765159666538
0.41289015114307404
0.30657883174717426
0.24300667084753513
0.19793241377919912
0.168237435631454
0.14474424347281456
0.1311894878745079
0.11536591360345483
0.10669053392484784
0.09784344397485256
0.0909389122389257
0.08726467099040747
0.08494188683107495
0.07780324993655086


In [75]:
def predict_lstm(model, xs):
    """Run the sequence model on ``xs`` and return its output as a NumPy array.

    Args:
        model: callable taking a float32 tensor of sequences (one per row).
        xs: nested sequence of numbers; all rows must have the same length.

    Returns:
        The model output, detached from the autograd graph, as ``numpy.ndarray``.
    """
    sequences = torch.tensor(xs, dtype=torch.float32)
    predictions = model(sequences)
    return predictions.detach().numpy()

# Sequences of length 3 (training used length 4) with values inside the
# training range: the estimates are "okay" but only approximate.
short_sequences = [[1.5, -1.5, 3], [-1.5, -2.5, -3.5], [2, 5, 3]]
print(predict_lstm(model, short_sequences))

[ 2.9640284  -0.84462476  5.0479846 ]
