In [None]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
from scipy.stats import wasserstein_distance

In [None]:
# Read the pickled dataset bundle from disk.
# NOTE(review): pickle.load can execute arbitrary code - only open files from a trusted source.
with open('../data/HeadsOrTails_data.pkl', 'rb') as f:
    data = pickle.load(f)

# Unpack the train/test arrays and the mean/std values that later cells use
# to convert inputs (x) and targets (y) back to their original units.
data_tr, data_te = data['data_tr'], data['data_te']
x_m, x_std = data['x_m'], data['x_std']
y_m, y_std = data['y_m'], data['y_std']

In [None]:
# Show the shapes of the training and test splits side by side.
print(*(split.shape for split in (data_tr, data_te)))

In [None]:
# Training data is served in shuffled mini-batches of 64 rows.
train_loader = torch.utils.data.DataLoader(
    dataset=data_tr,
    batch_size=64,
    shuffle=True,
)
# The test loader uses a batch size of 100000.
# NOTE(review): shuffling is not needed for evaluation; it only randomises
# which rows the later "first ten samples" cell displays - confirm it is intended.
test_loader = torch.utils.data.DataLoader(
    dataset=data_te,
    batch_size=100000,
    shuffle=True,
)

# Choose device

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('cuda available' if device.type == 'cuda' else 'cuda not available')
# (mlflow logging of the device is currently disabled:
#  torch.cuda.get_device_name(device) on GPU, 'cpu' otherwise)

# Define model

In [None]:
# Small fully connected network: 1 -> 32 -> 32 -> 1 features,
# with a LeakyReLU after each of the first two linear layers.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=1, out_features=32),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(in_features=32, out_features=32),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(in_features=32, out_features=1),
)
model = model.to(device)

print(model)

In [None]:
# Total number of scalar entries over all parameter tensors of the model.
n_params = sum(weights.numel() for weights in model.parameters())
print('Total nr of parameters:', n_params)

# Evaluate model before training

In [None]:
# Sanity check of the freshly initialised network: run the test data through the
# untrained model and plot its predictions next to the real targets.
model.eval()
with torch.no_grad():  # inference only, so no autograd graph is needed
    for batch in test_loader:
        # Column 0 is the model input, column 1 the target; unsqueeze(-1) gives
        # each a trailing feature dimension of size 1.
        # Tensor.to() returns a new tensor rather than moving in place, so the
        # result must be kept (the former bare `batch.to(device)` had no effect).
        x = batch[:, 0].unsqueeze(-1).to(device=device)
        print(x.shape)
        y = batch[:, 1].unsqueeze(-1).to(device=device)
        pred = model(x)
        print('Shapes:', pred.shape, y.shape)

# Only the tensors left over from the last batch of the loop are plotted.
plt.scatter(x.cpu().numpy(), y.cpu().numpy(), s=1, label='Real')
plt.scatter(x.cpu().numpy(), pred.cpu().numpy(), s=1, label='Predicted')
plt.xlabel('Bet (€)')
plt.ylabel('Winnings (€)')
plt.title('Real and predicted before training\n(to check initialization of network)')
plt.legend()
plt.gca().set_aspect('equal')

# Train

In [None]:
# Train the network with Adam on the MSE between prediction and target, track
# the mean train/validation MSE per epoch, and stop early once the validation
# loss stops improving.
lr = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
ep_lower_lr = 0  # NOTE(review): not read anywhere in this cell; kept in case another cell relies on it

MSE_train = []  # mean training MSE per epoch
MSE_val = []    # mean validation MSE per epoch

for epoch in range(200):

    # ====================== 1) TRAIN ======================
    model.train()
    print(f'epoch {epoch}')
    loss_temp = []
    for batch in train_loader:
        # Column 0 is the model input, column 1 the regression target.
        # `batch` itself is left untouched; only the slices are moved to the
        # device (Tensor.to returns a new tensor, it does not move in place).
        inputs = batch[..., [0]].to(device)
        targets = batch[..., [1]].to(device)

        optimizer.zero_grad()
        pred = model(inputs)
        MSE_loss = F.mse_loss(pred, targets)
        MSE_loss.backward()
        optimizer.step()

        loss_temp.append(MSE_loss.item())

    MSE_train.append(np.mean(loss_temp))
    print(f' \t train MSE loss {MSE_train[-1]:8.4}')

    # ====================== 2) EVALUATE ======================
    model.eval()
    loss_temp = []
    with torch.no_grad():  # no gradients are needed for validation
        for batch in test_loader:
            inputs = batch[..., [0]].to(device)
            targets = batch[..., [1]].to(device)
            pred = model(inputs)

            # Compare against the target column (1). The previous version used
            # the input column (0) here, so the validation loss and the early
            # stopping below tracked the wrong quantity.
            MSE_loss = F.mse_loss(pred, targets)
            loss_temp.append(MSE_loss.item())

    MSE_val.append(np.mean(loss_temp))
    print(f' \t val MSE loss {MSE_val[-1]:8.4}')

    # ======== Early stopping =========
    # After epoch 50, stop unless the mean validation MSE of the last 5 epochs
    # is at least 0.5% below the mean of the 5 epochs before those.
    if (
        epoch > 50
        and (np.mean(MSE_val[-5:]) > 0.995*np.mean(MSE_val[-10:-5]))
    ):

        print('======= !! Validation loss did not decrease enough =======')
        print('Stopping training')
        break

# Compare input, output, predicted output

In [None]:
# Print the first ten rows of `batch` (the tensor left over from the last loader
# loop that ran) in original units, followed by the model's predictions for them.
print('============== INPUT ==============')
temp_x = (batch[:10, [0]]*data['x_std'] + data['x_m']).cpu().detach().numpy()
# Round before the integer cast: undoing the standardisation in floating point
# can land just below the intended value (e.g. 6.9999995), and astype(int)
# alone truncates that to 6.
temp_x = np.rint(temp_x).astype(int)
print(temp_x)

print('============== OUTPUTS ==============')
temp_y = (batch[:10, [1]]*data['y_std'] + data['y_m']).cpu().detach().numpy()
print(temp_y)

print('============== PREDICTIONS ==============')
model.eval()
with torch.no_grad():  # inference only
    pred = model(batch[:10, [0]].to(device))*data['y_std'] + data['y_m']
temp_y = pred.cpu().numpy()
print(temp_y)

In [None]:
# Scatter the model's predictions and the true targets against the input,
# all converted back to original units, for the rows currently held in `batch`.
model.eval()

fig, ax = plt.subplots(figsize=(4,3), dpi=200)

# Inputs in original units (shared x-coordinates for every scatter below).
x = (batch[:, [0]]*data['x_std'] + data['x_m']).cpu().detach().numpy()

for i in range(100):
    pred = model(batch[..., [0]].to(device))*data['y_std'] + data['y_m']
    pred = pred.cpu().detach().numpy()

    # Only the first of the 100 overlaid scatters gets a legend entry.
    legend_kwargs = {'label': 'predicted'} if i == 0 else {}
    ax.scatter(x, pred, s=1, c='tab:orange', alpha=0.5, **legend_kwargs)

# True targets in original units.
y = (batch[:, [1]]*data['y_std'] + data['y_m']).cpu().detach().numpy()
ax.scatter(x, y, s=1, label='true')
ax.set_xlabel('bet (€)')
ax.set_ylabel('winnings (€)')
ax.set_aspect('equal')
ax.legend()
fig.subplots_adjust(left=0.2, bottom=0.2)

# Plot losses

In [None]:
# Per-epoch training and validation MSE on a logarithmic y-axis.
plt.figure()
for curve, curve_name in ((MSE_train, 'train'), (MSE_val, 'val')):
    plt.plot(curve, label=curve_name)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.yscale('log')
plt.legend()
plt.show()


# Calculate Wasserstein distance

In [None]:
# Per-sample Wasserstein distance between 100 repeated model predictions and
# the real target (both in original units), averaged over the whole test set.
model.eval()

# Collected across ALL test batches. Resetting this list inside the batch loop
# (as before) would make the final mean cover only the last batch.
wd = []

with torch.no_grad():  # inference only
    for batch in test_loader:
        batch = batch.to(device)

        # 100 predictions per data point, one column per forward pass.
        # NOTE(review): this network has no stochastic layers, so in eval mode
        # the 100 passes look identical; the loop is presumably kept for parity
        # with probabilistic variants of this notebook - confirm.
        preds = np.zeros((len(batch), 100))
        for i in range(100):
            pred = model(batch[:, [0]])
            preds[:, i] = pred.cpu().numpy()[:, 0]*y_std + y_m

        reals = batch[:, 1:].cpu().numpy()*y_std + y_m
        print(preds.shape, reals.shape)

        # One distance per data point: its row of predictions vs. its real value(s).
        for pred_row, real_row in zip(preds, reals):
            wd.append(wasserstein_distance(pred_row, real_row))

print('Mean Wasserstein distance between real and predicted:', np.mean(wd))