In [340]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, TensorDataset

In [341]:
class FeedforwardNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(FeedforwardNN, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size) #bias = False?
        self.relu1 = nn.ReLU()
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.relu1(self.layer1(x))
        x = self.output_layer(x)
        return x

# Set random seed for reproducibility
torch.manual_seed(42)

# Define synthetic input data
input_size = 20
synthetic_input = torch.rand(input_size)

# Instantiate the neural network
hidden_size = 20
output_size = 1
model = FeedforwardNN(input_size, hidden_size, output_size)

# Forward pass
output = model(synthetic_input)

# Print the architecture and output shape
print(model)
print("Output shape:", output.shape)

FeedforwardNN(
  (layer1): Linear(in_features=20, out_features=20, bias=True)
  (relu1): ReLU()
  (output_layer): Linear(in_features=20, out_features=1, bias=True)
)
Output shape: torch.Size([1])


In [342]:
initial_weights = model.state_dict()

In [343]:
# if torch.cuda.is_available():
#     device = torch.device("cuda")          # a CUDA device object
#     print('Using GPU:', torch.cuda.get_device_name(0))
# else:
#     device = torch.device("cpu")           # a CPU device object
#     print('CUDA is not available, using CPU.')
# model.to(device)

In [344]:
p = 0
for name, param in model.named_parameters():
    if 'weight' in name:
        x,y = param.size()
        p += x*y
        print(f'Layer: {name}, Size: {param.size()}')
print('Number of parameters:',p)

Layer: layer1.weight, Size: torch.Size([20, 20])
Layer: output_layer.weight, Size: torch.Size([1, 20])
Number of parameters: 420


In [348]:
# Simulated Reward Function
a = torch.rand(20)
def reward_function(context):
    return 10*(torch.dot(context,a)**2).item() + torch.randn(1).item() 
reward_function(torch.rand(20))

183.07807192206383

In [349]:
class CustomLoss(nn.Module):
    def __init__(self):
        super(CustomLoss, self).__init__()

    def forward(self, predicted, target,flattened_initial_weights,flattened_current_weights):
        loss = (0.5*torch.sum((predicted - target)**2))/target.shape[0]
        m = 20
        lamb = 1
        norm = 0.5*m*lamb*(torch.norm(flattened_initial_weights - flattened_current_weights, p=2)**2)
        return loss + norm

In [350]:
def train_nn(input_data,initial_weights,J = 20):
    model_inside = FeedforwardNN(input_size, hidden_size, output_size)
    model_inside.load_state_dict(initial_weights)

    loss_function = CustomLoss()

    optimizer = optim.SGD(model_inside.parameters(), lr=0.001) ## Fix to just GD on whole dataset and check if it's correct

    size = len(input_data)
    rewards = []
    for s in range(size):
        r = reward_function(input_data[s])
        rewards.append(r)
    rewards = torch.tensor(rewards).reshape(-1,1)
    dataset = TensorDataset(input_data,rewards)

    batch_size = 32
    shuffle = True  # Set to True if you want to shuffle the data
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)    

    flattened_initial_weights = torch.cat([p.flatten() for p in initial_weights.values()])
    
    for j in range(J):
        for inputs,targets in data_loader:
            optimizer.zero_grad()
            output = model_inside(inputs)
            
            current_weights = model_inside.state_dict()
            flattened_current_weights = torch.cat([p.flatten() for p in current_weights.values()])
            loss = loss_function(output, targets,flattened_initial_weights,flattened_current_weights)
            loss.backward()
            optimizer.step()
        
#         print('output',output)
#         print('rewards',rewards)
    print('--------------------------------------')

    model_weights = model_inside.state_dict()
    return model_weights

In [351]:
def NeuralUCB(model, T, lamb = 0.2, v = 0.2, delta = 0.2, norm = 0.2, step_size = 0.2, num_iter = 10):
    Z = torch.eye(p)
    K = 2
    dim = 20 # feature vector dimension (context)
    m = 20 # Neural Network width
    gamma = 0.2
    best_contexts = []
    for t in range(T):
        synthetic_input = torch.rand((K,dim)) # 2 arms x 10 feature size
        all_grad = []
        ucbs = []
        for k in range(K):
            output = model(synthetic_input[k])
            model.zero_grad()
            output.backward(torch.ones_like(output),retain_graph = True)
            g = []
            for name, param in model.named_parameters():
                if 'weight' in name:
                    tmp = param.grad.flatten()
                    g.append(tmp)
            g = torch.cat(g,dim = 0).reshape(-1,1)
    
    
            Z_inv = torch.inverse(Z)
            
            
            all_grad.append(g)
            exploration_reward = gamma*torch.sqrt((g.T@Z_inv@g)/m) 
            ucb_a = output + exploration_reward
            ucbs.append(ucb_a.item())
            
            
        ucbs = torch.tensor(ucbs)
        action = torch.argmax(ucbs).item()
        best_context = synthetic_input[action]
        best_g = all_grad[action]
        Z = Z + (best_g@best_g.T/m)
        
        best_contexts.append(best_context)
        
#         if (t+1)%50 == 0:
#             ### Train Neural Network
#             new_weights = train_nn(torch.stack(best_contexts),initial_weights,J = t+1)
#             model.load_state_dict(new_weights)
#             print('abc')
        
        new_weights = train_nn(torch.stack(best_contexts),initial_weights,J = 20)
        model.load_state_dict(new_weights)
        # Update gamma: To be updated
        print(t)
        
    return model

In [352]:
model = NeuralUCB(model,100)

--------------------------------------
0
--------------------------------------
1
--------------------------------------
2
--------------------------------------
3
--------------------------------------
4
--------------------------------------
5
--------------------------------------
6
--------------------------------------
7
--------------------------------------
8
--------------------------------------
9
--------------------------------------
10
--------------------------------------
11
--------------------------------------
12
--------------------------------------
13
--------------------------------------
14
--------------------------------------
15
--------------------------------------
16
--------------------------------------
17
--------------------------------------
18
--------------------------------------
19
--------------------------------------
20
--------------------------------------
21
--------------------------------------
22
--------------------------------------
23
--

### Result Testing

In [353]:
test = torch.rand((2,20))

In [354]:
torch.norm(test[0])

tensor(2.3688)

In [355]:
torch.norm(test[1])

tensor(2.7209)

In [356]:
best_idx = 0
best_reward = 0
for idx,t in enumerate(test):
    reward = reward_function(t)
    if reward>best_reward:
        best_reward = reward
        best_idx = idx
    print(f'idx_{idx} reward: ',reward)

idx_0 reward:  184.6775421500206
idx_1 reward:  239.36793792247772


In [357]:
model(test)

tensor([[178.0201],
        [239.3335]], grad_fn=<AddmmBackward0>)