In [234]:
import numpy as np
import pandas as pd
import os 
import sys
import torch

DATASETS = '../datasets/'

def read_file(dir_name):
    main_df = pd.DataFrame()
    directory = DATASETS + dir_name
    for filename in os.listdir(directory):
        data = np.load(os.path.join(directory, filename))
        data_dict = {}
        for keys in data.keys():
            data_dict[keys] = list(data[keys])
        df = pd.DataFrame.from_dict(data_dict)
        main_df = pd.concat([main_df, df])
    return main_df

In [237]:
data = pd.DataFrame()
for name in os.listdir("../datasets/mw"):
    if not (name.startswith('.')):
        dir_name = 'mw/'+name
        print(dir_name)
        df = read_file(dir_name)
        data = pd.concat([data, df])

mw/cls_sweep-v2
mw/cls_push-v2
mw/cls_door-open-v2
mw/cls_peg-insert-side-v2
mw/cls_drawer-close-v2
mw/cls_basketball-v2
mw/cls_reach-v2
mw/cls_window-open-v2
mw/cls_pick-place-v2
mw/cls_button_press_topdown-v2


In [238]:
data.reset_index(drop=True, inplace=True)
data

Unnamed: 0,obs,action,reward,done,discount
0,"[0.006152352, 0.6001898, 0.19430117, 1.0, -0.0...","[-0.6907573, -0.90674317, -0.60277325, -0.2723...",0.000000,False,1.0
1,"[0.005496773, 0.6004596, 0.19479041, 1.0, -0.0...","[-1.2002773, 0.24739377, 3.6632237, -1.072054]",0.183851,False,1.0
2,"[0.003229219, 0.6018244, 0.19687584, 1.0, -0.0...","[-1.069929, 0.31087282, 3.375295, -1.0109515]",0.179854,False,1.0
3,"[-0.001304459, 0.60364145, 0.19940475, 1.0, -0...","[-0.95812297, 0.38289765, -4.0985723, -0.8793693]",0.174293,False,1.0
4,"[-0.008221681, 0.6050473, 0.2002488, 1.0, -0.0...","[-1.0929809, 0.20464042, -4.2532034, -0.84760445]",0.169278,False,1.0
...,...,...,...,...,...
1329022,"[0.019388804, 0.42453432, 0.30353016, 0.678210...","[0.43020093, 0.81866133, 0.058971003, 0.28200394]",0.500190,False,1.0
1329023,"[0.02124585, 0.42707083, 0.30390698, 0.6806378...","[0.48770562, -0.8089056, -0.15299322, 0.56197345]",0.503583,False,1.0
1329024,"[0.02427828, 0.42826843, 0.3044525, 0.67595613...","[0.35907024, -0.54959077, 0.39614707, 0.6642925]",0.508433,False,1.0
1329025,"[0.027801527, 0.4276933, 0.30493283, 0.6662153...","[-0.28105694, -0.9714457, -0.4404523, 0.26024258]",0.512631,False,1.0


In [239]:
import itertools
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd

class Model(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x


class PreferenceMAML:
    def __init__(
        self,
        ml10,
        input_size,
        hidden_size1,
        hidden_size2,
        output_size,
        num_support=10,
        num_query=10,
        num_inner_steps=5,
        **kwargs,
    ):
        self.ml10 = ml10
        self.reward_criterion = nn.BCELoss()
        self.num_support = num_support
        self.num_query = num_query
        self.num_inner_steps = num_inner_steps

        self.model = Model(input_size, hidden_size1, hidden_size2, output_size)

    def construct_episodes(self):
        episodes = []
        episode = []
        for _, row in self.ml10.iterrows():
            episode.append(row)
            if row['done']:
                episodes.append(episode)
                episode = []
        return episodes



    def form_sigma_groups(self, episode, k):
        sigmas = []
        segments = []
        q, r = divmod(len(episode), k)
        for i in range(k):
            segments.append(episode[i*q+min(i,r) : (i+1)*q+min(i+1,r)])

        for i in range(k):
            sigma_i = segments[i]
            for j in range(i+1, k):
                sigma_j = segments[j]

                sigmas.append((sigma_i, sigma_j))
        return sigmas

    def compare_probabilities(self, sigma1, sigma2):
        exp_sum_rewards_sigma1 = np.exp(sum(row['reward'] for row in sigma1))
        exp_sum_rewards_sigma2 = np.exp(sum(row['reward'] for row in sigma2))
        prob = exp_sum_rewards_sigma1 / (exp_sum_rewards_sigma1 + exp_sum_rewards_sigma2)
        return [1,0] if prob > 0.5 else [0,1]


    def prepare_data(self, k):
        X = []
        y = []
        episodes = self.construct_episodes()
        for episode in episodes:
            sigmas = self.form_sigma_groups(episode, k)
            for _ in range(len(sigmas)):

                sigma1 = sigmas[_][0]
                sigma2 = sigmas[_][1]

                obs_action_sigma1 = []
                for row in sigma1:
                    obs_action = list(row['obs']) + list(row['action'])  # Concatenate obs and action
                    obs_action_sigma1.append(obs_action)

                obs_action_sigma2 = []
                for row in sigma2:
                    obs_action = list(row['obs']) + list(row['action'])  # Concatenate obs and action
                    obs_action_sigma2.append(obs_action)

                if len(obs_action_sigma1) > len(obs_action_sigma2):
                    obs_action_sigma1 = obs_action_sigma1[1:]
                elif len(obs_action_sigma1) < len(obs_action_sigma2):
                    obs_action_sigma2 = obs_action_sigma2[1:]
                else:
                    continue

                X.append(np.concatenate((obs_action_sigma1, obs_action_sigma2), axis = 1))
                y.append([self.compare_probabilities(sigma1, sigma2)]) 

        return X, y


    def setup_optimizers(self, optim_class, optim_kwargs):
        self.optim = optim_class(self.model.parameters(), **optim_kwargs)

    def _train_step(self, X, y):
        self.optim.zero_grad()
        loss = self._outer_step(X, y)
        loss.backward()
        self.optim.step()
        return loss.item()

    def _outer_step(self, X, y):
        outer_losses = []
        for i in range(len(X)):
            loss = self._compute_loss(X[i], y[i])
            outer_losses.append(loss)
        return torch.mean(torch.stack(outer_losses))

    def _compute_loss(self, X, y):
        X_tensor = torch.tensor(X, dtype=torch.float32)
        y_tensor = torch.tensor([y], dtype=torch.float32)
        output = self.model(X_tensor)
        output_flat = output.view(-1)
        y_flat = y_tensor.view(-1)
        loss = self.reward_criterion(output_flat[-2:], y_flat)
        return loss

ml10 = data.copy()  
input_size = 86  # Assuming obs has 39 numbers and action has 4 numbers * 2 for pair of sigmas
hidden_size1 = 128
hidden_size2 = 128
output_size = 2
num_epochs = 20

model = PreferenceMAML(ml10, input_size, hidden_size1, hidden_size2, output_size)
model.setup_optimizers(optim.Adam, {"lr": 0.005})

X, y = model.prepare_data(k=4)

# Train the model
for epoch in range(num_epochs):
    loss = model._train_step(X, y)
    print(f"Epoch {epoch+1}, Loss: {loss}")


  exp_sum_rewards_sigma2 = np.exp(sum(row['reward'] for row in sigma2))
  exp_sum_rewards_sigma1 = np.exp(sum(row['reward'] for row in sigma1))
  prob = exp_sum_rewards_sigma1 / (exp_sum_rewards_sigma1 + exp_sum_rewards_sigma2)


Epoch 1, Loss: 0.7192118763923645
Epoch 2, Loss: 0.6447442770004272
Epoch 3, Loss: 0.5888914465904236
Epoch 4, Loss: 0.5564464926719666
Epoch 5, Loss: 0.5714359283447266
Epoch 6, Loss: 0.5726175904273987
Epoch 7, Loss: 0.552608847618103
Epoch 8, Loss: 0.5367072224617004
Epoch 9, Loss: 0.5327599048614502
Epoch 10, Loss: 0.5348684191703796
Epoch 11, Loss: 0.5360583066940308
Epoch 12, Loss: 0.5335521697998047
Epoch 13, Loss: 0.528028130531311
Epoch 14, Loss: 0.5218993425369263
Epoch 15, Loss: 0.5177727937698364
Epoch 16, Loss: 0.5167074799537659
Epoch 17, Loss: 0.5171335935592651
Epoch 18, Loss: 0.5161841511726379
Epoch 19, Loss: 0.5128001570701599
Epoch 20, Loss: 0.5085576772689819


In [240]:
test = pd.DataFrame()
for name in os.listdir("../datasets/mw_valid"):
    if not (name.startswith('.')):
        dir_name = 'mw_valid/'+name
        print(dir_name)
        df = read_file(dir_name)
        test = pd.concat([data, df])

# test = read_file('mw/cls_pick-place-v2')
test.reset_index(drop=True, inplace=True)
Test = PreferenceMAML(test, input_size, hidden_size1, hidden_size2, output_size)
test_X, test_y = Test.prepare_data(k=4)


# Evaluate the model on the test set
def evaluate_model(model, X, y):
    predictions = []
    with torch.no_grad():
        for i in range(len(X)):
            X_tensor = torch.tensor(X[i], dtype=torch.float32)
            output = model.model(X_tensor.unsqueeze(0))  # Unsqueeze to add batch dimension
            predictions.append(output.squeeze().numpy())  # Squeeze to remove batch dimension

    preds = []
    for _ in range(len(predictions)):
        preds.append(list(np.max(predictions[_], axis=0)))

    pred_label = []
    for i in preds:
        pred_label.append([1,0] if preds[_][0]>preds[_][1] else [0,1])


    Y = []
    for _ in y:
        Y.append(_[0])

    sum = 0
    for _ in range(len(Y)):
        sum += pred_label[_]==Y[_]
    accuracy = sum/len(Y)
    return accuracy

test_accuracy = evaluate_model(model, test_X, test_y)
print(f'Test Accuracy: {test_accuracy}')

mw_valid/cls_shelf-place-v2
mw_valid/cls_drawer-open-v2
mw_valid/cls_lever-pull-v2
mw_valid/cls_sweep-into-v2


  exp_sum_rewards_sigma2 = np.exp(sum(row['reward'] for row in sigma2))
  exp_sum_rewards_sigma1 = np.exp(sum(row['reward'] for row in sigma1))
  prob = exp_sum_rewards_sigma1 / (exp_sum_rewards_sigma1 + exp_sum_rewards_sigma2)


Test Accuracy: 0.2693798449612403


In [5]:
# # def preprocess_df(df, num_segments):
# df = data.copy()
# num_segments = 4

# episodes = []
# current_episode = []

# for index, row in df.iterrows():
#     current_episode.append(row)
#     if row['done'] or index == len(df) - 1:
#         D = []
#         segments = np.array_split(current_episode, num_segments)
#         for i in range(len(segments) - 1):
#             for j in range(i, len(segments) - 1):
#                 sigma1 = pd.DataFrame(segments[i])
#                 sigma2 = pd.DataFrame(segments[j])
#                 reward_sum_sigma1 = sum(sigma1[2])
#                 reward_sum_sigma2 = sum(sigma2[2])
#                 p = torch.tensor([[np.exp(reward_sum_sigma1) / (np.exp(reward_sum_sigma1) + np.exp(reward_sum_sigma2))]])
#                 y = torch.tensor([1]) if p.item() >= 0.5 else torch.tensor([0])

#                 s1_obs = torch.tensor(sigma1[0])
#                 s1_act = torch.tensor(sigma1[1])
#                 s1_reward = torch.tensor(sigma1[2])

#                 s2_obs = torch.tensor(sigma2[0])
#                 s2_act = torch.tensor(sigma2[1])
#                 s2_reward = torch.tensor(sigma2[2])

                
                
#                 d = (p, y)
#                 D.append(d)

#                 # x_obs = torch.tensor(sigma1[0].tolist())
#                 # x_act = torch.tensor(sigma1[1].tolist())
#                 # y_support = torch.tensor([[1, 0]]) if p.item() > 0.5 else torch.tensor([[0, 1]])
#                 # x_query = torch.tensor(sigma2[0].tolist())
#                 # y_query = torch.tensor([0]) if p.item() > 0.5 else torch.tensor([1])
#                 # episodes.append(((x_obs, x_act), y_support, (x_obs, x_act), y_query))
#         episodes.append(D)
#         current_episode = []

#     # return episodes

In [13]:
# # Example testing
# def test(model, test_data):
#     criterion = nn.CrossEntropyLoss()
#     correct = 0
#     total = 0
#     with torch.no_grad():
#         for S1, S2, y, _ in test_data:
#             x = torch.cat([S1, S2], dim=0)  # Assuming S1 and S2 are tensors
#             y = torch.tensor(y)
#             outputs = model(x)
#             _, predicted = torch.max(outputs.data, 1)
#             total += y.size(0)
#             correct += (predicted == y).sum().item()
#     accuracy = 100 * correct / total
#     print('Accuracy of the network on the test data: %d %%' % accuracy)

# # Assuming you have test data
# test_data = [...]  # Define your test data
# test(model, test_data)