In [6]:
import numpy as np
import pandas as pd
import os 
import sys
import torch

DATASETS = '../datasets/'

def read_file(dir_name):
    main_df = pd.DataFrame()
    directory = DATASETS + dir_name
    for filename in os.listdir(directory):
        data = np.load(os.path.join(directory, filename))
        data_dict = {}
        for keys in data.keys():
            data_dict[keys] = list(data[keys])
        df = pd.DataFrame.from_dict(data_dict)
        main_df = pd.concat([main_df, df])
    return main_df

In [7]:
data = pd.DataFrame()
for name in os.listdir("../datasets/mw"):
    if not (name.startswith('.')):
        dir_name = 'mw/'+name
        print(dir_name)
        df = read_file(dir_name)
        data = pd.concat([data, df])

mw/cls_sweep-v2
mw/cls_push-v2
mw/cls_door-open-v2
mw/cls_peg-insert-side-v2
mw/cls_drawer-close-v2
mw/cls_basketball-v2
mw/cls_reach-v2
mw/cls_window-open-v2
mw/cls_pick-place-v2
mw/cls_button_press_topdown-v2


# MAIN CODE

In [5]:
import itertools
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

class Model(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x


class PreferenceMAML:
    def __init__(
        self,
        ml10,
        input_size,
        hidden_size1,
        hidden_size2,
        output_size,
        inner_lr = 0.01,
        num_support=10,
        num_query=10,
        num_inner_steps=5,
        **kwargs,
    ):
        self.ml10 = ml10
        self.reward_criterion =  nn.BCELoss()
        self.num_support = num_support
        self.num_query = num_query
        self.num_inner_steps = num_inner_steps
        self.inner_lr = inner_lr

        self.model = Model(input_size, hidden_size1, hidden_size2, output_size)

    def construct_episodes(self):
        episodes = []
        episode = []
        for _, row in self.ml10.iterrows():
            episode.append(row)
            if row['done']:
                episodes.append(episode)
                episode = []
        self.episodes = episodes
        # return episodes

    def form_sigma_groups(self, episode, k):
        sigmas = []
        rev_epi = np.flip(episode)
        segments = np.array_split(rev_epi, k)

        # q, r = divmod(len(episode), k)
        # for i in range(k):
        #     segments.append(episode[i*q+min(i,r) : (i+1)*q+min(i+1,r)])


        # for i in range(k):
        #     sigma_i = segments[i]
        #     for j in range(i+1, k):
        #         sigma_j = segments[j]

        #         sigmas.append((sigma_i, sigma_j))
        return segments

    def compare_probabilities(self, sigma1, sigma2):
        exp_sum_rewards_sigma1 = np.exp(sum(row['reward'] for row in sigma1))
        exp_sum_rewards_sigma2 = np.exp(sum(row['reward'] for row in sigma2))
        prob = exp_sum_rewards_sigma1 / (exp_sum_rewards_sigma1 + exp_sum_rewards_sigma2)
        return [0] if prob > 0.5 else [1]

    def prepare_data(self, k):
        X = []
        y = []
        # episodes = self.construct_episodes()
        episodes = self.episodes
        for i in range(len(episodes)):
            episode = episodes[i]
            segments1 = self.form_sigma_groups(episode, k)
            print(len(segments1), segments1[0].shape)
            
            # for j in range(i+1, len(episode)):

            # sigmas = self.form_sigma_groups(episode, k)
            # for _ in range(len(sigmas)):
            #     sigma1 = sigmas[_][0]
            #     sigma2 = sigmas[_][1]

            #     obs_action_sigma1 = []
            #     for row in sigma1:
            #         obs_action = list(row['obs']) + list(row['action']) 
            #         obs_action_sigma1.append(obs_action)

            #     obs_action_sigma2 = []
            #     for row in sigma2:
            #         obs_action = list(row['obs']) + list(row['action'])  
            #         obs_action_sigma2.append(obs_action)

            #     if len(obs_action_sigma1) > len(obs_action_sigma2):
            #         obs_action_sigma1 = obs_action_sigma1[1:]
            #     elif len(obs_action_sigma1) < len(obs_action_sigma2):
            #         obs_action_sigma2 = obs_action_sigma2[1:]
            #     else:
            #         continue

                # X.append(np.concatenate((obs_action_sigma1, obs_action_sigma2), axis=1))
                # y.append(self.compare_probabilities(sigma1, sigma2))

        return X, y

    def setup_optimizers(self, optim_class, optim_kwargs):
        self.optim = optim_class(self.model.parameters(), **optim_kwargs)

    def _train_step(self, X, y):
        self.optim.zero_grad()
        loss = self._outer_step(X, y)
        loss.backward()
        self.optim.step()
        return loss.item()

    def _outer_step(self, X, y):
        outer_losses = []
        for i in tqdm(range(len(X))):
            if len(X[i])>self.num_support:
                support_X, support_y, query_X, query_y = self._split_support_query(X[i], y[i])
                # Inner loop (adaptation)
                adapted_model = self._inner_loop(support_X, support_y)
                # Compute loss using the adapted model on query set
                query_loss = self._compute_loss(adapted_model, query_X, query_y)
                outer_losses.append(query_loss)
        return torch.mean(torch.stack(outer_losses))

    def _inner_loop(self, support_X, support_y):
        adapted_model = Model(self.model.fc1.in_features, self.model.fc1.out_features,
                              self.model.fc2.out_features, self.model.fc3.out_features)
        adapted_model.load_state_dict(self.model.state_dict())

        inner_optimizer = optim.Adam(adapted_model.parameters(), lr=self.inner_lr)

        for _ in range(self.num_inner_steps):
            inner_optimizer.zero_grad()
            loss = self._compute_loss(adapted_model, support_X, support_y)
            loss.backward()
            inner_optimizer.step()

        return adapted_model

    def _compute_loss(self, model, X, y):
        X_tensor = torch.tensor(X, dtype=torch.float32)
        y_tensor = torch.tensor(y, dtype=torch.float32)  
        op = model(X_tensor)
        loss = self.reward_criterion(op, y_tensor)
        return loss

    def _split_support_query(self, X, y):
        num_samples = len(X)
        all_indices = np.arange(num_samples)
        # Randomly sample support indices
        support_indices = np.random.choice(num_samples, self.num_support, replace=False)
        query_indices = np.setdiff1d(all_indices, support_indices)
        support_X = X[support_indices]
        query_X = X[query_indices]
        support_y = [y] * self.num_support
        query_y = [y] * len(query_indices)

        return support_X, support_y, query_X, query_y

################################################################################################
################################################################################################
################################################################################################

ml10 = data.copy()
print(ml10)
input_size = 86  # Assuming obs has 39 numbers and action has 4 numbers * 2 for pair of sigmas
hidden_size1 = 128
hidden_size2 = 128
output_size = 1
num_epochs = 5
outer_lr = 0.001

model = PreferenceMAML(ml10, input_size, hidden_size1, hidden_size2, output_size)
model.setup_optimizers(optim.Adam, {"lr": outer_lr})
model.construct_episodes()

# # Train the model
# for epoch in range(num_epochs):
#     print(f'\nBeginning Training - Epoch [{epoch+1}/{num_epochs}]')
#     loss = model._train_step(X, y)
#     print(f"Epoch {epoch+1}, Loss: {loss}")


                                                    obs  \
0     [0.006152352, 0.6001898, 0.19430117, 1.0, -0.0...   
1     [0.005496773, 0.6004596, 0.19479041, 1.0, -0.0...   
2     [0.003229219, 0.6018244, 0.19687584, 1.0, -0.0...   
3     [-0.001304459, 0.60364145, 0.19940475, 1.0, -0...   
4     [-0.008221681, 0.6050473, 0.2002488, 1.0, -0.0...   
...                                                 ...   
4562  [0.019388804, 0.42453432, 0.30353016, 0.678210...   
4563  [0.02124585, 0.42707083, 0.30390698, 0.6806378...   
4564  [0.02427828, 0.42826843, 0.3044525, 0.67595613...   
4565  [0.027801527, 0.4276933, 0.30493283, 0.6662153...   
4566  [0.031075295, 0.42619953, 0.30530906, 0.660721...   

                                                 action    reward   done  \
0     [-0.6907573, -0.90674317, -0.60277325, -0.2723...  0.000000  False   
1        [-1.2002773, 0.24739377, 3.6632237, -1.072054]  0.183851  False   
2         [-1.069929, 0.31087282, 3.375295, -1.0109515]  0.1798

KeyboardInterrupt: 

In [4]:
print('Preparing Data.')
X, y = model.prepare_data(k=4)
print('Data Preparation Done.\n')

Preparing Data.
4 (30, 5)
4 (21, 5)
4 (28, 5)
4 (22, 5)
4 (31, 5)
4 (31, 5)
4 (27, 5)
4 (21, 5)
4 (31, 5)
4 (21, 5)
4 (27, 5)
4 (21, 5)
4 (21, 5)
4 (21, 5)
4 (21, 5)
4 (21, 5)
4 (19, 5)
4 (18, 5)
4 (19, 5)
4 (21, 5)
4 (24, 5)
4 (27, 5)
4 (24, 5)
4 (21, 5)
4 (25, 5)
4 (26, 5)
4 (26, 5)
4 (21, 5)
4 (26, 5)
4 (22, 5)
4 (19, 5)
4 (26, 5)
4 (21, 5)
4 (22, 5)
4 (19, 5)
4 (26, 5)
4 (18, 5)
4 (24, 5)
4 (21, 5)
4 (19, 5)
4 (30, 5)
4 (17, 5)
4 (126, 5)
4 (31, 5)
4 (16, 5)
4 (16, 5)
4 (21, 5)
4 (20, 5)
4 (11, 5)
4 (21, 5)
4 (273, 5)
4 (22, 5)
4 (22, 5)
4 (22, 5)
4 (22, 5)
4 (22, 5)
4 (22, 5)
4 (22, 5)
4 (22, 5)
4 (22, 5)
4 (22, 5)
4 (22, 5)
4 (22, 5)
4 (22, 5)
4 (22, 5)
4 (23, 5)
4 (26, 5)
4 (21, 5)
4 (22, 5)
4 (30, 5)
4 (25, 5)
4 (26, 5)
4 (21, 5)
4 (20, 5)
4 (21, 5)
4 (26, 5)
4 (22, 5)
4 (21, 5)
4 (26, 5)
4 (20, 5)
4 (24, 5)
4 (21, 5)
4 (22, 5)
4 (26, 5)
4 (23, 5)
4 (21, 5)
4 (27, 5)
4 (22, 5)
4 (20, 5)
4 (19, 5)
4 (13, 5)
4 (32, 5)
4 (20, 5)
4 (13, 5)
4 (23, 5)
4 (28, 5)
4 (30, 5)
4 (27, 5)
4 

In [None]:
test = pd.DataFrame()
for name in os.listdir("../datasets/mw_valid"):
    if not (name.startswith('.')):
        dir_name = 'mw_valid/'+name
        print(dir_name)
        df = read_file(dir_name)
        test = pd.concat([data, df])

test.reset_index(drop=True, inplace=True)
Test = PreferenceMAML(test, input_size, hidden_size1, hidden_size2, output_size)
test_X, test_y = Test.prepare_data(k=4)


# Evaluate the model on the test set
def evaluate_model(model, X, y):
    predictions = []
    with torch.no_grad():
        for i in range(len(X)):
            X_tensor = torch.tensor(X[i], dtype=torch.float32)
            output = model.model(X_tensor.unsqueeze(0))  
            predictions.append(output.squeeze().numpy())  

    preds = []
    for _ in range(len(predictions)):
        preds.append((np.array(predictions[_]).mean()))

    pred_label = []
    for i in range(len(preds)):
        pred_label.append([0] if preds[i]>0.5 else [1])
    
    sum = 0
    for _ in range(len(y)):
        sum += pred_label[_]==y[_]
    accuracy = sum/len(y)
    return accuracy, pred_label

test_accuracy, pred_labels = evaluate_model(model, test_X, test_y)
print(f'\nTest Accuracy: {test_accuracy}')

mw_valid/cls_shelf-place-v2
mw_valid/cls_drawer-open-v2
mw_valid/cls_lever-pull-v2
mw_valid/cls_sweep-into-v2


KeyboardInterrupt: 

### Without INNER LOOP

In [353]:
# import itertools
# import numpy as np
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import pandas as pd

# class Model(nn.Module):
#     def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
#         super(Model, self).__init__()
#         self.fc1 = nn.Linear(input_size, hidden_size1)
#         self.fc2 = nn.Linear(hidden_size1, hidden_size2)
#         self.fc3 = nn.Linear(hidden_size2, output_size)

#     def forward(self, x):
#         x = torch.relu(self.fc1(x))
#         x = torch.relu(self.fc2(x))
#         x = torch.sigmoid(self.fc3(x))
#         return x


# class PreferenceMAML:
#     def __init__(
#         self,
#         ml10,
#         input_size,
#         hidden_size1,
#         hidden_size2,
#         output_size,
#         num_support=10,
#         num_query=10,
#         num_inner_steps=5,
#         **kwargs,
#     ):
#         self.ml10 = ml10
#         self.reward_criterion =  nn.CrossEntropyLoss()
#         self.num_support = num_support
#         self.num_query = num_query
#         self.num_inner_steps = num_inner_steps

#         self.model = Model(input_size, hidden_size1, hidden_size2, output_size)

#     def construct_episodes(self):
#         episodes = []
#         episode = []
#         for _, row in self.ml10.iterrows():
#             episode.append(row)
#             if row['done']:
#                 episodes.append(episode)
#                 episode = []
#         return episodes



#     def form_sigma_groups(self, episode, k):
#         sigmas = []
#         segments = []
#         q, r = divmod(len(episode), k)
#         for i in range(k):
#             segments.append(episode[i*q+min(i,r) : (i+1)*q+min(i+1,r)])

#         for i in range(k):
#             sigma_i = segments[i]
#             for j in range(i+1, k):
#                 sigma_j = segments[j]

#                 sigmas.append((sigma_i, sigma_j))
#         return sigmas

#     def compare_probabilities(self, sigma1, sigma2):
#         exp_sum_rewards_sigma1 = np.exp(sum(row['reward'] for row in sigma1))
#         exp_sum_rewards_sigma2 = np.exp(sum(row['reward'] for row in sigma2))
#         prob = exp_sum_rewards_sigma1 / (exp_sum_rewards_sigma1 + exp_sum_rewards_sigma2)
#         return [1,0] if prob > 0.5 else [0,1]


#     def prepare_data(self, k):
#         X = []
#         y = []
#         episodes = self.construct_episodes()
#         for episode in episodes:
#             sigmas = self.form_sigma_groups(episode, k)
#             for _ in range(len(sigmas)):

#                 sigma1 = sigmas[_][0]
#                 sigma2 = sigmas[_][1]

#                 obs_action_sigma1 = []
#                 for row in sigma1:
#                     obs_action = list(row['obs']) + list(row['action'])  # Concatenate obs and action
#                     obs_action_sigma1.append(obs_action)

#                 obs_action_sigma2 = []
#                 for row in sigma2:
#                     obs_action = list(row['obs']) + list(row['action'])  # Concatenate obs and action
#                     obs_action_sigma2.append(obs_action)

#                 if len(obs_action_sigma1) > len(obs_action_sigma2):
#                     obs_action_sigma1 = obs_action_sigma1[1:]
#                 elif len(obs_action_sigma1) < len(obs_action_sigma2):
#                     obs_action_sigma2 = obs_action_sigma2[1:]
#                 else:
#                     continue

#                 X.append(np.concatenate((obs_action_sigma1, obs_action_sigma2), axis = 1))
#                 y.append([self.compare_probabilities(sigma1, sigma2)]) 

#         return X, y


#     def setup_optimizers(self, optim_class, optim_kwargs):
#         self.optim = optim_class(self.model.parameters(), **optim_kwargs)

#     def _train_step(self, X, y):
#         self.optim.zero_grad()
#         loss = self._outer_step(X, y)
#         loss.backward()
#         self.optim.step()
#         return loss.item()

#     def _outer_step(self, X, y):
#         outer_losses = []
#         for i in range(len(X)):
#             loss = self._compute_loss(X[i], y[i])
#             outer_losses.append(loss)
#         return torch.mean(torch.stack(outer_losses))

#     def _compute_loss(self, X, y):
#         X_tensor = torch.tensor(X, dtype=torch.float32)
#         y_tensor = torch.tensor([y], dtype=torch.float32)
#         output = self.model(X_tensor)
#         output_flat = output.view(-1)
#         y_flat = y_tensor.view(-1)
#         loss = self.reward_criterion(output_flat[-2:], y_flat)
#         return loss

# ml10 = data.copy()  
# input_size = 86  # Assuming obs has 39 numbers and action has 4 numbers * 2 for pair of sigmas
# hidden_size1 = 128
# hidden_size2 = 128
# output_size = 2
# num_epochs = 20

# model = PreferenceMAML(ml10, input_size, hidden_size1, hidden_size2, output_size)
# model.setup_optimizers(optim.Adam, {"lr": 0.005})

# X, y = model.prepare_data(k=4)

# # Train the model
# for epoch in range(num_epochs):
#     loss = model._train_step(X, y)
#     print(f"Epoch {epoch+1}, Loss: {loss}")


### With INNER LOOP but Improper classification

In [354]:
# import itertools
# import numpy as np
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import pandas as pd
# from tqdm import tqdm
# import warnings
# warnings.filterwarnings('ignore')

# class Model(nn.Module):
#     def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
#         super(Model, self).__init__()
#         self.fc1 = nn.Linear(input_size, hidden_size1)
#         self.fc2 = nn.Linear(hidden_size1, hidden_size2)
#         self.fc3 = nn.Linear(hidden_size2, output_size)

#     def forward(self, x):
#         x = torch.relu(self.fc1(x))
#         x = torch.relu(self.fc2(x))
#         x = torch.sigmoid(self.fc3(x))
#         return x


# class PreferenceMAML:
#     def __init__(
#         self,
#         ml10,
#         input_size,
#         hidden_size1,
#         hidden_size2,
#         output_size,
#         inner_lr = 0.01,
#         num_support=10,
#         num_query=10,
#         num_inner_steps=5,
#         **kwargs,
#     ):
#         self.ml10 = ml10
#         self.reward_criterion =  nn.CrossEntropyLoss()
#         self.num_support = num_support
#         self.num_query = num_query
#         self.num_inner_steps = num_inner_steps
#         self.inner_lr = inner_lr

#         self.model = Model(input_size, hidden_size1, hidden_size2, output_size)

#     def construct_episodes(self):
#         episodes = []
#         episode = []
#         for _, row in self.ml10.iterrows():
#             episode.append(row)
#             if row['done']:
#                 episodes.append(episode)
#                 episode = []
#         return episodes

#     def form_sigma_groups(self, episode, k):
#         sigmas = []
#         segments = []
#         q, r = divmod(len(episode), k)
#         for i in range(k):
#             segments.append(episode[i*q+min(i,r) : (i+1)*q+min(i+1,r)])

#         for i in range(k):
#             sigma_i = segments[i]
#             for j in range(i+1, k):
#                 sigma_j = segments[j]

#                 sigmas.append((sigma_i, sigma_j))
#         return sigmas

#     def compare_probabilities(self, sigma1, sigma2):
#         exp_sum_rewards_sigma1 = np.exp(sum(row['reward'] for row in sigma1))
#         exp_sum_rewards_sigma2 = np.exp(sum(row['reward'] for row in sigma2))
#         prob = exp_sum_rewards_sigma1 / (exp_sum_rewards_sigma1 + exp_sum_rewards_sigma2)
#         return [1,0] if prob > 0.5 else [0,1]

#     def prepare_data(self, k):
#         X = []
#         y = []
#         episodes = self.construct_episodes()
#         for episode in episodes:
#             sigmas = self.form_sigma_groups(episode, k)
#             for _ in range(len(sigmas)):
#                 sigma1 = sigmas[_][0]
#                 sigma2 = sigmas[_][1]

#                 obs_action_sigma1 = []
#                 for row in sigma1:
#                     obs_action = list(row['obs']) + list(row['action'])  # Concatenate obs and action
#                     obs_action_sigma1.append(obs_action)

#                 obs_action_sigma2 = []
#                 for row in sigma2:
#                     obs_action = list(row['obs']) + list(row['action'])  # Concatenate obs and action
#                     obs_action_sigma2.append(obs_action)

#                 if len(obs_action_sigma1) > len(obs_action_sigma2):
#                     obs_action_sigma1 = obs_action_sigma1[1:]
#                 elif len(obs_action_sigma1) < len(obs_action_sigma2):
#                     obs_action_sigma2 = obs_action_sigma2[1:]
#                 else:
#                     continue

#                 X.append(np.concatenate((obs_action_sigma1, obs_action_sigma2), axis=1))
#                 y.append(self.compare_probabilities(sigma1, sigma2))

#         return X, y

#     def setup_optimizers(self, optim_class, optim_kwargs):
#         self.optim = optim_class(self.model.parameters(), **optim_kwargs)

#     def _train_step(self, X, y):
#         self.optim.zero_grad()
#         loss = self._outer_step(X, y)
#         loss.backward()
#         self.optim.step()
#         return loss.item()

#     def _outer_step(self, X, y):
#         outer_losses = []
#         for i in tqdm(range(len(X))):
#             if len(X[i])>self.num_support:
#                 support_X, support_y, query_X, query_y = self._split_support_query(X[i], y[i])
#                 # Inner loop (adaptation)
#                 adapted_model = self._inner_loop(support_X, support_y)
#                 # Compute loss using the adapted model on query set
#                 query_loss = self._compute_loss(adapted_model, query_X, query_y)
#                 outer_losses.append(query_loss)
#         return torch.mean(torch.stack(outer_losses))

#     def _inner_loop(self, support_X, support_y):
#         adapted_model = Model(self.model.fc1.in_features, self.model.fc1.out_features,
#                               self.model.fc2.out_features, self.model.fc3.out_features)
#         adapted_model.load_state_dict(self.model.state_dict())

#         inner_optimizer = optim.Adam(adapted_model.parameters(), lr=self.inner_lr)

#         for _ in range(self.num_inner_steps):
#             inner_optimizer.zero_grad()
#             loss = self._compute_loss(adapted_model, support_X, support_y)
#             print(loss)
#             loss.backward()
#             inner_optimizer.step()

#         return adapted_model

#     def _compute_loss(self, model, X, y):
#         X_tensor = torch.tensor(X, dtype=torch.float32)
#         y_class = [0 if y[i]==[1,0] else 1 for i in range(len(y))]
#         y_tensor = torch.tensor(y_class, dtype=torch.long)  # Assuming y is class indices
#         output = model(X_tensor)

#         loss = self.reward_criterion(output, y_tensor)
#         return loss

#     def _split_support_query(self, X, y):
#         num_samples = len(X)
#         all_indices = np.arange(num_samples)
#         # Randomly sample support indices
#         support_indices = np.random.choice(num_samples, self.num_support, replace=False)
#         query_indices = np.setdiff1d(all_indices, support_indices)
#         support_X = X[support_indices]
#         query_X = X[query_indices]
#         # For y, we can simply use the same indices as for X, as it has a fixed length of 2
#         support_y = [y] * self.num_support
#         query_y = [y] * len(query_indices)

#         return support_X, support_y, query_X, query_y


# ml10 = data.copy()  
# input_size = 86  # Assuming obs has 39 numbers and action has 4 numbers * 2 for pair of sigmas
# hidden_size1 = 128
# hidden_size2 = 128
# output_size = 2
# num_epochs = 5
# outer_lr = 0.001

# model = PreferenceMAML(ml10, input_size, hidden_size1, hidden_size2, output_size)
# model.setup_optimizers(optim.Adam, {"lr": outer_lr})

# print('Preparing Data.')
# # X, y = model.prepare_data(k=4)
# print('Data Preparation Done.\n')

# # Train the model
# for epoch in range(num_epochs):
#     print(f'\nBeginning Training - Epoch [{epoch+1}/{num_epochs}]')
#     loss = model._train_step(X, y)
#     print(f"Epoch {epoch+1}, Loss: {loss}")
