In [None]:
from PTLF.lab import lab_setup
lab_setup(settings_path='path/to/your/project/dir/project_name.json')

In [None]:
from PTLF.utils import *

## Simple

In [None]:
import pandas as pd
class DS01(DataSet):
    def __init__(self):
        self.args = {"data_src":None}

    def _setup(self, args):
        self.df = pd.read_csv(args['data_src'])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx, :].values
        row = torch.tensor(row, dtype=torch.float32)  # Convert entire row to float32 tensor
        label = row[-1]
        data = row[:-1]
        return [data], [label]


In [37]:
from torch.utils.data import Dataset,DataLoader
import torch

In [38]:
dsargs = {
        'loc':'DS01',
        'args':{
            "data_src": r"D:\stdML\Py310\Adult\Prepared\raw2\test.csv"
        }
}
ds = load_component(**dsargs, setup=True)
Dloader = DataLoader(ds,batch_size=32)
dT = iter(Dloader)



In [39]:
dt = next(dT)

In [40]:
dt[1]

[tensor([0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.])]

In [41]:
dt[0][0].shape

torch.Size([32, 14])

In [None]:
import torch.nn as nn
from torch.nn import functional as F
from torchinfo import summary

class SimpleNN(Model):
    def __init__(self):
        super().__init__()
        self.args = {"h1_dim":None, "h2_dim":None,'drop':None}
    def _setup(self, args):
        h1_dim, h2_dim, drop = args['h1_dim'], args['h2_dim'], args['drop']
        self.seq = nn.Sequential(
            nn.Linear(14, h1_dim),
            nn.ReLU(),
            nn.Linear(h1_dim, h2_dim),
            nn.ReLU(),
            nn.Linear(h2_dim, h2_dim*2),
            nn.ReLU(),
            nn.Linear(h2_dim*2, h2_dim*2),
            nn.ReLU()
        )

        self.dropout = nn.Dropout(p=drop)
        self.final = nn.Linear(h2_dim*2, 1)

    def forward(self, x):
        x = self.seq(x)
        # print(x.shape)
        x = self.dropout(x)
        x = self.final(x)
        return x

model = load_component(loc='SimpleNN', args={"h1_dim":120, "h2_dim":100, 'drop':0.3}, setup=True)
# model.to('cuda')
summary(model=model, input_data=dt[0])
# model(*dt[0]).shape



Layer (type:depth-idx)                   Output Shape              Param #
SimpleNN                                 [32, 1]                   --
├─Sequential: 1-1                        [32, 200]                 --
│    └─Linear: 2-1                       [32, 120]                 1,800
│    └─ReLU: 2-2                         [32, 120]                 --
│    └─Linear: 2-3                       [32, 100]                 12,100
│    └─ReLU: 2-4                         [32, 100]                 --
│    └─Linear: 2-5                       [32, 200]                 20,200
│    └─ReLU: 2-6                         [32, 200]                 --
│    └─Linear: 2-7                       [32, 200]                 40,200
│    └─ReLU: 2-8                         [32, 200]                 --
├─Dropout: 1-2                           [32, 200]                 --
├─Linear: 1-3                            [32, 1]                   201
Total params: 74,501
Trainable params: 74,501
Non-trainable params: 0

In [43]:
model(*dt[0])

tensor([[ -220.3309],
        [-3426.8130],
        [-1660.9607],
        [ -948.7296],
        [-1603.6975],
        [ -275.6187],
        [-1397.0565],
        [-1364.8336],
        [-1171.5076],
        [-1769.9922],
        [ -481.8759],
        [-1443.5280],
        [ -222.4868],
        [ -874.3320],
        [ -927.0721],
        [ -677.9394],
        [ -546.9390],
        [ -435.9732],
        [ -694.5280],
        [ -785.2214],
        [-1834.9946],
        [ -287.0668],
        [-2317.0957],
        [-1651.9097],
        [ -170.4194],
        [ -214.1748],
        [-5181.8262],
        [ -711.2175],
        [-1923.5010],
        [ -833.2474],
        [ -293.7393],
        [ -859.3155]], grad_fn=<AddmmBackward0>)

In [None]:
#Optimizer
import torch.optim as optim

class OptAdam(Optimizer):
    def __init__(self):
        super().__init__()

    def _setup(self,args):
        learning_rate = args.get('learning_rate', 0.001)
        self.optimizer = optim.Adam(args['model_parameters'], lr=learning_rate)

    def step(self, **kwargs):
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()



In [None]:
#Metrics
import torch
from PyTorchLabFlow.utils import Metric
from torchmetrics.classification import BinaryAccuracy

class BinAcc(Metric):
    def __init__(self):
        super().__init__()
        self.accuracy = BinaryAccuracy()

    def setup(self, args):
        if self.check_args(args):

            self.accuracy = BinaryAccuracy()
            return self
    def forward(self,y_pred, y_true):
        y_true = y_true[0]
        y_pred = y_pred.view_as(y_true)
        accuracy = self.accuracy(y_pred, y_true)
        return accuracy.item()


import torch.nn as nn
from sklearn.metrics import roc_auc_score
class AUROC(Metric):
    def __init__(self):
        super().__init__()
    def setup(self, args):
        if self.check_args(args):

            self.accuracy = BinaryAccuracy()
            return self
    def forward(self, outputs, targets):
        # If outputs are raw logits, apply a sigmoid or softmax to get probabilities
        # For binary classification (sigmoid)
        targets = targets[0]
        # outputs = outputs.view_as(targets)
        if outputs.size(1) == 1:
            probabilities = torch.sigmoid(outputs).detach().cpu().numpy()
            targets = targets.detach().cpu().numpy()
            auroc = roc_auc_score(targets, probabilities)
        # For multi-class classification (softmax)
        else:
            probabilities = torch.softmax(outputs, dim=1).detach().cpu().numpy()
            targets = targets.detach().cpu().numpy()
            # One-hot encode targets for multi-class
            auroc = roc_auc_score(targets, probabilities, average='macro', multi_class='ovr')

        return auroc

from sklearn.metrics import average_precision_score

class AUPRC(Metric):
    def __init__(self):
        super().__init__()
    def setup(self, args):
        if self.check_args(args):

            self.accuracy = BinaryAccuracy()
            return self
    def forward(self, outputs, targets):
        # If outputs are raw logits, apply a sigmoid or softmax to get probabilities
        # For binary classification (sigmoid)
        targets = targets[0]
        if outputs.size(1) == 1:
            probabilities = torch.sigmoid(outputs).detach().cpu().numpy()
            targets = targets.detach().cpu().numpy()
            auprc = average_precision_score(targets, probabilities)
        # For multi-class classification (softmax)
        else:
            probabilities = torch.softmax(outputs, dim=1).detach().cpu().numpy()
            targets = targets.detach().cpu().numpy()
            # For multi-class, use average_precision_score for each class separately and average
            auprc = average_precision_score(targets, probabilities, average='macro', multi_class='ovr')

        return auprc

from sklearn.metrics import f1_score
class F1Score(Metric):
    def __init__(self):
        super().__init__()
    def setup(self, args):
        if self.check_args(args):

            self.accuracy = BinaryAccuracy()
            return self
    def forward(self, outputs, targets):
        # If outputs are raw logits, apply a sigmoid or softmax to get probabilities
        # For binary classification (sigmoid)
        targets = targets[0]
        if outputs.size(1) == 1:
            probabilities = torch.sigmoid(outputs).detach().cpu().numpy()
            predictions = (probabilities > 0.5).astype(int)  # Convert to 0 or 1 (binary classification)
            targets = targets.detach().cpu().numpy()
            f1 = f1_score(targets, predictions)
        # For multi-class classification (softmax)
        else:
            probabilities = torch.softmax(outputs, dim=1).detach().cpu().numpy()
            predictions = probabilities.argmax(axis=1)  # Choose the class with the highest probability
            targets = targets.detach().cpu().numpy()
            f1 = f1_score(targets, predictions, average='macro')  # Macro-average for multi-class
        return f1



In [None]:
#Loss
from torch import nn
from PyTorchLabFlow.utils import Loss

class BCElogit(Loss):
    def __init__(self):
        super().__init__()
        self.args ={}
    def _setup(self,args):
        self.criterion = nn.BCEWithLogitsLoss()

    def forward(self, logits, y_true):
        print('inside loss',type(y_true), y_true.shape)
        y_true = y_true[0]
        logits = logits.view_as(y_true)
        # print('sdjkfndjnvjnf',type(logits), type(y_true))
        loss = self.criterion(logits, y_true.float())
        return loss


In [None]:
expargs = {
    'dataset':{
        'loc':'DS01',
        'args':{ }
    },
    'model':{
        'loc': 'SimpleNN',
        'args': {"h1_dim":120, "h2_dim":1000, 'drop':0.3}
    },
    "loss":{
        'loc':"BCElogit",
        'args':{},
    },
    'optimizer':{
        'loc':'OptAdam',
        'args':{}
    },
    "metrics":{
        "accuracy":{
            'loc':"BinAcc",
            'args':{}
        } ,
        "auroc":{
            'loc':"AUROC",
            'args':{}
        } ,
        "f1score":{
            'loc':"AUPRC",
            'args':{}
        } ,
        "auprc":{
            'loc':"F1Score",
            'args':{}
        }
    },
    "train_data_src": r"D:\stdML\Py310\Adult\Prepared\raw2\train.csv",
    "val_data_src": r"D:\stdML\Py310\Adult\Prepared\raw2\valid.csv",
    "train_batch_size":36,
    "val_batch_size":36
}

## Embedding

In [None]:
import pandas as pd
import torch
class DS02(DataSet):
    def __init__(self):
        self.args = {"data_src":None}

    def _setup(self, args):
        self.df = pd.read_csv(args['data_src'])
        self.df.replace('?', pd.NA, inplace=True)
        self.df = self.df.dropna()
        # Define categorical and continuous columns
        self.cat_cols = [
            'workclass', 'education', 'marital_status', 'relationship', 'race',
            'occupation', 'native_country'
        ]
        self.cont_cols = [
            'age', 'fnlwgt', 'education_num', 'capital_gain', 'capital_loss', 'hours_per_week'
        ]
        self.label_col = 'income'

        # Define mappings for categorical columns (ensure this matches your earlier mappings)
        self.label_encoders = {
            'workclass': {
                'Private': 0, 'Local-gov': 1, 'Self-emp-not-inc': 2, 'Federal-gov': 3,
                'State-gov': 4, 'Self-emp-inc': 5, 'Without-pay': 6, 'Never-worked': 7
            },
            'education': {
                '11th': 0, 'HS-grad': 1, 'Assoc-acdm': 2, 'Some-college': 3, '10th': 4,
                'Prof-school': 5, '7th-8th': 6, 'Bachelors': 7, 'Masters': 8, '5th-6th': 9,
                'Assoc-voc': 10, '9th': 11, 'Doctorate': 12, '12th': 13, '1st-4th': 14, 'Preschool': 15
            },
            'marital_status': {
                'Never-married': 0, 'Married-civ-spouse': 1, 'Widowed': 2,
                'Divorced': 3, 'Separated': 4, 'Married-spouse-absent': 5, 'Married-AF-spouse': 6
            },
            'relationship': {
                'Own-child': 0, 'Husband': 1, 'Not-in-family': 2,
                'Unmarried': 3, 'Wife': 4, 'Other-relative': 5
            },
            'race': {
                'Black': 0, 'White': 1, 'Other': 2, 'Amer-Indian-Eskimo': 3, 'Asian-Pac-Islander': 4
            },
            'occupation': {
                'Machine-op-inspct': 0, 'Farming-fishing': 1, 'Protective-serv': 2,
                'Other-service': 3, 'Prof-specialty': 4, 'Craft-repair': 5,
                'Adm-clerical': 6, 'Exec-managerial': 7, 'Tech-support': 8,
                'Sales': 9, 'Priv-house-serv': 10, 'Transport-moving': 11,
                'Handlers-cleaners': 12, 'Armed-Forces': 13
            },
            'native_country': {
                'United-States': 0, 'Peru': 1, 'Guatemala': 2, 'Mexico': 3, 'Dominican-Republic': 4,
                'Ireland': 5, 'Germany': 6, 'Philippines': 7, 'Thailand': 8, 'Haiti': 9, 'El-Salvador': 10,
                'Puerto-Rico': 11, 'Vietnam': 12, 'South': 13, 'Columbia': 14, 'Japan': 15, 'India': 16,
                'Cambodia': 17, 'Poland': 18, 'Laos': 19, 'England': 20, 'Cuba': 21, 'Taiwan': 22,
                'Italy': 23, 'Canada': 24, 'Portugal': 25, 'China': 26, 'Nicaragua': 27, 'Honduras': 28,
                'Iran': 29, 'Scotland': 30, 'Jamaica': 31, 'Ecuador': 32, 'Yugoslavia': 33, 'Hungary': 34,
                'Hong': 35, 'Greece': 36, 'Trinadad&Tobago': 37, 'Outlying-US(Guam-USVI-etc)': 38,
                'France': 39, 'Holand-Netherlands': 40
            }
        }

        # Encode categorical variables
        for col, mapping in self.label_encoders.items():
            self.df[col] = self.df[col].replace(mapping)

        # Encode label column
        self.df[self.label_col] = self.df[self.label_col].replace({'<=50K': 0, '>50K': 1})

        # Convert everything to torch tensors
        self.cat_data = torch.tensor(self.df[self.cat_cols].values, dtype=torch.long)
        self.cont_data = torch.tensor(self.df[self.cont_cols].values, dtype=torch.float32)
        self.labels = torch.tensor(self.df[self.label_col].values, dtype=torch.float32).unsqueeze(1)
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        label = self.labels[idx]
        return [self.cat_data[idx], self.cont_data[idx]], [label]


In [57]:
from torch.utils.data import Dataset,DataLoader
import torch

In [58]:
dsargs = {
        'loc':'DS02',
        'args':{
            "data_src": "D:/stdML/Py310/Adult/Prepared/raw/test.csv"
        }
}
ds = load_component(**dsargs, setup=True)
Dloader = DataLoader(ds,batch_size=32)
dT = iter(Dloader)

  self.df[col] = self.df[col].replace(mapping)
  self.df[self.label_col] = self.df[self.label_col].replace({'<=50K': 0, '>50K': 1})


In [59]:
dt = next(dT)

In [60]:
dt[0][1]

tensor([[3.6000e+01, 3.2709e+04, 1.0000e+01, 3.3250e+03, 0.0000e+00, 4.5000e+01],
        [1.9000e+01, 4.4381e+05, 9.0000e+00, 0.0000e+00, 0.0000e+00, 3.0000e+01],
        [2.3000e+01, 2.4040e+05, 1.3000e+01, 0.0000e+00, 0.0000e+00, 1.5000e+01],
        [4.9000e+01, 3.3087e+05, 1.0000e+01, 0.0000e+00, 0.0000e+00, 7.0000e+01],
        [3.5000e+01, 3.7646e+05, 1.0000e+01, 1.5024e+04, 0.0000e+00, 6.0000e+01],
        [3.4000e+01, 1.9200e+05, 1.4000e+01, 0.0000e+00, 0.0000e+00, 5.5000e+01],
        [2.2000e+01, 2.9293e+05, 1.3000e+01, 0.0000e+00, 0.0000e+00, 1.0000e+01],
        [4.3000e+01, 1.7023e+05, 1.3000e+01, 0.0000e+00, 0.0000e+00, 6.0000e+01],
        [3.2000e+01, 3.3154e+05, 9.0000e+00, 0.0000e+00, 0.0000e+00, 4.0000e+01],
        [4.6000e+01, 2.7377e+05, 9.0000e+00, 0.0000e+00, 0.0000e+00, 4.0000e+01],
        [3.5000e+01, 1.5231e+05, 7.0000e+00, 0.0000e+00, 0.0000e+00, 4.0000e+01],
        [4.2000e+01, 1.9512e+05, 4.0000e+00, 0.0000e+00, 0.0000e+00, 3.5000e+01],
        [4.0000e

In [None]:
def generate_model_params(dataset):
    embedding_info = []

    for col in dataset.cat_cols:
        max_val = dataset.df[col].max()
        num_categories = int(max_val + 1)  # include -1 if used
        emb_dim = min(50, (num_categories + 1) // 2)
        embedding_info.append((num_categories, emb_dim))

    continuous_dim = len(dataset.cont_cols)
    output_dim = 1  # binary classification

    return {
        "embedding_info": embedding_info,
        "continuous_dim": continuous_dim,
        "hidden_dim": 64,
        "output_dim": output_dim
    }

# Example:
params = generate_model_params(ds)
params


{'embedding_info': [(7, 4),
  (16, 8),
  (7, 4),
  (6, 3),
  (5, 3),
  (14, 7),
  (40, 20)],
 'continuous_dim': 6,
 'hidden_dim': 64,
 'output_dim': 1}

In [None]:
import torch.nn as nn
from torch.nn import functional as F
from torchinfo import summary

class SimpleNNe(Model):
    def __init__(self):
        super().__init__()
        self.args = {"embedding_info":None, "continuous_dim":None,'hidden_dim':None, 'drop':None}

    def _setup(self, args):
        embedding_info, continuous_dim, hidden_dim, drop = args['embedding_info'], args['continuous_dim'], args['hidden_dim'], args['drop']
        self.embeddings = nn.ModuleList([
            nn.Embedding(num_categories, emb_dim)
            for num_categories, emb_dim in embedding_info
        ])

        self.continuous_dim = continuous_dim
        total_emb_dim = sum(emb_dim for _, emb_dim in embedding_info)

        self.fc = nn.Sequential(
            nn.Linear(total_emb_dim + continuous_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(drop),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, x_cat, x_cont):
        x = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x = torch.cat(x, dim=1)
        x = torch.cat([x, x_cont], dim=1)
        return self.fc(x)


args = {'embedding_info': [(7, 4),
  (16, 84),
  (7, 40),
  (6, 30),
  (5, 300),
  (14, 790),
  (41, 21)],
 'continuous_dim': 6,
 'hidden_dim': 648, 'drop':0.3}

model = load_component(loc='SimpleNNe', args=args, setup=True)
# model.to('cuda')
summary(model=model, input_data=dt[0])
# model(*dt[0])

Layer (type:depth-idx)                   Output Shape              Param #
SimpleNNe                                [32, 1]                   --
├─ModuleList: 1-1                        --                        --
│    └─Embedding: 2-1                    [32, 4]                   28
│    └─Embedding: 2-2                    [32, 84]                  1,344
│    └─Embedding: 2-3                    [32, 40]                  280
│    └─Embedding: 2-4                    [32, 30]                  180
│    └─Embedding: 2-5                    [32, 300]                 1,500
│    └─Embedding: 2-6                    [32, 790]                 11,060
│    └─Embedding: 2-7                    [32, 21]                  861
├─Sequential: 1-2                        [32, 1]                   --
│    └─Linear: 2-8                       [32, 648]                 826,848
│    └─ReLU: 2-9                         [32, 648]                 --
│    └─Dropout: 2-10                     [32, 648]                 

In [None]:
expargs = {
    'dataset':{
        'loc':'DS02',
        'args':{ }
    },
    'model':{
        'loc': 'SimpleNNe',
        'args': {'embedding_info': [(7, 4),
                                  (16, 84),
                                  (7, 40),
                                  (6, 30),
                                  (5, 300),
                                  (14, 790),
                                  (41, 21)],
                 'continuous_dim': 6,
                 'hidden_dim': 648, 'drop':0.3
                }
    },
    "loss":{
        'loc':"BCElogit",
        'args':{},
    },
    'optimizer':{
        'loc':'OptAdam',
        'args':{}
    },
    "metrics":{
        "accuracy":{
            'loc':"BinAcc",
            'args':{}
        } ,
        "auroc":{
            'loc':"AUROC",
            'args':{}
        } ,
        "f1score":{
            'loc':"AUPRC",
            'args':{}
        } ,
        "auprc":{
            'loc':"F1Score",
            'args':{}
        }
    },
    "train_data_src": r"D:\stdML\Py310\Adult\Prepared\raw\train.csv",
    "val_data_src": r"D:\stdML\Py310\Adult\Prepared\raw\valid.csv",
    "train_batch_size":36,
    "val_batch_size":36
}