In [5]:
import os
DIR = "Data"
file_names = os.listdir(DIR)
file_names 

['arcene.mat',
 'BASEHOCK.mat',
 'COIL20.mat',
 'gisette.mat',
 'Isolet.mat',
 'madelon.mat',
 'PCMAC.mat',
 'RELATHE.mat']

In [23]:
from scipy.io import loadmat
from scipy.sparse import issparse
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from Data import VFLDataset
from torch.utils.data import DataLoader
import VFL
import torch
for file_name in file_names:
    if file_name.endswith(".mat"):
        mat = loadmat(os.path.join(DIR, file_name))
        X = mat["X"]
        y = mat["Y"]
        if issparse(X):
                data_X = data_X.todense()
        y = y.flatten()
        print(file_name, X.shape, y.shape)
        if file_name in ['madelon.mat', 'arcene.mat', 'gisette.mat']:
            y[np.where(y == -1)] = 0
        if file_name in ['BASEHOCK.mat', 'RELATHE.mat', 'PCMAC.mat']:
            y[np.where(y == 1)] = 0
            y[np.where(y == 2)] = 1
        scaler = MinMaxScaler()
        X = scaler.fit_transform(X)
        name = file_name.rstrip('.mat')
        print(name)
        RESULT_DIR = "Results"
        result_dir = os.path.join(RESULT_DIR, name)
        if not os.path.exists(result_dir):
            os.mkdir(result_dir)

        EPOCH = 100
        NUM_TRAIL = 5
        NUM_NORMAL = np.arange(0, 100, 5)
        NUM_OVERWHELMED = [0]+[5]*(len(NUM_NORMAL)-1)

        NUM_SHORTCUT = [0, 1, 2, 3]
        NUM_SHORTCUT = np.repeat(np.array(NUM_SHORTCUT), 5)
        assert len(NUM_NORMAL) == len(NUM_OVERWHELMED) == len(NUM_SHORTCUT)

        for trail in range(NUM_TRAIL):
            for i, num_normal in enumerate(NUM_NORMAL):
                num_over = NUM_OVERWHELMED[i]
                num_shortcut = NUM_SHORTCUT[i]

                dataset = VFLDataset(data_source=(X, y), 
                    num_clients=3,
                    gini_portion=None,
                    insert_noise=True, 
                    num_random_samples=num_normal,
                    num_shortcut=num_shortcut,
                    num_overwhelemd=num_over)

                
                train_loader = DataLoader(
                    dataset.train(), batch_size=128, shuffle=True)
                val_loader = DataLoader(
                    dataset.valid(), batch_size=1000, shuffle=True)
                test_loader = DataLoader(dataset.test(), batch_size=1000, shuffle=True)
                input_dim_list = dataset.get_input_dim_list()
                noisy_label = dataset.get_inserted_features_label()

                print(y)

                ###########################
                # FNN Model
                ############################

                saving_name = f'FNN_Name_{name}_Trail_{trail}_Normal_{num_normal}_Overwhelmed_{num_over}_Shortcut_{num_shortcut}'
                if np.unique(y).size == 2:
                    output_dim = 1
                    criterion = torch.nn.BCELoss()
                else:
                    output_dim = np.unique(y).size
                    criterion = torch.nn.CrossEntropyLoss()
                
                type = "FNN" 
                models, top_model = VFL.make_binary_models(
                    input_dim_list=input_dim_list, 
                    type=type, 
                    emb_dim=128, 
                    output_dim=output_dim, hidden_dims=[256, 128],
                    activation='relu')
                VFL.train(
                    models, top_model, train_loader, val_loader, test_loader,
                    epochs=EPOCH, 
                    criterion=criterion,
                    verbose=False,
                    save_dir=os.path.join(result_dir, saving_name)+".pt",
                    log_dir=os.path.join(result_dir, saving_name)+".csv",
                    save_mask_at=100000
                )

                ###########################
                # STG Model
                ############################
                type = 'STG'
                saving_name = f'STG_Name_{name}_Trail_{trail}_Normal_{num_normal}_Overwhelmed_{num_over}_Shortcut_{num_shortcut}'
                models, top_model = VFL.make_binary_models(
                    input_dim_list=input_dim_list,
                    type='STG',
                    emb_dim=128,
                    output_dim=output_dim, hidden_dims=[256, 128],
                    activation='relu')
               
                VFL.train(
                    models, top_model, train_loader, val_loader, test_loader,
                    epochs=EPOCH,
                    criterion=criterion,
                    verbose=False,
                    save_dir=os.path.join(result_dir, saving_name)+".pt",
                    log_dir=os.path.join(result_dir, saving_name)+".csv",
                    save_mask_at=100000
                )


                ###########################
                # Dual-STG Model
                ###########################
                type = 'Dual-STG'


                break 
            break


        
        


        
        


arcene.mat (200, 10000) (200,)
arcene
Inserting : 0 Random Samples, 0 Overwhelmed Samples, 0 Shortcut Samples
Client 0: Feature Index 0-2499
Client 1: Feature Index 2500-4999
Client 2: Feature Index 5000-7499
Server : Feature Index 7500-9999
[1 0 1 1 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 1 0 1 0 1 1 1 0 0 1 0
 0 1 0 1 0 0 1 0 0 0 0 1 1 0 1 0 0 1 0 1 1 1 0 1 1 0 1 0 0 0 0 1 1 0 1 0 0
 1 0 0 1 0 1 1 1 0 1 1 0 1 1 0 0 1 0 1 1 0 0 0 1 0 1 0 0 0 1 1 1 0 1 0 0 1
 0 0 1 0 0 1 1 1 1 1 0 1 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 0 1 0 0 1 0 0 1
 1 0 0 1 1 0 0 0 0 1 1 1 0 1 1 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 1 1 1 0 0 1 1
 0 1 1 0 0 1 0 1 1 0 0 0 1 1 0]
BASEHOCK.mat (1993, 4862) (1993,)
BASEHOCK
Inserting : 0 Random Samples, 0 Overwhelmed Samples, 0 Shortcut Samples
Client 0: Feature Index 0-1215
Client 1: Feature Index 1216-2431
Client 2: Feature Index 2432-3646
Server : Feature Index 3647-4861
[0 0 0 ... 1 1 1]
COIL20.mat (1440, 1024) (1440,)
COIL20
Inserting : 0 Random Samples, 0 Overwhelmed Sam

RuntimeError: 0D or 1D target tensor expected, multi-target not supported

In [27]:
import torch
criterion = torch.nn.CrossEntropyLoss()
x = torch.randn(3, 5)
y = torch.tensor([1, 2, 3]).reshape(-1, 1)
print(X.shape, y.shape)
criterion(x,y)

(1440, 1024) torch.Size([3, 1])


RuntimeError: 0D or 1D target tensor expected, multi-target not supported