## import necessary modules

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import DataLoader, Dataset
import h5py
import os

In [6]:
"""
@brief: This is a dataset class to load data from tensor 
"""

class TensorDataset(Dataset):
    def __init__(self, data:torch.tensor):
        super().__init__()
        self.data = data
    
    def __getitem__(self, idx:int):
        return self.data[idx]

    def __len__(self):
        return self.data.shape[0]

In [3]:
"""
@brief: This is a function to read data from .mat file into tensor
"""
def load_tensor_from_mat(file_location:str):
    save_dir = "dataset_cache"
    cache_name = file_location.split(sep="/")[-1].split(sep=".")[0] + ".pt"
    save_path = os.path.join(save_dir, cache_name)
    if os.path.exists(save_path):
        data = torch.load(save_path)    
    else:
        if os.path.exists(file_location):
            np_data = np.transpose(np.array(h5py.File(name=file_location)["Gwl_ud"])).astype(np.float16)
            data = torch.from_numpy(np_data).type(torch.float) # transform array into tensor
            if not os.path.exists(save_path):
                os.mkdir(save_dir)
            torch.save(data, save_path)
        else:
            raise "ERROR! The input argument file_location does not exist!"

class SignNetData(Dataset):
    def __init__(self, file_location:str, mode:str="train", train_ratio:int=0.7, save_dir:str="dataset_cache/"):
        """
        parameters:
            @save_dir: it indicates the position to load the matrix before  if exists, otherwise save the matrix into the place
        """
        super().__init__()
        self.file_location = file_location
        self.train_ratio = train_ratio
        self.mode = mode
        self.save_dir = save_dir
        cache_name = self.file_location.split(sep="/")[-1].split(sep=".")[0] + ".pt"
        self.save_path = os.path.join(save_dir, cache_name)
        if os.path.exists(self.save_path):
            self.data = torch.load(self.save_path)    
        else:
            if os.path.exists(self.file_location):
                np_data = np.transpose(np.array(h5py.File(name=self.file_location)["Gwl_ud"])).astype(np.float16)
                self.data = torch.from_numpy(np_data).type(torch.float) # transform array into tensor
                if not os.path.exists(self.save_path):
                    os.mkdir(save_dir)
                torch.save(self.data, self.save_path)
            else:
                raise "ERROR! The input argument file_location does not exist!"
        
    def __getitem__(self, idx:int):
        return self.data[idx]
    
    def __len__(self):
        return self.data.shape[0]



## construct autoencoder class

In [4]:
class AutoEncoder(nn.Module):
    def __init__(self, name, in_dim:int, hidden_dim:int, activate_func=F.relu):
        super().__init__()
        self.name = name
        self.encoder = nn.Linear(in_features=in_dim, out_features=hidden_dim, bias=True)
        self.decoder = nn.Linear(in_features=hidden_dim, out_features=in_dim, bias=True)
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.activate_func = activate_func
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.to(device)
    
    def forward(self, x):
        x = self.encoder(x)
        x = self.activate_func(x)
        x = self.decoder(x)
        return x
    
    def encode(self, x):
        x = self.encoder(x)
        x = self.activate_func(x)
        return x
    
    def decode(self, x):
        x = self.decoder(x)
        x = self.activate_func(x)
        return x

    def save_output(self, save_dir:str):
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        filename = self.name + self.in_dim + "-" + self.hidden_dim + ".pth"
        file_pos = os.path.join(save_dir, filename)
        torch.save(self.state_dict(), file_pos)

    def save_params(self, save_dir:str):
        pass

    def load_input(self, loading_location:str):
        t = torch.load(loading_location)

    def load_params(self, save_dir:str):
        pass

## construct stacked autoencoder class

In [5]:
class StackedAutoEncoder(nn.Module):
    def __init__(self, *layer_dim, **kwargs):
        super().__init__()
        self.sae = []
        self.activate_func = torch.tanh
        for k,v in kwargs.items():
            if k == "activate_func":
                self.activate_func = v
            elif k == "save_dir":
                self.save_dir = v
        
        # create autoencoders and store them into self.sae
        for idx in range(len(layer_dim)-1):
            ae = AutoEncoder(layer_dim[idx], layer_dim[idx+1], self.activate_func)
            self.sae.append(ae)
        
        self.in_dim = layer_dim[0]
        self.out_dim = layer_dim[-1]

        # initialize layers
        self.layer_init()
        
    def layer_init(self):
        for ae in self.sae:
            for layer in ae.modules():
                if isinstance(layer, nn.Linear):
                    nn.init.xavier_normal_(layer.weight.data)
                elif isinstance(layer, nn.Conv2d):
                    nn.init.kaiming_normal_(layer.weight.data)

    def __len__(self):
        """ return the depth of the stacked auto-encoder """
        return len(self.sae)

    # def layer_init(self):
    #     try:
    #         for ae in self.sae:
    #             torch.load()
    #     except NameError:
    #         pass
    #     else:
    #         for ae in self.sae:
    #             for layer in ae.modules():
    #                 if isinstance(layer, nn.Linear):
    #                     nn.init.xavier_normal_(layer.weight.data)
    #                 elif isinstance(layer, nn.Conv2d):
    #                     nn.init.kaiming_normal_(layer.weight.data)

    def forward(self, x):
        for ae in self.sae:
            x = ae.encode(x)
        return x

    def reconstruct(self, x):
        for ae in self.sae:
            x = ae.encode(x)
        for ae in reversed(self.sae):
            x = ae.decode(x)
        return x

## layer-wise training 

In [None]:
class AETrainer():
    def __init__(self, data_pos:str, ae:AutoEncoder):
        self.model = ae
        self.data_location = data_pos

    def train(self, epochs:int):
        data = self.load_data(self.data_location)
        dataset = TensorDataset(data)
        dataloader = DataLoader(dataset=dataset, batch_size=8, shuffle=True)
        optimizer = torch.optim.Adam(self.model.parameters())
        for epoch in range(epochs):
            for in_rep in dataloader:
                out_rep = self.model(in_rep)
                loss_score = self.model.loss_func(in_rep, out_rep)
                optimizer.zero_grad()
                loss_score.backward()
                optimizer.step()
            if epoch % 5 == 0:
                print("<<<<<<<<<< epochs: {0}/{1} , loss: {2} >>>>>>>>>>".format(epoch, epochs, loss_score))


    def load_params(self, params_pos:str):
        pass

    def load_data(self, data_pos:str) -> torch.tensor:
        return torch.load(data_pos)

    def save(self) -> str:
        pass



In [7]:
class SAETrainer():
    def __init__(self, 
                 model:StackedAutoEncoder, 
                 dataset:Dataset,
                 loss_func:F=F.tanh):
        
        self.models = model
        self.dataset = dataset
        self.loss_func = loss_func

    def train(self, epochs:int):
        node_rep = self.dataset.data.cuda()
        print("begin to train stacked autoencoder layer-wisely")
        rep_data = SignNetData("dataset\\epinions_UD.mat").data
        rep_dataset = TensorDataset(rep_data)
        for idx in len(self.models):
            print("<<<<<<<<<< start to train {0}/{1} auto-encoder >>>>>>>>>>".format(idx+1, len(self.models)))
            ae = self.models.sae[idx]
            ae_trainer = AETrainer(ae, rep_dataset)
            ae_trainer.train(10)
            rep_data = ae(rep_data)
            rep_dataset = TensorDataset(rep_data)
        torch.save(rep_data, "final_rep.pt")

        # for ae in self.sae.sae:
        #     print("the {}/{} autoencoder".format(ae_cnt,len(self.sae.sae)))
        #     optimizer = torch.optim.Adam(ae.parameters(),lr=5e-3)
        #     rep_dataset = TensorDataset(node_rep)
        #     for epoch in range(epochs):
        #         node_loader = DataLoader(rep_dataset, batch_size=8, shuffle=True)
        #         print("run epoch {0}/{1}".format(epoch+1, epochs))
        #         node_cnt = 0
        #         for node in node_loader:
        #             reconstruct_node = ae(node)
        #             optimizer.zero_grad()
        #             loss_score = self.loss_func(reconstruct_node, node)
        #             if node_cnt % 800 == 0:
        #                 print("calculate {0}/{1} node".format(node_cnt, len(rep_dataset)))
        #                 print("loss={0}".format(loss_score))
        #             loss_score.backward(retain_graph=True)
        #             optimizer.step()
        #             node_cnt = node_cnt + 8
                
        #     node_rep = ae.encode(node_rep)
        #     ae_cnt = ae_cnt + 1
        #     ae.save()
            
    def eval(self):
        dataloader = TensorDataset(self.dataset.data.cuda())
        rep_tensor = torch.zeros(self.sae.in_dim, self.sae.out_dim)
        for idx in range(len(dataloader)):
            rep_tensor[idx] = self.sae(dataloader[idx])
        return rep_tensor

    def compute_reconstruct_error(self):
        dataloader = TensorDataset(self.dataset.data.cuda())
        rep_tensor = torch.zeros(self.sae.in_dim, self.sae.in_dim)
        for idx in range(len(dataloader)):
            rep_tensor[idx] = self.sae.reconstruct(dataloader[idx])
        err = torch.mean((torch.sum(rep_tensor - self.dataset.data)))
        return err
    
    def save(self):
        for i in range(len(self.sae.sae)):
            filename = "checkpoint" + str(i) + ".pth"
            saving_location = os.path.join(self.dataset.save_dir, filename)
            torch.save(self.sae.sae.state_dict(), saving_location)

    def load(self, load_dir:str):
        pass
    


## test self-defined signed network models

In [8]:
if __name__ == "__main__":
    epinions = SignNetData(file_location="dataset/epinions_UD.mat")
    Encoder2Vec = StackedAutoEncoder(7000, 1024, 32, activate_func = torch.tanh)
    loss = F.mse_loss
    sae_trainer = SAETrainer(Encoder2Vec, epinions, loss)
    err = sae_trainer.compute_reconstruct_error()
    print("reconstructing error = {0}".format(err))
    sae_trainer.train(1)
    err = sae_trainer.compute_reconstruct_error()
    print("reconstructing error = {0}".format(err))

reconstructing error = -602628.375
<__main__.SAETrainer object at 0x000001E04CC21520>
begin to train stacked autoencoder layer-wise
the 1/2 autoencoder
run epoch 1/1
calculate 0/7000 node
loss=0.017768435180187225
calculate 800/7000 node
loss=0.16204749047756195
calculate 1600/7000 node
loss=0.1533586084842682


KeyboardInterrupt: 