In [1]:
import pandas as pd
from utils import dataframe_to_tensor, tensor_to_dataframe, clean_dataframe, plot_losses, combine_dataframes
import torch
import numpy as np
from IPython.display import display
import torch.nn as nn
from observer_gan import random_normal, train, train_wgan, random_model, save_models, load_models
import os

In [2]:
def train_and_save_model(N, num_positive_samples, num_unlabeled_samples, percent_unlabeled_positive,
                         learning_rate=1e-3, n_epochs=10000, n_samples_per_epoch=5000,
                         hidden_size=25, hidden_layers=4):
    """Train ``N`` additional model triples for one data configuration and save each.

    Runs are numbered so that earlier results are never overwritten: numbering
    starts at the first index ``i`` for which
    ``models/{num_positive_samples}_{num_unlabeled_samples}_{percent_unlabeled_positive}_{i}_discriminator.pth``
    does not exist yet.

    Args:
        N: Number of new models to train. ``N <= 0`` is a no-op.
        num_positive_samples: Rows drawn (without replacement, via
            ``DataFrame.sample``) from the labeled-positive training CSV for
            each run. pandas raises ``ValueError`` if this exceeds the number
            of rows available.
        num_unlabeled_samples: Forwarded to ``combine_dataframes`` as the
            requested size of the unlabeled set.
        percent_unlabeled_positive: Forwarded to ``combine_dataframes``
            (presumably the positive fraction of the unlabeled set -- confirm
            against that helper).
        learning_rate: First argument forwarded to ``train``.
        n_epochs: Number of epochs forwarded to ``train``.
        n_samples_per_epoch: Per-epoch sample count forwarded to ``train``.
        hidden_size: Passed three times to ``train`` (presumably one value per
            network: discriminator, generator, observer -- confirm).
        hidden_layers: Passed three times to ``train``, same layout as
            ``hidden_size``.

    Returns:
        None. The trained discriminator, generator and observer are handed to
        ``save_models`` under the tag
        ``{num_positive_samples}_{num_unlabeled_samples}_{percent_unlabeled_positive}_{run_index}``.
    """
    if N <= 0:
        # Nothing to train, so avoid touching the dataset files at all.
        return

    run_tag = f'{num_positive_samples}_{num_unlabeled_samples}_{percent_unlabeled_positive}'

    # Find the first run index that has no saved discriminator yet.
    start = 0
    while os.path.exists(f'models/{run_tag}_{start}_discriminator.pth'):
        start += 1

    # The CSV contents are identical for every run, so parse them once here
    # instead of once per iteration; only the random sampling differs per run.
    # NOTE(review): assumes combine_dataframes does not mutate its inputs -- confirm.
    positive_dataframe_full = pd.read_csv('poc_dataset/WalletSmartContract_poc_data_positive_train.csv')
    unlabeled_dataframe_train_positive = pd.read_csv('poc_dataset/WalletSmartContract_poc_data_unlabeled_positive_train.csv')
    unlabeled_dataframe_train_negative = pd.read_csv('poc_dataset/WalletSmartContract_poc_data_unlabeled_negative_train.csv')

    for ii in range(start, start + N):
        # Fresh random draw of labeled positives for this run.
        positive_dataframe_train = positive_dataframe_full.sample(n=num_positive_samples)
        clean_positive_dataframe_train = clean_dataframe(positive_dataframe_train)

        # Fresh unlabeled mixture for this run.
        unlabeled_dataframe_train = combine_dataframes(
            unlabeled_dataframe_train_positive,
            unlabeled_dataframe_train_negative,
            num_unlabeled_samples,
            percent_unlabeled_positive,
        )
        clean_unlabeled_dataframe_train = clean_dataframe(unlabeled_dataframe_train)

        positive_tensor_train = dataframe_to_tensor(clean_positive_dataframe_train)
        unlabeled_tensor_train = dataframe_to_tensor(clean_unlabeled_dataframe_train)

        # The last three values returned by train() are not needed here.
        discriminator, generator, observer, _, _, _ = train(
            learning_rate, n_epochs, unlabeled_tensor_train, positive_tensor_train,
            n_samples_per_epoch, hidden_size, hidden_size, hidden_size,
            hidden_layers, hidden_layers, hidden_layers,
        )
        save_models(discriminator, generator, observer, f'{run_tag}_{ii}')

In [3]:
# Number of models to train for every configuration in the sweep.
N = 5

# Training-set sizes swept for both the labeled-positive and the unlabeled data:
# 10^2, 10^3, ..., 10^6.
positive_samples_params = [10 ** exponent for exponent in range(2, 7)]
unlabeled_samples_params = [10 ** exponent for exponent in range(2, 7)]

# Values passed as `percent_unlabeled_positive`.
percent_unlabeled_positive_params = [0.99, 0.9, 0.75]

# Walk the full grid in the same order as three nested loops would:
# positive size outermost, then unlabeled size, then the percentage.
for num_positive_samples, num_unlabeled_samples, percent_unlabeled_positive in [
    (positive_size, unlabeled_size, positive_fraction)
    for positive_size in positive_samples_params
    for unlabeled_size in unlabeled_samples_params
    for positive_fraction in percent_unlabeled_positive_params
]:
    train_and_save_model(N, num_positive_samples, num_unlabeled_samples, percent_unlabeled_positive)

Epoch: 100%|██████████| 10000/10000 [01:27<00:00, 114.87it/s]
Epoch: 100%|██████████| 10000/10000 [01:43<00:00, 96.26it/s]
Epoch: 100%|██████████| 10000/10000 [01:41<00:00, 98.37it/s]
Epoch: 100%|██████████| 10000/10000 [01:29<00:00, 111.16it/s]
Epoch: 100%|██████████| 10000/10000 [01:27<00:00, 114.56it/s]
Epoch: 100%|██████████| 10000/10000 [01:28<00:00, 112.66it/s]
Epoch: 100%|██████████| 10000/10000 [01:29<00:00, 111.83it/s]
Epoch: 100%|██████████| 10000/10000 [01:27<00:00, 114.42it/s]
Epoch: 100%|██████████| 10000/10000 [01:30<00:00, 110.61it/s]
Epoch: 100%|██████████| 10000/10000 [01:30<00:00, 110.62it/s]
Epoch: 100%|██████████| 10000/10000 [01:38<00:00, 101.33it/s]
Epoch: 100%|██████████| 10000/10000 [01:32<00:00, 108.53it/s]
Epoch: 100%|██████████| 10000/10000 [01:36<00:00, 103.53it/s]
Epoch: 100%|██████████| 10000/10000 [01:40<00:00, 99.72it/s]
Epoch: 100%|██████████| 10000/10000 [01:36<00:00, 103.13it/s]
Epoch: 100%|██████████| 10000/10000 [01:39<00:00, 100.63it/s]
Epoch: 100%