In [5]:
import os

directory = '../processed_data/no_miss'
datasets = os.listdir(directory)
datasets_MCAR = [dataset for dataset in datasets if dataset[-7] == 'C']
datasets_MNAR = [dataset for dataset in datasets if dataset[-7] == 'N']

directory = '../processed_data/yes_miss'
datasets = os.listdir(directory)

print('MCAR datasets:', datasets_MCAR)
print('MNAR datasets:', datasets_MNAR)
print('Datasets with missing values:', datasets)

MCAR datasets: ['airfoil_MCAR.csv', 'christine_MCAR.csv', 'philippine_MCAR.csv', 'phoneme_MCAR.csv', 'wine_quality_MCAR.csv']
MNAR datasets: ['airfoil_MNAR.csv', 'christine_MNAR.csv', 'philippine_MNAR.csv', 'phoneme_MNAR.csv', 'wine_quality_MNAR.csv']
Datasets with missing values: ['cirrhosis.csv', 'equity.csv', 'fico.csv', 'support.csv', 'wiki.csv']


In [2]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

def prepareData(dataset_name, SEED):
    data = pd.read_csv(f'../processed_data/no_miss/{dataset_name}')
    y = data.y.values
    X = data.drop('y', axis=1).values
    n, dim = X.shape

    if len(data.y.unique()) == 2:
        regression = False
    else:
        regression = True

    # Split the data into training, validation, and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
                                                        random_state=SEED, stratify=y if not regression else None)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, 
                                                      random_state=SEED, stratify=y_train if not regression else None)
    
    # Impute missing values
    imputer = SimpleImputer(strategy='mean')
    X_train = imputer.fit_transform(X_train)
    X_val = imputer.transform(X_val)
    X_test = imputer.transform(X_test)
    
    # Normalize and scale according to the training set
    eps = 1e-6
    mean = X_train.mean(axis=0)
    std = X_train.std(axis=0) + eps
    X_train = (X_train - mean) / std
    X_val = (X_val - mean) / std
    X_test = (X_test - mean) / std
    
    return X_train, X_val, X_test, y_train, y_val, y_test, dim, regression

import torch
from torch.utils.data import DataLoader, TensorDataset

def data2Tensors(X_train, X_val, X_test, y_train, y_val, y_test):
    train_batch = int(2**np.ceil(np.log2(X_train.shape[0]//10)))
    X_train, X_val, X_test = [torch.tensor(x, dtype=torch.float32) for x in [X_train, X_val, X_test]]
    y_train, y_val, y_test = [torch.tensor(y, dtype=torch.float32) for y in [y_train, y_val, y_test]]
    
    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=train_batch, shuffle=True)
    val_loader, test_loader = [DataLoader(TensorDataset(X, y), batch_size=X.shape[0], shuffle=False) for X, y in [(X_val, y_val), (X_test, y_test)]]
    return train_loader, val_loader, test_loader

# MCAR

In [3]:
# Time
import time

# Random Forest
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# MLP
from models import BasicMLP
from train_utils import train, getPredictions

# Evaluation
metrics = ['time', 'mse', 'mae', 'r2', 'acc', 'prec', 'rec', 'f1', 'roc_auc']

# Experiment
seeds = [int(bin(i)[2:]) for i in list(range(5))]
results_forest = np.zeros((len(datasets_MCAR), len(seeds), len(metrics)))
results_mlp = np.zeros((len(datasets_MCAR), len(seeds), len(metrics)))

for did, mcar_dataset in enumerate(datasets_MCAR):
    for sid, seed in enumerate(seeds):
        # Verbose
        print(f'Dataset: {did+1}, Seed: {sid+1} STARTING TRAINING\n')

        # Prepare data
        X_train, X_val, X_test, y_train, y_val, y_test, dim, regression = prepareData(mcar_dataset, seed)
        train_loader, val_loader, test_loader = data2Tensors(X_train, X_val, X_test, y_train, y_val, y_test)
        
        if regression:
            # Fit Random Forest
            start_time = time.time()
            model = RandomForestRegressor(n_estimators=1000,
                                          max_depth=4,
                                          min_samples_split=2,
                                          min_samples_leaf=5,
                                          max_features='sqrt',
                                          bootstrap=True,
                                          random_state=seed)
            model.fit(X_train, y_train)
            end_time = time.time()
            
            y_pred = model.predict(X_test)
            mse = mean_squared_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            training_time = end_time - start_time
            results_forest[did, sid] = [training_time, mse, mae, r2, np.nan, np.nan, np.nan, np.nan, np.nan]

            # Fit MLP
            start_time = time.time()
            model = BasicMLP(input_dim=dim, 
                             model_layers=[2**int(np.log2(dim)+1), 2**int(np.log2(dim))],
                             dropout_rate=0)
            epochs = 5000
            patience = 100
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
            
            train(model=model, 
                  train_loader=train_loader, 
                  val_loader=val_loader, 
                  epochs=epochs, 
                  patience=patience, 
                  regression_flag=regression, 
                  device=device, 
                  seed=seed,
                  verbose = True)
            end_time = time.time()

            y_pred, y_test = getPredictions(model, test_loader, device)
            mse = mean_squared_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            training_time = end_time - start_time
            results_mlp[did, sid] = [training_time, mse, mae, r2, np.nan, np.nan, np.nan, np.nan, np.nan]
            
        else:
            # Fit Random Forest
            start_time = time.time()
            model = RandomForestClassifier(n_estimators=500,
                                           max_depth=4,
                                           min_samples_split=2,
                                           min_samples_leaf=5,
                                           max_features='sqrt',
                                           bootstrap=True,
                                           random_state=seed)
            model.fit(X_train, y_train)
            end_time = time.time()
            
            y_pred = model.predict(X_test)
            acc = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            prec = precision_score(y_test, y_pred)
            rec = recall_score(y_test, y_pred)
            roc_auc = roc_auc_score(y_test, y_pred)
            training_time = end_time - start_time
            results_forest[did, sid] = [training_time, np.nan, np.nan, np.nan, acc, prec, rec, f1, roc_auc]

            # Fit MLP
            start_time = time.time()
            model = BasicMLP(input_dim=dim, 
                             model_layers=[2**int(np.log2(dim)+1), 2**int(np.log2(dim))],
                             dropout_rate=0)
            epochs = 5000
            patience = 100
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
            
            train(model=model, 
                  train_loader=train_loader, 
                  val_loader=val_loader, 
                  epochs=epochs, 
                  patience=patience, 
                  regression_flag=regression, 
                  device=device, 
                  seed=seed,
                  verbose=True)
            end_time = time.time()

            y_pred, y_test = getPredictions(model, test_loader, device)
            y_pred = ((torch.sigmoid(torch.tensor(y_pred)) >= 0.5) * 1.).cpu().numpy()
            acc = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            prec = precision_score(y_test, y_pred)
            rec = recall_score(y_test, y_pred)
            roc_auc = roc_auc_score(y_test, y_pred)
            training_time = end_time - start_time
            results_mlp[did, sid] = [training_time, np.nan, np.nan, np.nan, acc, prec, rec, f1, roc_auc]

        print(f'\nTraining results: {results_forest[did, sid][3] if regression else results_forest[did, sid][-1]:.4f} (RF) | {results_mlp[did, sid][3] if regression else results_mlp[did, sid][-1]:.4f} (MLP)\n')
        np.save('../results/raw/results_forest_MCAR.npy', results_forest)
        np.save('../results/raw/results_mlp_MCAR.npy', results_mlp)

Dataset: 1, Seed: 1 STARTING TRAINING

Epoch 500, val loss: 10909.5
Epoch 1000, val loss: 2887.342529296875
Epoch 1500, val loss: 835.9053955078125
Epoch 2000, val loss: 305.31939697265625
Epoch 2500, val loss: 149.91946411132812
Epoch 3000, val loss: 72.09498596191406
Epoch 3500, val loss: 39.17677307128906
Epoch 4000, val loss: 30.48405647277832
Epoch 4500, val loss: 27.764240264892578
Epoch 5000, val loss: 26.832918167114258

Training results: 0.5310 (RF) | 0.5460 (MLP)

Dataset: 1, Seed: 2 STARTING TRAINING

Epoch 500, val loss: 11124.78125
Epoch 1000, val loss: 3865.17138671875
Epoch 1500, val loss: 921.7392578125
Epoch 2000, val loss: 432.8335876464844
Epoch 2500, val loss: 222.0435333251953
Epoch 3000, val loss: 103.85845947265625
Epoch 3500, val loss: 52.07768249511719
Epoch 4000, val loss: 31.33675765991211
Epoch 4500, val loss: 24.93090057373047
Epoch 5000, val loss: 22.956268310546875

Training results: 0.4973 (RF) | 0.3843 (MLP)

Dataset: 1, Seed: 3 STARTING TRAINING

Epoch

# MNAR

In [4]:
# Time
import time

# Random Forest
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# MLP
from models import BasicMLP
from train_utils import train, getPredictions

# Evaluation
metrics = ['time', 'mse', 'mae', 'r2', 'acc', 'prec', 'rec', 'f1', 'roc_auc']

# Experiment
seeds = [int(bin(i)[2:]) for i in list(range(5))]
results_forest = np.zeros((len(datasets_MNAR), len(seeds), len(metrics)))
results_mlp = np.zeros((len(datasets_MNAR), len(seeds), len(metrics)))

for did, mnar_dataset in enumerate(datasets_MNAR):
    for sid, seed in enumerate(seeds):
        # Verbose
        print(f'Dataset: {did+1}, Seed: {sid+1} STARTING TRAINING\n')

        # Prepare data
        X_train, X_val, X_test, y_train, y_val, y_test, dim, regression = prepareData(mnar_dataset, seed)
        train_loader, val_loader, test_loader = data2Tensors(X_train, X_val, X_test, y_train, y_val, y_test)
        
        if regression:
            # Fit Random Forest
            start_time = time.time()
            model = RandomForestRegressor(n_estimators=1000,
                                          max_depth=4,
                                          min_samples_split=2,
                                          min_samples_leaf=5,
                                          max_features='sqrt',
                                          bootstrap=True,
                                          random_state=seed)
            model.fit(X_train, y_train)
            end_time = time.time()
            
            y_pred = model.predict(X_test)
            mse = mean_squared_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            training_time = end_time - start_time
            results_forest[did, sid] = [training_time, mse, mae, r2, np.nan, np.nan, np.nan, np.nan, np.nan]

            # Fit MLP
            start_time = time.time()
            model = BasicMLP(input_dim=dim, 
                             model_layers=[2**int(np.log2(dim)+1), 2**int(np.log2(dim))],
                             dropout_rate=0)
            epochs = 5000
            patience = 100
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
            
            train(model=model, 
                  train_loader=train_loader, 
                  val_loader=val_loader, 
                  epochs=epochs, 
                  patience=patience, 
                  regression_flag=regression, 
                  device=device, 
                  seed=seed,
                  verbose = True)
            end_time = time.time()

            y_pred, y_test = getPredictions(model, test_loader, device)
            mse = mean_squared_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            training_time = end_time - start_time
            results_mlp[did, sid] = [training_time, mse, mae, r2, np.nan, np.nan, np.nan, np.nan, np.nan]
            
        else:
            # Fit Random Forest
            start_time = time.time()
            model = RandomForestClassifier(n_estimators=500,
                                           max_depth=4,
                                           min_samples_split=2,
                                           min_samples_leaf=5,
                                           max_features='sqrt',
                                           bootstrap=True,
                                           random_state=seed)
            model.fit(X_train, y_train)
            end_time = time.time()
            
            y_pred = model.predict(X_test)
            acc = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            prec = precision_score(y_test, y_pred)
            rec = recall_score(y_test, y_pred)
            roc_auc = roc_auc_score(y_test, y_pred)
            training_time = end_time - start_time
            results_forest[did, sid] = [training_time, np.nan, np.nan, np.nan, acc, prec, rec, f1, roc_auc]

            # Fit MLP
            start_time = time.time()
            model = BasicMLP(input_dim=dim, 
                             model_layers=[2**int(np.log2(dim)+1), 2**int(np.log2(dim))],
                             dropout_rate=0)
            epochs = 5000
            patience = 100
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
            
            train(model=model, 
                  train_loader=train_loader, 
                  val_loader=val_loader, 
                  epochs=epochs, 
                  patience=patience, 
                  regression_flag=regression, 
                  device=device, 
                  seed=seed,
                  verbose=True)
            end_time = time.time()

            y_pred, y_test = getPredictions(model, test_loader, device)
            y_pred = ((torch.sigmoid(torch.tensor(y_pred)) >= 0.5) * 1.).cpu().numpy()
            acc = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            prec = precision_score(y_test, y_pred)
            rec = recall_score(y_test, y_pred)
            roc_auc = roc_auc_score(y_test, y_pred)
            training_time = end_time - start_time
            results_mlp[did, sid] = [training_time, np.nan, np.nan, np.nan, acc, prec, rec, f1, roc_auc]

        print(f'\nTraining results: {results_forest[did, sid][3] if regression else results_forest[did, sid][-1]:.4f} (RF) | {results_mlp[did, sid][3] if regression else results_mlp[did, sid][-1]:.4f} (MLP)\n')
        np.save('../results/raw/results_forest_MNAR.npy', results_forest)
        np.save('../results/raw/results_mlp_MNAR.npy', results_mlp)

Dataset: 1, Seed: 1 STARTING TRAINING

Epoch 500, val loss: 10620.736328125
Epoch 1000, val loss: 2837.836669921875
Epoch 1500, val loss: 574.8975830078125
Epoch 2000, val loss: 252.41244506835938
Epoch 2500, val loss: 114.41757202148438
Epoch 3000, val loss: 60.76890563964844
Epoch 3500, val loss: 40.854766845703125
Epoch 4000, val loss: 33.99954605102539
Epoch 4500, val loss: 30.806556701660156
Epoch 5000, val loss: 29.316131591796875

Training results: 0.5638 (RF) | 0.5086 (MLP)

Dataset: 1, Seed: 2 STARTING TRAINING

Epoch 500, val loss: 11234.2138671875
Epoch 1000, val loss: 4195.4697265625
Epoch 1500, val loss: 1195.6588134765625
Epoch 2000, val loss: 547.4733276367188
Epoch 2500, val loss: 231.5416717529297
Epoch 3000, val loss: 96.56597137451172
Epoch 3500, val loss: 43.486045837402344
Epoch 4000, val loss: 26.11496925354004
Epoch 4500, val loss: 21.594314575195312
Epoch 5000, val loss: 20.494321823120117

Training results: 0.5629 (RF) | 0.4517 (MLP)

Dataset: 1, Seed: 3 STARTI

# REAL MISSINGNESS

In [6]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

def prepareData(dataset_name, SEED):
    data = pd.read_csv(f'../processed_data/yes_miss/{dataset_name}')
    y = data.y.values
    X = data.drop('y', axis=1).values
    n, dim = X.shape

    if len(data.y.unique()) == 2:
        regression = False
    else:
        regression = True

    # Split the data into training, validation, and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
                                                        random_state=SEED, stratify=y if not regression else None)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, 
                                                      random_state=SEED, stratify=y_train if not regression else None)
    
    # Impute missing values
    imputer = SimpleImputer(strategy='mean')
    X_train = imputer.fit_transform(X_train)
    X_val = imputer.transform(X_val)
    X_test = imputer.transform(X_test)
    
    # Normalize and scale according to the training set
    eps = 1e-6
    mean = X_train.mean(axis=0)
    std = X_train.std(axis=0) + eps
    X_train = (X_train - mean) / std
    X_val = (X_val - mean) / std
    X_test = (X_test - mean) / std
    
    return X_train, X_val, X_test, y_train, y_val, y_test, dim, regression

import torch
from torch.utils.data import DataLoader, TensorDataset

def data2Tensors(X_train, X_val, X_test, y_train, y_val, y_test):
    train_batch = int(2**np.ceil(np.log2(X_train.shape[0]//10)))
    X_train, X_val, X_test = [torch.tensor(x, dtype=torch.float32) for x in [X_train, X_val, X_test]]
    y_train, y_val, y_test = [torch.tensor(y, dtype=torch.float32) for y in [y_train, y_val, y_test]]
    
    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=train_batch, shuffle=True)
    val_loader, test_loader = [DataLoader(TensorDataset(X, y), batch_size=X.shape[0], shuffle=False) for X, y in [(X_val, y_val), (X_test, y_test)]]
    return train_loader, val_loader, test_loader

In [8]:
# Time
import time

# Random Forest
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# MLP
from models import BasicMLP
from train_utils import train, getPredictions

# Evaluation
metrics = ['time', 'mse', 'mae', 'r2', 'acc', 'prec', 'rec', 'f1', 'roc_auc']

# Experiment
seeds = [int(bin(i)[2:]) for i in list(range(5))]
results_forest = np.zeros((len(datasets), len(seeds), len(metrics)))
results_mlp = np.zeros((len(datasets), len(seeds), len(metrics)))

for did, dataset in enumerate(datasets):
    for sid, seed in enumerate(seeds):
        # Verbose
        print(f'Dataset: {did+1}, Seed: {sid+1} STARTING TRAINING\n')

        # Prepare data
        X_train, X_val, X_test, y_train, y_val, y_test, dim, regression = prepareData(dataset, seed)
        train_loader, val_loader, test_loader = data2Tensors(X_train, X_val, X_test, y_train, y_val, y_test)
        
        if regression:
            # Fit Random Forest
            start_time = time.time()
            model = RandomForestRegressor(n_estimators=1000,
                                          max_depth=4,
                                          min_samples_split=2,
                                          min_samples_leaf=5,
                                          max_features='sqrt',
                                          bootstrap=True,
                                          random_state=seed)
            model.fit(X_train, y_train)
            end_time = time.time()
            
            y_pred = model.predict(X_test)
            mse = mean_squared_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            training_time = end_time - start_time
            results_forest[did, sid] = [training_time, mse, mae, r2, np.nan, np.nan, np.nan, np.nan, np.nan]

            # Fit MLP
            start_time = time.time()
            model = BasicMLP(input_dim=dim, 
                             model_layers=[2**int(np.log2(dim)+1), 2**int(np.log2(dim))],
                             dropout_rate=0)
            epochs = 5000
            patience = 100
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
            
            train(model=model, 
                  train_loader=train_loader, 
                  val_loader=val_loader, 
                  epochs=epochs, 
                  patience=patience, 
                  regression_flag=regression, 
                  device=device, 
                  seed=seed,
                  verbose = True)
            end_time = time.time()

            y_pred, y_test = getPredictions(model, test_loader, device)
            mse = mean_squared_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            training_time = end_time - start_time
            results_mlp[did, sid] = [training_time, mse, mae, r2, np.nan, np.nan, np.nan, np.nan, np.nan]
            
        else:
            # Fit Random Forest
            start_time = time.time()
            model = RandomForestClassifier(n_estimators=500,
                                           max_depth=4,
                                           min_samples_split=2,
                                           min_samples_leaf=5,
                                           max_features='sqrt',
                                           bootstrap=True,
                                           random_state=seed)
            model.fit(X_train, y_train)
            end_time = time.time()
            
            y_pred = model.predict(X_test)
            acc = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            prec = precision_score(y_test, y_pred)
            rec = recall_score(y_test, y_pred)
            roc_auc = roc_auc_score(y_test, y_pred)
            training_time = end_time - start_time
            results_forest[did, sid] = [training_time, np.nan, np.nan, np.nan, acc, prec, rec, f1, roc_auc]

            # Fit MLP
            start_time = time.time()
            model = BasicMLP(input_dim=dim, 
                             model_layers=[2**int(np.log2(dim)+1), 2**int(np.log2(dim))],
                             dropout_rate=0)
            epochs = 5000
            patience = 100
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
            
            train(model=model, 
                  train_loader=train_loader, 
                  val_loader=val_loader, 
                  epochs=epochs, 
                  patience=patience, 
                  regression_flag=regression, 
                  device=device, 
                  seed=seed,
                  verbose=True)
            end_time = time.time()

            y_pred, y_test = getPredictions(model, test_loader, device)
            y_pred = ((torch.sigmoid(torch.tensor(y_pred)) >= 0.5) * 1.).cpu().numpy()
            acc = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            prec = precision_score(y_test, y_pred)
            rec = recall_score(y_test, y_pred)
            roc_auc = roc_auc_score(y_test, y_pred)
            training_time = end_time - start_time
            results_mlp[did, sid] = [training_time, np.nan, np.nan, np.nan, acc, prec, rec, f1, roc_auc]

        print(f'\nTraining results: {results_forest[did, sid][3] if regression else results_forest[did, sid][-1]:.4f} (RF) | {results_mlp[did, sid][3] if regression else results_mlp[did, sid][-1]:.4f} (MLP)\n')
        np.save('../results/raw/results_forest_real.npy', results_forest)
        np.save('../results/raw/results_mlp_real.npy', results_mlp)

Dataset: 1, Seed: 1 STARTING TRAINING

Epoch 500, val loss: 0.5520844459533691

Training results: 0.7377 (RF) | 0.7171 (MLP)

Dataset: 1, Seed: 2 STARTING TRAINING

Epoch 500, val loss: 0.5043120980262756

Training results: 0.6734 (RF) | 0.6991 (MLP)

Dataset: 1, Seed: 3 STARTING TRAINING


Training results: 0.7146 (RF) | 0.7379 (MLP)

Dataset: 1, Seed: 4 STARTING TRAINING

Epoch 500, val loss: 0.5612610578536987

Training results: 0.7300 (RF) | 0.7429 (MLP)

Dataset: 1, Seed: 5 STARTING TRAINING

Epoch 500, val loss: 0.5596283078193665

Training results: 0.7532 (RF) | 0.7378 (MLP)

Dataset: 2, Seed: 1 STARTING TRAINING

Epoch 500, val loss: 0.5801419615745544
Epoch 1000, val loss: 0.5317674875259399
Epoch 1500, val loss: 0.513891339302063
Epoch 2000, val loss: 0.5071946382522583

Training results: 0.6003 (RF) | 0.8047 (MLP)

Dataset: 2, Seed: 2 STARTING TRAINING

Epoch 500, val loss: 0.6401071548461914
Epoch 1000, val loss: 0.6046442985534668
Epoch 1500, val loss: 0.5822026133537292
E