In [None]:
import os
import random

import pandas as pd
import numpy as np
from scipy.spatial import distance
from scipy.spatial.distance import euclidean
from scipy.spatial.distance import cdist
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
import torch
from torch import tensor
from torch.nn import MSELoss
from torch.nn import Linear
from torch.nn import Sigmoid, ReLU
from torch.nn import Module
from torch.nn.init import xavier_uniform_
from torch.optim.lr_scheduler import CyclicLR, ReduceLROnPlateau
from skorch.regressor import NeuralNetRegressor
from skorch.dataset import ValidSplit
from skorch.callbacks import EarlyStopping
from skorch.callbacks import EarlyStopping, LRScheduler, Freezer, Unfreezer
from skorch.regressor import NeuralNetRegressor, NeuralNet

from model import MLP

In [None]:
# Experiment configuration shared by every cell below.
n_members = 3              # Number of learners in the committee
query_number = 25          # The number of the AL Iterations in each Exp
iteration = 20             # Total number of the Exps
batch_size = 10            # Samples queried from the pool per AL iteration
total_initail_size = 9     # Labelled samples available once both initial stages are done
initial_size = 5           # Labelled samples drawn in initial stage 1

# Seed arrays are indexed by the experiment number. The paths are built with
# os.path.join instead of backslash literals: sequences such as "\S" or "\."
# are invalid string escapes (SyntaxWarning on recent Python versions) and a
# hard-coded backslash only resolves on Windows.
_seed_dir = os.path.join("..", "..", "Seeds")

seed_rf = np.load(file=os.path.join(_seed_dir, "seed2.npy"))        # random forest evaluator
seed_initial = np.load(file=os.path.join(_seed_dir, "seed3.npy"))   # initial labelled draw

seed_nn1 = np.load(file=os.path.join(_seed_dir, "seed4.npy"))       # committee member 0
seed_nn2 = np.load(file=os.path.join(_seed_dir, "seed5.npy"))       # committee member 1
seed_nn3 = np.load(file=os.path.join(_seed_dir, "seed6.npy"))       # committee member 2

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def Committee_Prediction(learner_list, X_test, y_test):
    """Return the mean R2 score of the individual committee members.

    Every learner in ``learner_list`` predicts ``X_test`` on its own; each
    prediction is scored against ``y_test`` with ``r2_score`` and the scores
    are averaged. The members' predictions themselves are not combined.
    """
    member_scores = np.array(
        [r2_score(y_test, member.predict(X_test)) for member in learner_list]
    )
    return member_scores.mean()

In [None]:
def retrain_committee(learner_list, used_data, used_label):
    """Refit every committee member on its own bootstrap resample.

    For each learner, as many row positions as there are labelled samples are
    drawn with replacement from NumPy's global RNG, and the learner's ``fit``
    is called on the rows selected that way. The values returned by ``fit``
    are collected, in committee order, into a new list.
    """
    n_labelled = used_data.shape[0]
    refitted = []
    for member in learner_list:
        # Bootstrap: same size as the labelled set, duplicates allowed.
        boot_rows = np.random.choice(np.arange(n_labelled), n_labelled, replace=True)
        refitted.append(member.fit(used_data[boot_rows], used_label[boot_rows]))
    return refitted

In [None]:
def multi_argmax(values, n_instances):
    """Return the indices of the ``n_instances`` largest entries of ``values``.

    The selection uses ``np.argpartition``, so the returned indices are not
    sorted by value among themselves. An ``AssertionError`` is raised when
    more indices are requested than ``values`` has entries along axis 0.
    """
    pool_size = values.shape[0]
    assert n_instances <= pool_size, 'n_instances must be less or equal than the size of utility.'

    # Partitioning the negated values puts the largest originals at the front.
    partitioned = np.argpartition(-values, n_instances-1, axis=0)
    top_idx = partitioned[:n_instances]
    return top_idx

In [None]:
def query_QBC(unlebeled_samples, learner_list, n_instances=batch_size):
    """Pick the pool samples on which the committee disagrees the most.

    Every learner in ``learner_list`` predicts all of ``unlebeled_samples``;
    the per-sample variance across the learners is the disagreement score and
    the ``n_instances`` highest-scoring positions are returned.

    Parameters
    ----------
    unlebeled_samples : array with one row per candidate sample.
    learner_list : sequence of fitted learners exposing ``predict``.
        The committee size is taken from this sequence rather than from the
        global ``n_members``, so a committee of any size is handled.
    n_instances : int
        How many positions to return (defaults to the notebook's batch size).

    Returns
    -------
    Index array from ``multi_argmax`` (positions into ``unlebeled_samples``,
    not sorted by score).

    Raises
    ------
    ValueError
        If ``learner_list`` is empty.
    """
    committee_size = len(learner_list)
    if committee_size == 0:
        raise ValueError('learner_list must contain at least one fitted learner.')

    n_candidates = unlebeled_samples.shape[0]
    # float64 buffer: one row of predictions per committee member.
    member_preds = np.empty(shape=(committee_size, n_candidates))
    for row, member in enumerate(learner_list):
        # assumes predict() yields exactly one value per sample — TODO confirm
        member_preds[row, :] = np.asarray(member.predict(unlebeled_samples)).reshape(-1)

    # Variance (not standard deviation) across members; the ranking is the same.
    disagreement = np.var(member_preds, axis=0)
    return multi_argmax(disagreement, n_instances=n_instances)

In [None]:
def _score_rf(rf_model, X, y):
    """Return ``(r2, mse)`` of ``rf_model`` on ``(X, y)`` from a single ``predict`` call."""
    predictions = rf_model.predict(X)
    return r2_score(y, predictions), mean_squared_error(y, predictions)


def _draw_initial_batch(pool_index, size, seed):
    """Seed NumPy's global RNG, then draw ``size`` pool entries without replacement.

    Returns the drawn training-row indices and the pool with those entries removed.
    """
    np.random.seed(seed)
    positions = np.random.choice(range(len(pool_index)), size=size, replace=False)
    return pool_index[positions], np.delete(pool_index, positions, axis=0)


def main_function(query_number, iters):
    """Run one query-by-committee active-learning experiment on AutoMPG split ``iters``.

    Steps: load the split, label ``total_initail_size`` samples in two seeded
    draws, fit a random forest (used only for evaluation) and a committee of
    ``n_members`` skorch regressors, then for ``query_number`` rounds query
    ``batch_size`` pool samples with ``query_QBC``, add them to the labelled
    set, re-evaluate the forest and retrain the committee.

    Parameters
    ----------
    query_number : int
        Number of query rounds.
    iters : int
        Experiment index; selects the data files, the seeds and the suffix of
        the result files.

    Returns
    -------
    dict
        ``testing_r2``, ``testing_mse``, ``training_r2`` and ``training_mse``
        arrays of the random forest, one entry for the initial fit plus one
        per query round. (The training scores are not written to disk.)

    Side effects
    ------------
    Saves the labelled data/labels and the forest's test scores as ``.npy``
    files in the results directory, prints progress, and reseeds the global
    NumPy and torch RNGs.
    """
    # os.path.join replaces the original backslash literals: those contained
    # invalid escape sequences and resolved on Windows only.
    data_dir = os.path.join("..", "..", "Datasets", "AutoMPG")
    result_dir = os.path.join("..", "..", "Results", "Res_AutoMPG", "QBC", "Summary")
    run_tag = str(iters)
    # Create the output folder up front so a long run cannot fail at save time.
    os.makedirs(result_dir, exist_ok=True)

    # For saving results:
    rf_model_training_r2 = []
    rf_model_training_mse = []
    rf_model_testing_r2 = []
    rf_model_testing_mse = []

    # The model for evaluation:
    rf_model = RandomForestRegressor(random_state=seed_rf[iters], n_estimators=100)

    # Load the data.
    # NOTE(review): allow_pickle=True can execute arbitrary code when loading
    # untrusted files; keep it only for trusted, locally generated arrays.
    def load_split(stem):
        return np.load(os.path.join(data_dir, stem + run_tag + ".npy"), allow_pickle=True).astype(np.float32)

    X_train = load_split("X_train")
    X_test = load_split("X_test")
    y_train = load_split("y_train").reshape(-1, 1)
    y_test = load_split("y_test").reshape(-1, 1)

    n_features = X_train.shape[1]
    # The unlabeled pool, stored as row indices into X_train.
    X_index = np.arange(X_train.shape[0])

    # Initial stages 1 and 2: both draws reseed with the same seed, as before.
    stage1_rows, X_index = _draw_initial_batch(X_index, initial_size, seed_initial[iters])
    stage2_rows, X_index = _draw_initial_batch(
        X_index, total_initail_size - initial_size, seed_initial[iters]
    )
    initial_rows = np.concatenate([stage1_rows, stage2_rows])

    # Queried samples and labels (float32 copies of the selected rows).
    used_data = X_train[initial_rows]
    used_label = y_train[initial_rows].reshape(-1, 1)

    def evaluate_rf(train_X, train_y):
        """Refit the forest on the labelled set, record its scores, return test R2."""
        rf_model.fit(train_X, train_y.ravel())
        train_r2, train_mse = _score_rf(rf_model, train_X, train_y)
        test_r2, test_mse = _score_rf(rf_model, X_test, y_test)
        rf_model_training_r2.append(train_r2)
        rf_model_training_mse.append(train_mse)
        rf_model_testing_r2.append(test_r2)
        rf_model_testing_mse.append(test_mse)
        return test_r2

    # Finished the Initialization Stages: RF Regressor for evaluation.
    rf_model_r2 = evaluate_rf(used_data, used_label)
    print("After Initialization RF R2:", rf_model_r2)

    # Build the committee. Members beyond the available seed arrays are not reseeded.
    learner_list = []
    member_seeds = (seed_nn1, seed_nn2, seed_nn3)
    fit_data = torch.from_numpy(used_data).to(device)
    fit_label = torch.from_numpy(used_label).to(device)
    for member_idx in range(n_members):
        if member_idx < len(member_seeds):
            member_seed = member_seeds[member_idx][iters]
            np.random.seed(member_seed)
            torch.manual_seed(member_seed)

        # NOTE(review): MLP(...) is passed as an instance; confirm whether skorch
        # re-initialises its weights on every fit() or keeps training them.
        regressor = NeuralNetRegressor(
            MLP(n_features),
            criterion=MSELoss(),
            optimizer=torch.optim.Adam,
            verbose=0,
            max_epochs=100,
            lr=0.001,
            # Used for the batch AL
            callbacks=[EarlyStopping(patience=20), ('lr_scheduler', LRScheduler(policy=ReduceLROnPlateau))],
            train_split=ValidSplit(cv=5),
            warm_start=False,
            # Follow the notebook-level device instead of a hard-coded 'cuda',
            # so the model and the data tensors live on the same device.
            device=device,
            batch_size=200,
        )
        # Each member starts from its own bootstrap resample of the labelled set.
        jdx = np.random.choice(np.arange(fit_data.shape[0]), fit_data.shape[0], replace=True)
        regressor.fit(fit_data[jdx], fit_label[jdx])
        learner_list.append(regressor)

    # Scored once; both log lines are kept so the printed output is unchanged.
    committee_r2 = Committee_Prediction(learner_list, X_test, y_test)
    print("NN Test R2 after Initialization", committee_r2)
    print('NN R2', committee_r2)

    for query_idx in range(query_number):
        print('Query no. %d' % (query_idx+1))

        # Positions (within the current pool) of the most disputed samples.
        picked = query_QBC(X_train[X_index], learner_list, n_instances=batch_size)

        # Query the new sample:
        X_train_index = X_index[picked]
        new_X = X_train[X_train_index].reshape(batch_size, -1)
        new_y = y_train[X_train_index].reshape(batch_size, -1)

        # Adding the used data to the used_data pool
        used_data = np.append(used_data, new_X, axis=0).astype(np.float32)
        used_label = np.append(used_label, new_y, axis=0).astype(np.float32).reshape(-1, 1)

        # RF Evaluation (training and test scores are recorded inside).
        rf_model_r2 = evaluate_rf(used_data, used_label)

        print(np.unique(used_data, axis=0).shape)
        print("RF R2:", rf_model_r2)
        # Printed before the removal below, i.e. the pool size at query time.
        print("Remaining:", X_index.shape[0])

        # remove queried instance from pool
        X_index = np.delete(X_index, picked, axis=0)

        # Retrain the committee:
        learner_list = retrain_committee(
            learner_list,
            torch.from_numpy(used_data).to(device),
            torch.from_numpy(used_label).to(device),
        )

    rf_model_testing_r2 = np.array(rf_model_testing_r2)
    rf_model_testing_mse = np.array(rf_model_testing_mse)
    rf_model_training_r2 = np.array(rf_model_training_r2)
    rf_model_training_mse = np.array(rf_model_training_mse)

    # Used data and used labels:
    np.save(file=os.path.join(result_dir, "used_data" + run_tag + ".npy"), arr=used_data)
    np.save(file=os.path.join(result_dir, "used_labels" + run_tag + ".npy"), arr=used_label)

    np.save(file=os.path.join(result_dir, "testing_rf_r2_" + run_tag + ".npy"), arr=rf_model_testing_r2)
    np.save(file=os.path.join(result_dir, "testing_rf_mse_" + run_tag + ".npy"), arr=rf_model_testing_mse)

    return {
        "testing_r2": rf_model_testing_r2,
        "testing_mse": rf_model_testing_mse,
        "training_r2": rf_model_training_r2,
        "training_mse": rf_model_training_mse,
    }

In [None]:
# Run every experiment in sequence: experiment `i` loads its own data split
# and seeds inside main_function and writes its own result files.
for i in range(iteration):
    print("The Iteration is ", i)
    main_function(query_number, i)