In [None]:
import copy
import os
from copy import deepcopy

import numpy as np
import pandas as pd
import torch
from scipy.spatial import distance
from scipy.spatial.distance import cdist
from scipy.spatial.distance import euclidean
from sklearn import metrics
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch import tensor
from torch.nn import MSELoss

In [None]:
# Hyper-parameters:
query_number = 25          # Number of active-learning query rounds in each experiment
iteration = 20             # Total number of experiments (independent runs)
batch_size = 10            # Number of samples queried from the pool in each round
total_initail_size = 12    # Labeled-set size once both initialisation stages are done
                           # (misspelled name kept: main_function reads this global)
initial_size = 5           # Number of samples drawn in initialisation stage 1

# Per-experiment seeds, indexed by the experiment number inside main_function:
# seed_rf feeds the random forest's random_state, seed_initial seeds the draw of
# the initial labeled samples.
# The paths are assembled with os.path.join instead of literals such as
# "..\..\Seeds": "\." and "\S" are invalid escape sequences (a SyntaxWarning on
# recent Python versions) and the hard-coded backslashes only resolve on Windows.
seed_rf = np.load(file=os.path.join("..", "..", "Seeds", "seed2.npy"))
seed_initial = np.load(file=os.path.join("..", "..", "Seeds", "seed3.npy"))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def main_function(query_number, iters):
    """Run one random-sampling active-learning experiment on NO2 data split ``iters``.

    The labeled set is initialised in two seeded stages (``initial_size`` samples,
    then ``total_initail_size - initial_size`` more). Afterwards ``query_number``
    batches of ``batch_size`` samples are drawn at random, without replacement,
    from the remaining pool. A random-forest regressor is refit and scored on the
    labeled set and on the test split once after the initialisation and once
    after every query round.

    Reads the module-level names ``seed_rf``, ``seed_initial``, ``initial_size``,
    ``total_initail_size`` and ``batch_size``.

    Parameters
    ----------
    query_number : int
        Number of query rounds to run after the initialisation.
    iters : int
        Experiment index; selects the data split files, the per-run seeds and
        the numeric suffix of the result files.

    Returns
    -------
    dict
        Keys ``"training_r2"``, ``"training_mse"``, ``"testing_r2"`` and
        ``"testing_mse"``, each a 1-D array with ``query_number + 1`` entries
        (initialisation first, then one per query round). The training scores
        are only available through this return value; they are not saved.

    Side effects
    ------------
    Prints progress, and writes ``used_data<iters>.npy``,
    ``used_labels<iters>.npy``, ``testing_rf_r2_<iters>.npy`` and
    ``testing_rf_mse_<iters>.npy`` into ``../../Results/Res_NO2/Random/Summary``.
    """
    # Directories are assembled with os.path.join: the former backslash literals
    # contained invalid escape sequences ("\.", "\D", "\R", ...) and only
    # resolved on Windows.
    data_dir = os.path.join("..", "..", "Datasets", "NO2")
    result_dir = os.path.join("..", "..", "Results", "Res_NO2", "Random", "Summary")
    suffix = str(iters) + ".npy"

    # Score histories: one entry per evaluation.
    rf_model_training_r2 = []
    rf_model_training_mse = []
    rf_model_testing_r2 = []
    rf_model_testing_mse = []

    # The model used for evaluation:
    rf_model = RandomForestRegressor(random_state=seed_rf[iters], n_estimators=100)

    # Load the data split of this experiment.
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only point this at trusted files.
    X_train = np.load(os.path.join(data_dir, "X_train" + suffix), allow_pickle=True).astype(np.float32)
    X_test = np.load(os.path.join(data_dir, "X_test" + suffix), allow_pickle=True).astype(np.float32)
    y_train = np.load(os.path.join(data_dir, "y_train" + suffix), allow_pickle=True).astype(np.float32).reshape(-1, 1)
    y_test = np.load(os.path.join(data_dir, "y_test" + suffix), allow_pickle=True).astype(np.float32).reshape(-1, 1)

    # Row indices of X_train that are still unlabeled (the pool).
    pool_index = np.arange(X_train.shape[0])

    # Initial stage 1: seeded draw of the first `initial_size` samples.
    np.random.seed(seed_initial[iters])
    picked = np.random.choice(range(len(pool_index)), size=initial_size, replace=False)
    stage1_rows = pool_index[picked]
    pool_index = np.delete(pool_index, picked, axis=0)

    # Initial stage 2: the RNG is reseeded with the same seed before drawing the
    # remaining initial samples from the reduced pool (kept as-is so that the
    # selected samples stay reproducible).
    np.random.seed(seed_initial[iters])
    picked = np.random.choice(range(len(pool_index)), size=total_initail_size - initial_size, replace=False)
    stage2_rows = pool_index[picked]
    pool_index = np.delete(pool_index, picked, axis=0)

    # Labeled set after initialisation: stage-1 samples followed by stage-2 samples.
    initial_rows = np.concatenate([stage1_rows, stage2_rows])
    used_data = X_train[initial_rows]
    used_label = y_train[initial_rows].reshape(-1, 1)

    # Evaluate the model on the initial labeled set.
    train_r2, train_mse, rf_model_r2, rf_model_mse = _fit_and_score(
        rf_model, used_data, used_label, X_test, y_test)
    rf_model_training_r2.append(train_r2)
    rf_model_training_mse.append(train_mse)
    rf_model_testing_r2.append(rf_model_r2)
    rf_model_testing_mse.append(rf_model_mse)

    print("After Initialization RF R2:", rf_model_r2)

    # A dedicated loop counter: the sampled positions no longer overwrite it.
    for query_idx in range(query_number):
        # Seeding with None reinitialises the global RNG from a fresh,
        # unpredictable state, so the queried batches differ between runs.
        np.random.seed(None)

        print('Query no. %d' % (query_idx + 1))
        picked = np.random.choice(range(pool_index.shape[0]), size=batch_size, replace=False)

        # Query the new samples:
        queried_rows = pool_index[picked]
        new_X = X_train[queried_rows].reshape(batch_size, -1)
        new_y = y_train[queried_rows].reshape(batch_size, -1)

        # Add the queried samples to the labeled set.
        used_data = np.concatenate([used_data, new_X], axis=0).astype(np.float32)
        used_label = np.concatenate([used_label, new_y], axis=0).astype(np.float32).reshape(-1, 1)

        # Refit and evaluate on the enlarged labeled set.
        train_r2, train_mse, rf_model_r2, rf_model_mse = _fit_and_score(
            rf_model, used_data, used_label, X_test, y_test)
        rf_model_training_r2.append(train_r2)
        rf_model_training_mse.append(train_mse)
        rf_model_testing_r2.append(rf_model_r2)
        rf_model_testing_mse.append(rf_model_mse)

        # Remove the queried instances from the pool.
        pool_index = np.delete(pool_index, picked, axis=0)

        # Shape of the de-duplicated labeled set (duplicate rows would shrink it).
        print(np.unique(used_data, axis=0).shape)
        print("RF R2:", rf_model_r2)
        print("Remaining:", pool_index.shape[0])

    rf_model_testing_r2 = np.array(rf_model_testing_r2)
    rf_model_testing_mse = np.array(rf_model_testing_mse)
    rf_model_training_r2 = np.array(rf_model_training_r2)
    rf_model_training_mse = np.array(rf_model_training_mse)

    # Persist the labeled set and the test-score histories.
    np.save(file=os.path.join(result_dir, "used_data" + suffix), arr=used_data)
    np.save(file=os.path.join(result_dir, "used_labels" + suffix), arr=used_label)

    np.save(file=os.path.join(result_dir, "testing_rf_r2_" + suffix), arr=rf_model_testing_r2)
    np.save(file=os.path.join(result_dir, "testing_rf_mse_" + suffix), arr=rf_model_testing_mse)

    # Hand the histories back so the training scores are not silently dropped.
    return {
        "training_r2": rf_model_training_r2,
        "training_mse": rf_model_training_mse,
        "testing_r2": rf_model_testing_r2,
        "testing_mse": rf_model_testing_mse,
    }


def _fit_and_score(rf_model, X_labeled, y_labeled, X_test, y_test):
    """Refit ``rf_model`` on the labeled set and return (train R2, train MSE, test R2, test MSE)."""
    rf_model.fit(X_labeled, y_labeled.ravel())
    # Predict once per dataset and reuse the predictions for both metrics.
    train_pred = rf_model.predict(X_labeled)
    test_pred = rf_model.predict(X_test)
    return (
        r2_score(y_labeled, train_pred),
        mean_squared_error(y_labeled, train_pred),
        r2_score(y_test, test_pred),
        mean_squared_error(y_test, test_pred),
    )

In [None]:
# Run the experiments one after another; the loop counter is passed to
# main_function as the experiment index.
for run_idx in range(iteration):
    print("The Iteration is ", run_idx)
    main_function(query_number, run_idx)