In [47]:
import numpy as np
import pandas as pd
from sklearn.dummy import DummyRegressor
from sklearn.metrics import mean_squared_error
from sklearn.gaussian_process.kernels import WhiteKernel, RBF, ConstantKernel, Matern
from modAL.models import ActiveLearner
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import WhiteKernel, RBF, ConstantKernel, Matern
from modAL.acquisition import max_EI, max_PI, max_UCB
from modAL.models import BayesianOptimizer

np.set_printoptions(linewidth=np.inf)
np.set_printoptions(precision=2, suppress=True)

In [48]:
def GP_regression_std(regressor, X_pool):
    _, std = regressor.predict(X_pool, return_std=True)
    query_idx = np.argmax(std)
#     print(max(std))
#     print(min(std))
#     print(query_idx)
    return query_idx, X_pool[query_idx]

In [49]:
from sklearn import preprocessing

# import dataset
dataset = np.loadtxt(r'airfoil_self_noise.csv', delimiter=',', skiprows=1)
np.random.shuffle(dataset)

X = dataset[:,:-1]
# Create the Scaler object
scaler = preprocessing.StandardScaler()
# Fit your data on the scaler object
X = scaler.fit_transform(X)

y = dataset[:,-1].reshape(-1,1)
# y = scaler.fit_transform(y)

# the dataset is broken as follows: 9% for training, 1% for 10-fold cross validating, 2 person for test set and the
# the rest is treated as the pool for active learning
n_train = round(0.09 * X.shape[0])
n_val = round(0.01 * X.shape[0])
n_test = round(0.02 * X.shape[0])

X_train = X[:n_train,:]
y_train = y[:n_train]

X_test = X[n_train : n_train + n_test, :]
y_test = y[n_train : n_train + n_test]

X_pool = X[n_train + n_test :, :]
y_pool = y[n_train + n_test :]

In [51]:
# print(X_train[:20])

In [52]:
query_strats = [max_EI, max_PI, max_UCB]

for query_strat in query_strats:
    X_pool_temp = X_pool
    y_pool_temp = y_pool
    print('***')
#     print(query_strat)
    
    kernel = RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3)) \
         + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1))

    regressor = BayesianOptimizer(
        estimator=GaussianProcessRegressor(kernel=kernel),
        X_training=X_train, y_training=y_train,
        query_strategy=query_strat
    )

    y_pred = regressor.predict(X_test)
    print(mean_squared_error(y_test, y_pred))

    n_queries = 100
    for idx in range(n_queries):
        query_idx, query_instance = regressor.query(X_pool_temp)
#         print(query_idx)
        query_instance = query_instance.reshape(1,-1)
        query_label = y[query_idx].reshape(1,-1)
        regressor.teach(query_instance, query_label)
        X_pool_temp = np.delete(X_pool_temp, query_idx, 0)
        y_pool_temp = np.delete(y_pool_temp, query_idx, 0)

    y_pred_final = regressor.predict(X_test)
    print(mean_squared_error(y_test, y_pred_final))

***
118.62176119597072
73.81135775423988
***
118.62176119597072
73.32063448856658
***
118.62176119597072
70.54602298676623


In [53]:
from modAL.models import CommitteeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import BayesianRidge
from modAL.disagreement import max_std_sampling

  
kernel1 = RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3)) \
     + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1))
regressor1 = ActiveLearner(
        estimator=GaussianProcessRegressor(kernel=kernel1),
        X_training=X_train, y_training=y_train)
kernel2 = RBF(length_scale=1.0, length_scale_bounds=(1e-1, 1e2)) \
     + WhiteKernel(noise_level=2, noise_level_bounds=(1e-10, 1e+1))
regressor2 = ActiveLearner(
        estimator=GaussianProcessRegressor(kernel=kernel2),
        X_training=X_train, y_training=y_train)

learner_list = [regressor1, regressor2]

# initializing the Committee
committee = CommitteeRegressor(
    learner_list=learner_list,
    query_strategy=max_std_sampling
)

y_pred = committee.predict(X_test)
print(mean_squared_error(y_test, y_pred))

# active regression
n_queries = 100
for idx in range(n_queries):
    query_idx, query_instance = committee.query(X_pool)
    committee.teach(X_pool[query_idx], y_pool[query_idx].reshape(-1, 1))
    X_pool = np.delete(X_pool, query_idx, 0)
    y_pool = np.delete(y_pool, query_idx, 0)

y_pred_final = committee.predict(X_test)
print(mean_squared_error(y_test, y_pred_final))

118.5902268464111
83.89134548973418
