In [None]:
# uq360 version 0.2 needs to be installed
#!pip install uq360

# Results are the same whether we select 60K or 10K points; the graphs look the same.

In [1]:
try:
    from uq360.metrics import picp, mpiw, compute_regression_metrics
    from uq360.metrics import UncertaintyCharacteristicsCurve as ucc

    from uq360.algorithms import * 
    from uq360.algorithms.actively_learned_model import ActivelyLearnedModel
    from uq360.algorithms.ensemble_heteroscedastic_regression import EnsembleHeteroscedasticRegression

    import pandas as pd
    import numpy as np
    import torch

    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import mean_squared_error
    import matplotlib.pyplot as plt
    %matplotlib inline
    
    print('All the libraries are found')
    
except:
    print("One or more libraries need to be installed")

All the libraries are found


In [2]:
# Ad-hoc load of the spreadsheet: row 1 (0-based) supplies the column names,
# columns that contain only missing values are discarded, and the '#' column
# is removed.
file_name = '/data/MGP/TestPointsN2_CH4_H2O_000.xlsx'
df = (
    pd.read_excel(file_name, header=1)
    .dropna(axis=1, how='all')
    .drop('#', axis=1)
)

In [3]:
def read_data(file_name):
    """Load the spreadsheet at ``file_name`` into a DataFrame.

    Row 1 (0-based) of the sheet is used as the header, columns that hold
    only missing values are dropped, and the ``'#'`` column is removed.

    Returns
    -------
    pandas.DataFrame
        The cleaned table.
    """
    frame = pd.read_excel(file_name, header=1)
    frame = frame.dropna(axis=1, how='all')
    return frame.drop('#', axis=1)

# Load the raw table; the following cells derive the model inputs from `data`.
data = read_data(r'/data/MGP/TestPointsN2_CH4_H2O_000.xlsx')

In [4]:
# Sanity check: (rows, columns) of the loaded table.
data.shape

(63116, 36)

In [5]:
def create_new_col(df, col1, col2, phi_surge=0.076):
    """Store the percentage distance of ``col1`` from ``phi_surge`` in ``col2``.

    The new column is ``100 * (df[col1] - phi_surge) / phi_surge``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend. It is modified in place and also returned.
    col1 : str
        Name of the existing column holding the values to convert.
    col2 : str
        Name of the column to create (or overwrite).
    phi_surge : float, optional
        Reference value. Defaults to 0.076, the constant that used to be
        hard-coded in the body.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now containing ``col2``.

    Raises
    ------
    ValueError
        If ``phi_surge`` is zero (the relative distance is undefined).
    """
    if phi_surge == 0:
        raise ValueError("phi_surge must be non-zero")

    df[col2] = 100*(df[col1]-phi_surge)/phi_surge

    return df

# Source column read by create_new_col.
col1 = 'phi'
# Derived column written by create_new_col; it is the regression target
# selected later through OUTPUT_C.
col2 = 'surge_distance_from_eq'
# create_new_col adds the column to `data` in place and returns the same frame.
data = create_new_col(data,col1,col2)

In [6]:
def select_columns(df,columns,input_columns,output_columns):
    """Return the input columns followed by the output columns of ``df``.

    ``df`` is first restricted to ``columns``; the input and output groups
    are then taken from that subset and joined side by side, so a name that
    is missing from ``columns`` raises ``KeyError``. ``df`` is not modified.
    """
    subset = df[columns]
    parts = [subset[group] for group in (input_columns, output_columns)]
    return pd.concat(parts, axis=1)

In [7]:
# Columns kept from the raw table before splitting into inputs and output.
columns = [
    'Pin [kPa]',
    'Tin [K]',
    'N [rpm]',
    'Differential Pressure [kPa]',
    'Total Consumed power',
    'phi',
    'Surge Distance',
    'surge_distance_from_eq',
    'GVFin',
    'Qin [m3/s]',
    'GVFout',
    'Qv_out [m3/s]',
]

# Model inputs (five features).
INPUT_C = [
    'Pin [kPa]',
    'Tin [K]',
    'N [rpm]',
    'Differential Pressure [kPa]',
    'Total Consumed power',
]

# Regression target.
OUTPUT_C = ['surge_distance_from_eq']

data_1 = select_columns(data, columns, INPUT_C, OUTPUT_C)

In [8]:
# Sanity check: the column count should be len(INPUT_C) + len(OUTPUT_C).
data_1.shape

(63116, 6)

In [9]:
# scale the data and select the number of samples to be considered
def scale_data(df,samples):
    """Standardise the features of the first ``samples`` rows of ``df``.

    Every column except the last is treated as a feature; the last column is
    the label.

    Returns
    -------
    tuple
        ``(df, y_labels, x_data)`` where ``df`` is the frame exactly as it was
        passed in (not the row slice), ``y_labels`` is the last column of the
        slice as an ``(n_rows, 1)`` array, and ``x_data`` holds the features of
        the slice after ``StandardScaler`` was fitted on and applied to them.
    """
    head = df[:samples]
    features = head.iloc[:, :-1].values

    # Labels stay a column vector of shape (n_rows, 1).
    y_labels = head.iloc[:, -1:].values.reshape((-1, 1))

    # The scaler is fitted on the very rows it then transforms.
    x_data = StandardScaler().fit(features).transform(features)

    return df, y_labels, x_data

# Use the first 10000 rows. NOTE: scale_data returns its input frame unchanged
# as the first element, so `data` is rebound to `data_1` (all rows, selected
# columns) here, replacing the raw table loaded earlier.
data,y_labels,x_data = scale_data(data_1,10000)

In [10]:
''' 
    Offline sample and query, two mandatory arguments (and the data):
    - Position where to start sampling
    - Number of points to sample
'''
def sample_(start_index, n_points, X_data=None):
    """Return ``n_points`` consecutive rows of the feature matrix.

    Parameters
    ----------
    start_index : int
        Row at which sampling starts.
    n_points : int
        Number of rows to return.
    X_data : array-like, optional
        Two-dimensional matrix to slice. When omitted, the notebook-level
        ``x_data`` is looked up at call time.

    Returns
    -------
    The rows ``start_index`` .. ``start_index + n_points - 1`` (all columns);
    fewer rows are returned when the matrix ends earlier.
    """
    # The previous body ignored ``X_data`` and always read the global
    # ``x_data``. Honour the argument, and fall back to the global only when
    # nothing was passed so existing two-argument calls behave as before.
    if X_data is None:
        X_data = x_data
    return X_data[start_index:start_index+n_points,:]

def querry_(start_index, n_points, y_labels=None):
    """Return the labels of ``n_points`` consecutive samples.

    Parameters
    ----------
    start_index : int
        Position of the first label to return.
    n_points : int
        Number of labels to return.
    y_labels : array-like, optional
        Labels to slice. When omitted, the notebook-level ``y_labels`` is
        looked up at call time.

    Returns
    -------
    ``y_labels[start_index:start_index + n_points]``.
    """
    # A default of ``y_labels=y_labels`` is frozen when the ``def`` runs, so
    # re-running the scaling cell would leave this function on stale labels
    # while ``sample_`` already reads the fresh ``x_data``. Resolve the
    # notebook-level array at call time instead.
    if y_labels is None:
        y_labels = globals()["y_labels"]
    return y_labels[start_index:start_index+n_points]


In [11]:
# define configuration for both models, regression baseline and regression with Active Learning
def config_():
    """Build the configuration dictionaries used by the models in this notebook.

    Returns
    -------
    tuple
        ``(config_HR, HR_kwargs, config_AL)`` where

        * ``config_HR`` holds the hyper-parameters of one heteroscedastic
          regression network,
        * ``HR_kwargs`` wraps ``config_HR`` together with the model type and
          the torch device,
        * ``config_AL`` is the configuration handed to ``ActivelyLearnedModel``;
          it nests the ensemble configuration under
          ``config_AL["model_kwargs"]["config"]``.
    """
    device = torch.device("cpu")

    # define config for Heteroscedastic regression
    config_HR = {"num_features": 5, "num_hidden": 32, "num_outputs": 1, "batch_size": 16, "num_epochs": 10,
                      "lr": 0.001}
    HR_kwargs = {"model_type":'mlp',
                   "config": config_HR,
                   "device": device}

    # define config for ensemble; baseline() and with_al() overwrite
    # "num_models" before they train.
    config_ensemble = {"num_models": 1,
              "batch_size": 16,
              "model_kwargs":HR_kwargs, }

    # define config for active learning object
    # (per the original notes: T = no of iterations, K = no of uncertain points)
    # The unused locals ``ninit = 128`` and ``T = 2`` were removed: they never
    # reached the dictionary below and ``ninit`` disagreed with "num_init".
    config_AL = {"num_init": 512 ,
     "T": 2,
     "K": 16,
     "M": 4,
     "sampling_function": sample_,
     "querry_function" : querry_,
     "model_function": EnsembleHeteroscedasticRegression,
     "model_kwargs": {"model_type":'ensembleheteroscedasticregression',
                                                 "config":config_ensemble,
                                                 "device":device}, }

    return config_HR,HR_kwargs,config_AL
config_HR,HR_kwargs,config_AL = config_()

In [12]:
# Verify that the data set has the good dimension
def verify_dimension(data_x,config_AL,config_HR):
    """Check that ``data_x`` is large enough and has the expected feature count.

    Parameters
    ----------
    data_x : array-like with a ``shape`` attribute
        Feature matrix (rows are samples, columns are features).
    config_AL : dict
        Must provide ``"num_init"``, ``"T"``, ``"M"`` and ``"K"``.
    config_HR : dict
        Must provide ``"num_features"``.

    Returns
    -------
    bool
        ``True`` when both checks pass.

    Raises
    ------
    AssertionError
        If ``data_x`` has fewer than ``num_init + T*M*K`` rows, or if its
        column count differs from ``num_features``. The message states the
        expected and the actual value.
    """
    n_required = config_AL["num_init"] + config_AL["T"]*config_AL["M"]*config_AL["K"]
    assert data_x.shape[0] >= n_required, (
        "data has {} rows but num_init + T*M*K requires at least {}".format(
            data_x.shape[0], n_required)
    )

    # Looked up only after the row check, matching the original evaluation order.
    assert data_x.shape[1] == config_HR["num_features"], (
        "data has {} feature columns but config_HR['num_features'] is {}".format(
            data_x.shape[1], config_HR["num_features"])
    )

    return True

# Fail early if the scaled data cannot feed the active-learning configuration.
verify_dimension(x_data,config_AL,config_HR)

True

In [13]:
import al_model
from al_model import ActivelyLearnedModel

def baseline(config_AL, K_train_list=None, ninit=128, N_test=512, T=2, num_models=5):
    """Train the ensemble without active learning on growing training sets.

    For every ``K`` in ``K_train_list`` a model is configured with
    ``num_init = ninit + K * T`` and ``T = 0`` (no active-learning rounds),
    fitted, and evaluated on the ``N_test`` points that follow the training
    points in the offline data served by ``sample_`` / ``querry_``.

    Parameters
    ----------
    config_AL : dict
        Active-learning configuration as produced by ``config_``. It is
        copied; the caller's dictionary is left untouched.
    K_train_list : sequence of int, optional
        Values of ``K`` to sweep. Defaults to
        ``[8, 16, 32, 64, 128, 256, 512, 1024]``.
    ninit : int, optional
        Base number of training points (default 128).
    N_test : int, optional
        Number of test points (default 512).
    T : int, optional
        Multiplier applied to ``K`` when sizing the training set (default 2).
    num_models : int, optional
        Ensemble size (default 5).

    Returns
    -------
    list of float
        One fractional error ``||y - y_pred||_2 / ||y||_2`` per entry of
        ``K_train_list``.
    """
    if K_train_list is None:
        K_train_list = [8,16, 32, 64, 128, 256,512,1024]

    # Work on a private copy (including the two nested dictionaries that get
    # written to). Previously num_init / T / num_models were written straight
    # into the caller's dictionary, which left the shared config with T == 0.
    cfg = dict(config_AL)
    cfg["model_kwargs"] = dict(cfg["model_kwargs"])
    cfg["model_kwargs"]["config"] = dict(cfg["model_kwargs"]["config"])

    frac_err_baseline = []
    device = torch.device("cpu")
    for i, k_train in enumerate(K_train_list):

        # Update dictionary to have no active learning and the correct amount of points
        cfg["model_kwargs"]["config"]["num_models"] = num_models
        cfg["num_init"] = ninit + k_train * T
        print(cfg["num_init"])
        cfg["T"] = 0  # no AL here

        # Instantiate the class object and train the model
        uq_model = ActivelyLearnedModel(config=cfg, device=device, online=False)
        uq_model = uq_model.fit()

        # Create a test dataset from the points right after the training ones
        # (with T == 0 the offset reduces to num_init).
        test_start = int(cfg["num_init"] + cfg["T"]*cfg["M"]*cfg["K"])
        X_test = sample_(test_start, int(N_test))
        y_test = querry_(test_start, int(N_test))
        y_test = np.reshape(y_test, (-1,))
        print(X_test.shape,y_test.shape)

        res = uq_model.predict(X_test)

        y_test_pred = np.squeeze(res.y_mean, axis=1)

        frac_err_baseline.append(np.sqrt(np.sum(np.square(y_test - y_test_pred)))/np.sqrt(np.sum(np.square(y_test))))
        print('iteration---------',i)

    return  frac_err_baseline

In [14]:
# Run the no-active-learning sweep; this trains one ensemble per entry of
# K_train_list and prints the training log of every network.
errors_baseline=baseline(config_AL)

144
(144, 5) (144, 1)

Training model 0

Epoch: 0, loss = 1339.5959472656252
Epoch: 1, loss = 1230.609219021267
Epoch: 2, loss = 1138.1463216145833
Epoch: 3, loss = 1059.0402018229167
Epoch: 4, loss = 990.7792426215276
Epoch: 5, loss = 931.0671488444011
Epoch: 6, loss = 877.9655015733506
Epoch: 7, loss = 830.0348239474826
Epoch: 8, loss = 786.2442660861547
Epoch: 9, loss = 745.7560356987847

Training model 1

Epoch: 0, loss = 1093.1488037109375
Epoch: 1, loss = 1012.4629380967881
Epoch: 2, loss = 941.2650553385419
Epoch: 3, loss = 877.8430955674913
Epoch: 4, loss = 821.1180623372397
Epoch: 5, loss = 770.0280253092448
Epoch: 6, loss = 723.4728190104166
Epoch: 7, loss = 680.8946228027344
Epoch: 8, loss = 641.7275288899739
Epoch: 9, loss = 605.594723171658

Training model 2

Epoch: 0, loss = 1159.1131117078994
Epoch: 1, loss = 1068.4368218315974
Epoch: 2, loss = 989.362772623698
Epoch: 3, loss = 919.8985358344185
Epoch: 4, loss = 858.6429578993055
Epoch: 5, loss = 804.3587578667534
Epoch:

Epoch: 5, loss = 381.9777914683024
Epoch: 6, loss = 338.32680066426593
Epoch: 7, loss = 302.54032834370935
Epoch: 8, loss = 272.85818862915033
Epoch: 9, loss = 247.94705645243323

Training model 2

Epoch: 0, loss = 759.3615201314291
Epoch: 1, loss = 651.3670476277667
Epoch: 2, loss = 564.3905232747395
Epoch: 3, loss = 492.19024785359704
Epoch: 4, loss = 431.4542719523112
Epoch: 5, loss = 380.10530853271484
Epoch: 6, loss = 336.8119335174561
Epoch: 7, loss = 300.3490091959635
Epoch: 8, loss = 269.5759569803874
Epoch: 9, loss = 243.56820551554358

Training model 3

Epoch: 0, loss = 924.4806709289552
Epoch: 1, loss = 768.8586273193358
Epoch: 2, loss = 655.981669108073
Epoch: 3, loss = 569.4523035685221
Epoch: 4, loss = 499.9615828196208
Epoch: 5, loss = 442.40200297037774
Epoch: 6, loss = 393.8150018056234
Epoch: 7, loss = 352.5420627593994
Epoch: 8, loss = 317.4135106404622
Epoch: 9, loss = 287.45487213134766

Training model 4

Epoch: 0, loss = 938.7757466634114
Epoch: 1, loss = 795.5588

In [15]:
# Fractional test error of the baseline, one value per entry of K_train_list.
errors_baseline

[0.9857035541465546,
 0.9812534058777639,
 0.9772264974048933,
 0.968115522238074,
 0.9476199500144877,
 0.9030822492576017,
 0.8047329166560927,
 0.6545328163490248]

In [16]:
import al_model
from al_model import ActivelyLearnedModel


def with_al(config_AL, K_train_list=None, ninit=128, N_test=512, T=2, M=4, num_models=5):
    """Train the ensemble with active learning for a sweep of ``K`` values.

    For every ``K`` in ``K_train_list`` the configuration is set to
    ``num_init = ninit``, the given ``K``, ``M`` and ``T``, an
    ``ActivelyLearnedModel`` is fitted, and it is evaluated on the ``N_test``
    points starting at offset ``num_init + T*M*K`` of the offline data served
    by ``sample_`` / ``querry_``.

    Parameters
    ----------
    config_AL : dict
        Active-learning configuration as produced by ``config_``. It is
        copied; the caller's dictionary is left untouched.
    K_train_list : sequence of int, optional
        Values of ``K`` to sweep. Defaults to
        ``[8, 16, 32, 64, 128, 256, 512, 1024]``.
    ninit : int, optional
        Value written to ``"num_init"`` (default 128).
    N_test : int, optional
        Number of test points (default 512).
    T : int, optional
        Value written to ``"T"`` (default 2).
    M : int, optional
        Value written to ``"M"`` (default 4).
    num_models : int, optional
        Ensemble size (default 5).

    Returns
    -------
    list of float
        One fractional error ``||y - y_pred||_2 / ||y||_2`` per entry of
        ``K_train_list``.
    """
    if K_train_list is None:
        K_train_list = [8,16, 32, 64, 128, 256,512,1024]

    # Work on a private copy (including the two nested dictionaries that get
    # written to) so the sweep does not rewrite the caller's configuration.
    cfg = dict(config_AL)
    cfg["model_kwargs"] = dict(cfg["model_kwargs"])
    cfg["model_kwargs"]["config"] = dict(cfg["model_kwargs"]["config"])

    # AL, ensemble of `num_models` NNs
    frac_err_AL_ens = []
    device = torch.device("cpu")
    for k_train in K_train_list:

        # Update dictionary for the correct amount of points
        cfg["model_kwargs"]["config"]["num_models"] = num_models
        cfg["num_init"] = ninit
        cfg["K"] = k_train
        cfg["M"] = M
        cfg["T"] = T

        # Instantiate the class object and train the model
        uq_model = ActivelyLearnedModel(config=cfg, device=device, online=False)
        uq_model = uq_model.fit()

        # Create a test dataset starting after the num_init + T*M*K leading points
        test_start = int(cfg["num_init"] + cfg["T"]*cfg["M"]*cfg["K"])
        X_test = sample_(test_start, int(N_test))
        y_test = querry_(test_start, int(N_test))
        y_test = np.reshape(y_test, (-1,))

        print('test set size is', X_test.shape)

        res = uq_model.predict(X_test)
        y_test_pred = np.squeeze(res.y_mean, axis=1)

        frac_err_AL_ens.append(np.sqrt(np.sum(np.square(y_test - y_test_pred)))/np.sqrt(np.sum(np.square(y_test))))

    return frac_err_AL_ens


In [17]:
# Run the active-learning sweep over the same K values as the baseline.
errors_al = with_al(config_AL)

(128, 5) (128, 1)

Training model 0

Epoch: 0, loss = 897.7526092529297
Epoch: 1, loss = 848.5229949951172
Epoch: 2, loss = 804.1202239990234
Epoch: 3, loss = 763.4480133056641
Epoch: 4, loss = 725.9991760253906
Epoch: 5, loss = 691.2446517944336
Epoch: 6, loss = 658.7970314025879
Epoch: 7, loss = 628.2924003601074
Epoch: 8, loss = 599.5081977844238
Epoch: 9, loss = 572.2824592590332

Training model 1

Epoch: 0, loss = 972.528881072998
Epoch: 1, loss = 899.6927032470703
Epoch: 2, loss = 835.6984024047852
Epoch: 3, loss = 779.0978622436523
Epoch: 4, loss = 728.9292640686035
Epoch: 5, loss = 684.1504936218262
Epoch: 6, loss = 643.9094848632812
Epoch: 7, loss = 607.5042190551758
Epoch: 8, loss = 574.3970108032227
Epoch: 9, loss = 544.1415100097656

Training model 2

Epoch: 0, loss = 837.0923919677734
Epoch: 1, loss = 792.8727951049805
Epoch: 2, loss = 752.4189338684082
Epoch: 3, loss = 714.944034576416
Epoch: 4, loss = 679.9913101196289
Epoch: 5, loss = 647.3750648498535
Epoch: 6, loss = 

Epoch: 7, loss = 313.84133232964405
Epoch: 8, loss = 296.57304043240015
Epoch: 9, loss = 280.91895039876306

Training model 2

Epoch: 0, loss = 496.0865851508247
Epoch: 1, loss = 465.86676025390625
Epoch: 2, loss = 437.9652540418837
Epoch: 3, loss = 412.0892605251736
Epoch: 4, loss = 388.117436726888
Epoch: 5, loss = 365.9635128445096
Epoch: 6, loss = 345.4506310356988
Epoch: 7, loss = 326.4936625162761
Epoch: 8, loss = 309.00893147786456
Epoch: 9, loss = 292.87135314941406

Training model 3

Epoch: 0, loss = 571.5410902235244
Epoch: 1, loss = 534.1484205457899
Epoch: 2, loss = 500.50803290473084
Epoch: 3, loss = 469.9461144341363
Epoch: 4, loss = 442.18251715766064
Epoch: 5, loss = 416.87645128038196
Epoch: 6, loss = 393.73914082845056
Epoch: 7, loss = 372.5487891303169
Epoch: 8, loss = 353.07392205132373
Epoch: 9, loss = 335.1445617675781

Training model 4

Epoch: 0, loss = 586.887196858724
Epoch: 1, loss = 549.0422939724392
Epoch: 2, loss = 514.7956475151909
Epoch: 3, loss = 483.610

Epoch: 2, loss = 251.59333038330075
Epoch: 3, loss = 232.90170923868817
Epoch: 4, loss = 216.43096160888672
Epoch: 5, loss = 201.87908871968588
Epoch: 6, loss = 188.93476994832358
Epoch: 7, loss = 177.32235908508298
Epoch: 8, loss = 166.86341094970703
Epoch: 9, loss = 157.3921750386556

Training model 4

Epoch: 0, loss = 296.88513946533203
Epoch: 1, loss = 272.9752324422201
Epoch: 2, loss = 251.81521097819012
Epoch: 3, loss = 233.0875091552734
Epoch: 4, loss = 216.51594543457028
Epoch: 5, loss = 201.8022263844808
Epoch: 6, loss = 188.66959381103516
Epoch: 7, loss = 176.90728378295898
Epoch: 8, loss = 166.34072240193686
Epoch: 9, loss = 156.81050109863284
(192, 5) (192, 1)
test set size is (512, 5)
(128, 5) (128, 1)

Training model 0

Epoch: 0, loss = 897.7526092529297
Epoch: 1, loss = 848.5229949951172
Epoch: 2, loss = 804.1202239990234
Epoch: 3, loss = 763.4480133056641
Epoch: 4, loss = 725.9991760253906
Epoch: 5, loss = 691.2446517944336
Epoch: 6, loss = 658.7970314025879
Epoch: 7, l

(128, 5) (128, 1)

Training model 0

Epoch: 0, loss = 530.8883991241455
Epoch: 1, loss = 470.4007978439331
Epoch: 2, loss = 420.3075866699219
Epoch: 3, loss = 378.23023414611816
Epoch: 4, loss = 342.45516204833984
Epoch: 5, loss = 311.7868938446045
Epoch: 6, loss = 285.3089599609375
Epoch: 7, loss = 262.30015563964844
Epoch: 8, loss = 242.19718265533447
Epoch: 9, loss = 224.55310344696045

Training model 1

Epoch: 0, loss = 484.8396415710449
Epoch: 1, loss = 421.4496383666992
Epoch: 2, loss = 371.05803203582764
Epoch: 3, loss = 330.5278911590576
Epoch: 4, loss = 297.3754873275757
Epoch: 5, loss = 269.833945274353
Epoch: 6, loss = 246.6099157333374
Epoch: 7, loss = 226.7713770866394
Epoch: 8, loss = 209.6322422027588
Epoch: 9, loss = 194.67218017578125

Training model 2

Epoch: 0, loss = 467.53407096862793
Epoch: 1, loss = 415.7057685852051
Epoch: 2, loss = 371.9167137145996
Epoch: 3, loss = 334.5950222015381
Epoch: 4, loss = 302.6195068359375
Epoch: 5, loss = 275.1696376800537
Epoch: 6

Epoch: 6, loss = 39.62528133392333
Epoch: 7, loss = 35.11491639614105
Epoch: 8, loss = 31.421671891212455
Epoch: 9, loss = 28.364833593368523

Training model 2

Epoch: 0, loss = 112.95289134979248
Epoch: 1, loss = 89.010072517395
Epoch: 2, loss = 72.71795010566711
Epoch: 3, loss = 60.97576723098756
Epoch: 4, loss = 52.07804622650147
Epoch: 5, loss = 45.07319936752318
Epoch: 6, loss = 39.45432169437408
Epoch: 7, loss = 34.900456476211545
Epoch: 8, loss = 31.17993927001953
Epoch: 9, loss = 28.116901159286503

Training model 3

Epoch: 0, loss = 132.90819368362423
Epoch: 1, loss = 104.30334339141848
Epoch: 2, loss = 84.86042757034305
Epoch: 3, loss = 70.82139596939088
Epoch: 4, loss = 60.19446153640748
Epoch: 5, loss = 51.89540438652038
Epoch: 6, loss = 45.28352317810059
Epoch: 7, loss = 39.93671293258666
Epoch: 8, loss = 35.55444283485413
Epoch: 9, loss = 31.92900936603546

Training model 4

Epoch: 0, loss = 135.65000419616703
Epoch: 1, loss = 106.39943714141849
Epoch: 2, loss = 86.677296

Epoch: 8, loss = 646.1874465942383
Epoch: 9, loss = 615.9990577697754

Training model 4

Epoch: 0, loss = 1040.334945678711
Epoch: 1, loss = 977.7631530761719
Epoch: 2, loss = 921.0476760864258
Epoch: 3, loss = 869.0232009887695
Epoch: 4, loss = 821.2963371276855
Epoch: 5, loss = 777.464771270752
Epoch: 6, loss = 737.0840835571289
Epoch: 7, loss = 699.7607879638672
Epoch: 8, loss = 665.0955924987793
Epoch: 9, loss = 632.8350448608398

T = 0

(1024, 5) (1024, 1)

Training model 0

Epoch: 0, loss = 393.0636806488036
Epoch: 1, loss = 244.1439666748047
Epoch: 2, loss = 171.70213180118142
Epoch: 3, loss = 129.48386679755316
Epoch: 4, loss = 102.04987955093384
Epoch: 5, loss = 82.65873283810083
Epoch: 6, loss = 68.30274656083847
Epoch: 7, loss = 57.4078000386556
Epoch: 8, loss = 48.99597777260673
Epoch: 9, loss = 42.414575709237

Training model 1

Epoch: 0, loss = 348.93428315056684
Epoch: 1, loss = 212.60245503319638
Epoch: 2, loss = 150.33471319410538
Epoch: 3, loss = 114.33254861831665
Ep

In [18]:
# Fractional test error with active learning, one value per entry of K_train_list.
errors_al

[0.9293802521732432,
 0.9258422335970136,
 0.9140231568534581,
 0.8880773403590337,
 0.8294082156666809,
 0.7317469707959956,
 0.6010039033083436,
 0.557883292852054]

In [19]:
def compare_err_plot(frac_err_baseline,frac_err_AL_ens, K_train_list=None, ninit=128, T=2):
    """Plot baseline and active-learning errors against the training-set size.

    The x values are ``ninit + T * K`` for every ``K`` in ``K_train_list``.

    Parameters
    ----------
    frac_err_baseline : sequence of float
        Errors returned by ``baseline``.
    frac_err_AL_ens : sequence of float
        Errors returned by ``with_al``.
    K_train_list : sequence of int, optional
        ``K`` values the errors were computed for. Defaults to
        ``[8, 16, 32, 64, 128, 256, 512, 1024]``.
    ninit : int, optional
        Base number of training points (default 128).
    T : int, optional
        Multiplier applied to ``K`` (default 2).

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.

    Raises
    ------
    ValueError
        If either error sequence does not have one value per ``K``.
    """
    if K_train_list is None:
        K_train_list = [8,16, 32, 64, 128, 256,512,1024]
    N_train_list = ninit + T * np.array(K_train_list)

    # Report a length mismatch in terms of the inputs instead of letting the
    # plotting call fail on it.
    n_sizes = len(N_train_list)
    if len(frac_err_baseline) != n_sizes or len(frac_err_AL_ens) != n_sizes:
        raise ValueError(
            "expected {} error values per curve, got {} (baseline) and {} (active learning)".format(
                n_sizes, len(frac_err_baseline), len(frac_err_AL_ens))
        )

    # NOTE: style.use changes the matplotlib style for the rest of the session.
    plt.style.use("classic")
    fig, ax = plt.subplots()
    ax.plot(N_train_list,frac_err_baseline, ".-", label="Baseline")
    ax.plot(N_train_list,frac_err_AL_ens, ".-", label="Active learning, ensemble")
    ax.set_ylabel('Fractional error on test set')
    ax.set_xlabel('Number of training points')
    ax.legend()
    plt.show()

    return

In [1]:
# NOTE(review): the saved output of this cell is a NameError and its execution
# count is In [1], i.e. it was last run on a kernel where the cell defining
# compare_err_plot had not been executed. Run the cells above first.
compare_err_plot(errors_baseline,errors_al)

NameError: name 'compare_err_plot' is not defined

In [32]:
# Collect both error series side by side; row i corresponds to the i-th K value.
AL_results_table = pd.DataFrame({'Errors_baseline':errors_baseline,'Errors_al':errors_al})

In [34]:
# Transposed for display: one row per method, one column per K value.
AL_results_table.T

Unnamed: 0,0,1,2,3,4,5,6,7
Errors_baseline,0.985704,0.981253,0.977226,0.968116,0.94762,0.903082,0.804733,0.654533
Errors_al,0.92938,0.925842,0.914023,0.888077,0.829408,0.731747,0.601004,0.557883
