In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pathlib

# Temperature Scaling

In [2]:
from sklearn.preprocessing import OneHotEncoder
from scipy.optimize import Bounds, minimize



In [3]:
# One-hot encoder for the two class labels (0 and 1). Dense output is requested
# so the encoded labels can be multiplied element-wise with NumPy arrays.
try:
    # scikit-learn >= 1.2 spells the option `sparse_output`; the old `sparse`
    # keyword was removed in 1.4 and raises TypeError there.
    ohe = OneHotEncoder(sparse_output=False)
except TypeError:
    # Older scikit-learn releases only accept the `sparse` keyword.
    ohe = OneHotEncoder(sparse=False)
ohe.fit([[0], [1]])

def TS_probs(logits, temp):
    """Return temperature-scaled softmax probabilities.

    Parameters
    ----------
    logits : array-like, 2-D
        One row per sample, one column per class. The softmax is taken
        along axis 1.
    temp : float or array broadcastable against `logits`
        Temperature the logits are divided by before the softmax.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as `logits`; each row sums to 1.
    """
    logits_ts = np.asarray(logits) / temp

    # Softmax is unchanged when a per-row constant is subtracted. Removing the
    # row maximum keeps np.exp from overflowing to inf (which would otherwise
    # turn the ratio into inf / inf = nan for large scaled logits).
    shifted = logits_ts - np.max(logits_ts, axis=1, keepdims=True)
    exp_shifted = np.exp(shifted)
    probs_ts = exp_shifted / np.sum(exp_shifted, axis=1, keepdims=True)

    return probs_ts

def f(x, *args):
    """Mean negative log-likelihood of the labels at temperature `x`.

    This is the objective handed to `scipy.optimize.minimize`.

    Parameters
    ----------
    x : float or length-1 array
        Temperature the logits are divided by.
    *args
        args[0] : 2-D array of logits (the callers in this notebook pass
            log-probabilities), one row per sample.
        args[1] : array of class labels; it is reshaped to a column and
            one-hot encoded with the module-level `ohe` encoder.

    Returns
    -------
    float
        Negative mean log-probability assigned to the true class.
    """
    logits = args[0]
    labels = ohe.transform(args[1].reshape(-1, 1))

    # Log-softmax computed directly (instead of log(softmax)): subtracting the
    # row maximum prevents overflow in np.exp, and staying in log space avoids
    # taking the log of a probability that underflowed to 0.
    scaled = logits / x
    shifted = scaled - np.max(scaled, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    # Only the true-class entry contributes. Zeroing the other entries first
    # avoids 0 * -inf = nan when an off-target class has log-probability -inf.
    on_target = np.where(labels > 0, log_probs, 0.0)
    nll = np.sum(labels * on_target, axis=1)

    return - np.mean(nll)

# The temperature is used as a divisor, so it must stay strictly positive.
# A lower bound of exactly 0 would let the optimizer evaluate the objective at
# temperature 0 (division by zero); a tiny positive bound rules that out.
my_bounds = Bounds(lb=np.array([1e-6]), ub=np.array([np.inf]))

# Initial guess: temperature 1, i.e. the unscaled probabilities.
x0 = np.array([1.0], dtype=np.float64)

First run the `model-training.ipynb` notebook to generate predicted probability vectors.

### BIC Dataset

##### Temperature Scaling of Individual Networks

In [4]:
# Training-set sizes for which networks were trained.
tr_sizes = [200, 500, 1000, 2000, 5000, 10000]

# Versions of the BIC dataset to process.
datasets = [1, 2, 3]

for d_num in datasets:
    # Make sure the folder for the temperature-scaled predictions exists.
    out_dir = pathlib.Path(f'results/BIC{d_num}/NN-TS/')
    out_dir.mkdir(parents=True, exist_ok=True)

    # Entries 10000..10999 of the training labels are used for calibration.
    train_labels_all = np.loadtxt(f'data/BIC{d_num}/train_output_data.txt')
    labels_cal = train_labels_all[10000:11000]
    labels_test = np.loadtxt(f'data/BIC{d_num}/test_output_data.txt')

    for tr_size in tr_sizes:
        for model_num in range(10):
            probs_cal = np.load(f'results/BIC{d_num}/NN/probs-cal-tr_size-{tr_size}-model_num-{model_num}.npy')
            probs_test = np.load(f'results/BIC{d_num}/NN/probs-test-tr_size-{tr_size}-model_num-{model_num}.npy')

            # Fit the temperature on the calibration log-probabilities.
            logits_cal = np.log(probs_cal).astype('float64')
            res = minimize(
                fun=f,
                x0=x0,
                args=(logits_cal, labels_cal),
                method='SLSQP',
                bounds=my_bounds,
                tol=1e-15,
            )

            # Rescale the test log-probabilities with the fitted temperature
            # and store the result for this network.
            logits_test = np.log(probs_test)
            probs_test_ts = TS_probs(logits_test, res.x)
            np.save(f'results/BIC{d_num}/NN-TS/probs-test-tr_size-{tr_size}-model_num-{model_num}.npy', probs_test_ts)

##### Temperature Scaling of Aggregated Probability Vectors

In [5]:
# Training-set sizes for which networks were trained.
tr_sizes = [200, 500, 1000, 2000, 5000, 10000]

# Versions of the BIC dataset to process.
datasets = [1, 2, 3]

for d_num in datasets:
    # Create both output folders (scaled and unscaled aggregates).
    for out_dir in (f'results/BIC{d_num}/NN-AGG-TS/', f'results/BIC{d_num}/NN-AGG/'):
        pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)

    # Entries 10000..10999 of the training labels are used for calibration.
    train_labels_all = np.loadtxt(f'data/BIC{d_num}/train_output_data.txt')
    labels_cal = train_labels_all[10000:11000]
    labels_test = np.loadtxt(f'data/BIC{d_num}/test_output_data.txt')

    for tr_size in tr_sizes:
        # Running averages of the ten networks' two-column probability arrays.
        probs_cal_agg = np.zeros((len(labels_cal), 2))
        probs_test_agg = np.zeros((len(labels_test), 2))
        for model_num in range(10):
            member_cal = np.load(f'results/BIC{d_num}/NN/probs-cal-tr_size-{tr_size}-model_num-{model_num}.npy')
            probs_cal_agg += member_cal / 10
            member_test = np.load(f'results/BIC{d_num}/NN/probs-test-tr_size-{tr_size}-model_num-{model_num}.npy')
            probs_test_agg += member_test / 10

        # Fit one temperature on the averaged calibration predictions.
        logits_cal_agg = np.log(probs_cal_agg).astype('float64')
        res = minimize(
            fun=f,
            x0=x0,
            args=(logits_cal_agg, labels_cal),
            method='SLSQP',
            bounds=my_bounds,
            tol=1e-15,
        )

        # Save the scaled aggregate first, then the plain aggregate.
        probs_test_agg_ts = TS_probs(np.log(probs_test_agg), res.x)
        np.save(f'results/BIC{d_num}/NN-AGG-TS/probs-test-tr_size-{tr_size}.npy', probs_test_agg_ts)
        np.save(f'results/BIC{d_num}/NN-AGG/probs-test-tr_size-{tr_size}.npy', probs_test_agg)

##### Aggregating Individual Temperature Scaled Networks

In [6]:
# A list of training set sizes used for training the model:
tr_sizes = [200, 500, 1000, 2000, 5000, 10000]

# Dataset versions:
datasets = [1, 2, 3]

for d_num in datasets:
    labels_test = np.loadtxt(f'data/BIC{d_num}/test_output_data.txt')
    p = pathlib.Path(f'results/BIC{d_num}/NN-TS-AGG/')
    p.mkdir(parents=True, exist_ok=True)
    for tr_size in tr_sizes:
        # Accumulator for the ensemble mean of the temperature-scaled predictions.
        probs_test_ts_agg = np.zeros((len(labels_test), 2))
        for model_num in range(10):
            # Each of the ten networks contributes 1/10, so the saved rows are
            # averaged probabilities (summing to 1) rather than a raw sum that
            # adds up to 10. This matches the other aggregation cells.
            probs_test_ts_agg += np.load(f'results/BIC{d_num}/NN-TS/probs-test-tr_size-{tr_size}-model_num-{model_num}.npy') / 10
        np.save(f'results/BIC{d_num}/NN-TS-AGG/probs-test-tr_size-{tr_size}.npy', probs_test_ts_agg)

### ABC Dataset

##### Temperature Scaling of Individual Networks

In [5]:
# Versions of the ABC dataset to process.
datasets = [1, 2, 3]

for d_num in datasets:
    # Make sure the folder for the temperature-scaled predictions exists.
    out_dir = pathlib.Path(f'results/ABC{d_num}/NN-TS/')
    out_dir.mkdir(parents=True, exist_ok=True)

    # The first 1000 validation labels are used for calibration.
    val_labels_all = np.load(f'data/ABC{d_num}/labels_val.npy')
    labels_cal = val_labels_all[:1000]
    labels_test = np.load(f'data/ABC{d_num}/labels_test.npy')

    for model_num in range(10):
        probs_cal = np.load(f'results/ABC{d_num}/NN/probs-cal-model_num-{model_num}.npy')
        probs_test = np.load(f'results/ABC{d_num}/NN/probs-test-model_num-{model_num}.npy')

        # Fit the temperature on the calibration log-probabilities.
        logits_cal = np.log(probs_cal).astype('float64')
        res = minimize(
            fun=f,
            x0=x0,
            args=(logits_cal, labels_cal),
            method='SLSQP',
            bounds=my_bounds,
            tol=1e-15,
        )

        # Rescale the test log-probabilities and store them for this network.
        logits_test = np.log(probs_test)
        probs_test_ts = TS_probs(logits_test, res.x)
        np.save(f'results/ABC{d_num}/NN-TS/probs-test-model_num-{model_num}.npy', probs_test_ts)

##### Temperature Scaling of Aggregated Probability Vectors

In [6]:
# Versions of the ABC dataset to process.
datasets = [1, 2, 3]

for d_num in datasets:
    # Create both output folders (scaled and unscaled aggregates).
    for out_dir in (f'results/ABC{d_num}/NN-AGG-TS/', f'results/ABC{d_num}/NN-AGG/'):
        pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)

    # The first 1000 validation labels are used for calibration.
    val_labels_all = np.load(f'data/ABC{d_num}/labels_val.npy')
    labels_cal = val_labels_all[:1000]
    labels_test = np.load(f'data/ABC{d_num}/labels_test.npy')

    # Running averages of the ten networks' two-column probability arrays.
    probs_cal_agg = np.zeros((len(labels_cal), 2))
    probs_test_agg = np.zeros((len(labels_test), 2))
    for model_num in range(10):
        member_cal = np.load(f'results/ABC{d_num}/NN/probs-cal-model_num-{model_num}.npy')
        probs_cal_agg += member_cal / 10
        member_test = np.load(f'results/ABC{d_num}/NN/probs-test-model_num-{model_num}.npy')
        probs_test_agg += member_test / 10

    # Fit one temperature on the averaged calibration predictions.
    logits_cal_agg = np.log(probs_cal_agg).astype('float64')
    res = minimize(
        fun=f,
        x0=x0,
        args=(logits_cal_agg, labels_cal),
        method='SLSQP',
        bounds=my_bounds,
        tol=1e-15,
    )

    # Save the scaled aggregate first, then the plain aggregate.
    probs_test_agg_ts = TS_probs(np.log(probs_test_agg), res.x)
    np.save(f'results/ABC{d_num}/NN-AGG-TS/probs-test.npy', probs_test_agg_ts)
    np.save(f'results/ABC{d_num}/NN-AGG/probs-test.npy', probs_test_agg)

##### Aggregating Individual Temperature Scaled Networks

In [9]:
# Versions of the ABC dataset to process.
datasets = [1, 2, 3]

for d_num in datasets:
    # Make sure the folder for the averaged predictions exists.
    out_dir = pathlib.Path(f'results/ABC{d_num}/NN-TS-AGG/')
    out_dir.mkdir(parents=True, exist_ok=True)

    labels_test = np.load(f'data/ABC{d_num}/labels_test.npy')
    n_test = len(labels_test)

    # Average the ten temperature-scaled prediction arrays (1/10 each).
    probs_test_ts_agg = np.zeros((n_test, 2))
    for model_num in range(10):
        member_probs = np.load(f'results/ABC{d_num}/NN-TS/probs-test-model_num-{model_num}.npy')
        probs_test_ts_agg += member_probs / 10

    np.save(f'results/ABC{d_num}/NN-TS-AGG/probs-test.npy', probs_test_ts_agg)

### Crack Path Dataset

##### Temperature Scaling of Individual Networks

In [11]:
# Make sure the folder for the temperature-scaled predictions exists.
out_dir = pathlib.Path(f'results/Crack-Path/NN-TS/')
out_dir.mkdir(parents=True, exist_ok=True)

# Entries 20000..20999 of the training labels, flattened, are used for calibration.
train_labels_all = np.load(f'data/Crack-Path/dmg-train.npy')
labels_cal = train_labels_all[20000:21000].reshape(-1)
labels_test = np.load(f'data/Crack-Path/dmg-test.npy').reshape(-1)

for model_num in range(10):
    probs_cal = np.load(f'results/Crack-Path/NN/probs-cal-model_num-{model_num}.npy')
    probs_test = np.load(f'results/Crack-Path/NN/probs-test-model_num-{model_num}.npy')

    # Fit the temperature on the calibration log-probabilities.
    logits_cal = np.log(probs_cal).astype('float64')
    res = minimize(
        fun=f,
        x0=x0,
        args=(logits_cal, labels_cal),
        method='SLSQP',
        bounds=my_bounds,
        tol=1e-15,
    )

    # Rescale the test log-probabilities and store them for this network.
    logits_test = np.log(probs_test)
    probs_test_ts = TS_probs(logits_test, res.x)
    np.save(f'results/Crack-Path/NN-TS/probs-test-model_num-{model_num}.npy', probs_test_ts)

##### Temperature Scaling of Aggregated Probability Vectors

In [14]:
# Create both output folders (scaled and unscaled aggregates).
for out_dir in (f'results/Crack-Path/NN-AGG-TS/', f'results/Crack-Path/NN-AGG/'):
    pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)

# Entries 20000..20999 of the training labels, flattened, are used for calibration.
train_labels_all = np.load(f'data/Crack-Path/dmg-train.npy')
labels_cal = train_labels_all[20000:21000].reshape(-1)
labels_test = np.load(f'data/Crack-Path/dmg-test.npy').reshape(-1)

# Running averages of the ten networks' two-column probability arrays.
probs_cal_agg = np.zeros((len(labels_cal), 2))
probs_test_agg = np.zeros((len(labels_test), 2))
for model_num in range(10):
    member_cal = np.load(f'results/Crack-Path/NN/probs-cal-model_num-{model_num}.npy')
    probs_cal_agg += member_cal / 10
    member_test = np.load(f'results/Crack-Path/NN/probs-test-model_num-{model_num}.npy')
    probs_test_agg += member_test / 10

# Fit one temperature on the averaged calibration predictions.
logits_cal_agg = np.log(probs_cal_agg).astype('float64')
res = minimize(
    fun=f,
    x0=x0,
    args=(logits_cal_agg, labels_cal),
    method='SLSQP',
    bounds=my_bounds,
    tol=1e-15,
)

# Save the scaled aggregate first, then the plain aggregate.
probs_test_agg_ts = TS_probs(np.log(probs_test_agg), res.x)
np.save(f'results/Crack-Path/NN-AGG-TS/probs-test.npy', probs_test_agg_ts)
np.save(f'results/Crack-Path/NN-AGG/probs-test.npy', probs_test_agg)

##### Aggregating Individual Temperature Scaled Networks

In [16]:
labels_test = np.load(f'data/Crack-Path/dmg-test.npy').reshape(-1)
p = pathlib.Path(f'results/Crack-Path/NN-TS-AGG/')
p.mkdir(parents=True, exist_ok=True)

# Accumulator for the ensemble mean of the temperature-scaled predictions.
probs_test_ts_agg = np.zeros((len(labels_test), 2))
for model_num in range(10):
    # Each of the ten networks contributes 1/10, so the saved rows are averaged
    # probabilities (summing to 1) rather than a raw sum that adds up to 10.
    # This matches the other aggregation cells.
    probs_test_ts_agg += np.load(f'results/Crack-Path/NN-TS/probs-test-model_num-{model_num}.npy') / 10
np.save(f'results/Crack-Path/NN-TS-AGG/probs-test.npy', probs_test_ts_agg)