# Experiment 1 - Accuracy Test

This experiment aims to evaluate the metric proposed in the paper by generating artificial learning curves based on the *exp3* parametric model, using parameter values from actual fits from the LCDB repository.

### Imports

In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm

from sklearn.linear_model import LinearRegression

In [2]:
### Extract exp3 parameters (betas) from the df_total.gz file, obtained from LCDB

pd.set_option('use_inf_as_na',True)
df_total = pd.read_pickle('data/df_total.gz').dropna()

data = df_total[(df_total['curve_model'] == 'exp3')]
data = data.loc[data.groupby(['openmlid', 'learner'])['max_anchor_seen'].idxmax()]

q50 = data['MSE_tst_last'].quantile(0.5)
data = data[data['MSE_tst_last'] < q50]

x = [x[0] for x in data['beta']]
y = [x[1] for x in data['beta']]
z = [x[2] for x in data['beta']]

# Create DataFrame with Beta values
betas = pd.DataFrame(zip(x,y,z), columns=['a', 'b', 'c']) 
betas

Unnamed: 0,a,b,c
0,-1.973068e-01,0.059096,0.919998
1,-5.865619e-02,0.040433,0.921195
2,-4.723473e-01,0.016816,0.944205
3,1.906562e-07,0.547670,0.923500
4,1.265768e+02,36.947609,0.923500
...,...,...,...
3764,-9.129767e-02,0.069850,0.475706
3765,-3.092072e-01,0.107097,0.593951
3766,-3.772776e-02,0.076114,0.876866
3767,-9.856983e-02,0.016540,0.618293


### Metric 

In [3]:
def gen_params(idx):
    a = betas['a'].iloc[idx]
    b = betas['b'].iloc[idx]
    c = betas['c'].iloc[idx]
    
    return a,b,c

def run_experiment():
    n = len(betas) # number of LCs considered
    true_pos = 0
    true_neg = 0
    false_pos = 0
    false_neg = 0
    asc = 0
    desc = 0
    
    for i in tqdm(range(0,n)):
        is_asc = False
        flag = False
        final_slopes = []
        
        lern = data['learner'].iloc[i]
        dataset = data['openmlid'].iloc[i]
        
        a,b,c = gen_params(i)
        exp3 = lambda x: a * np.exp((-b) * x) + c # exp3 from LCDB
        anch = data['anchor_prediction'].iloc[i] # array of anchors
        
        if (a < 0 and b > 0) or (a > 0 and b < 0):
            is_asc = True
            asc+=1
        else:
            is_asc = False
            desc+=1
        
        for j in range(0, len(anch)):
            points = range(anch[j] - 10, anch[j] + 11)
            errors_iterations = []
            slopes = []
            for it in range(0, 25):
                noise = np.random.normal(0,0.002)
                linreg_errs = [exp3(p)+noise for p in points]
        
                model = LinearRegression()
                model.fit(np.array(points).reshape(-1,1), np.array(linreg_errs).reshape(-1,1))
                if(model.coef_[0] > 0):
                    slopes.append(1)
                else:
                    slopes.append(-1)
            final_slopes.append(np.mean(slopes))
        
        for j in range(0, len(anch) - 1):
            if final_slopes[j] > 0 and final_slopes[j+1] > 0:
                if is_asc:
                    true_pos+=1
                else:
                    false_pos+=1

                flag = True
                break
                
        if flag == False:
            if is_asc:
                false_neg+=1
            else:
                true_neg+=1

    print(f"Out of {n} Learning curves:")
    print(f"-----------{asc} Non-monotonic LCs")
    print(f"-----------{desc} Monotonic LCs\n")
    print(f" - {true_pos} Have been classified correctly as non-monotonic ({round((true_pos/asc)*100,2)}%)")
    print(f" - {true_neg} Have been classified correctly as monotonic ({round((true_neg/desc)*100,2)}%)")
    print(f" - {false_pos} Have been classified incorrectly as non-monotonic (Type I Error)")
    print(f" - {false_neg} Have been classified incorrectly as monotonic (Type II Error)")
    return

In [6]:
run_experiment()

100%|██████████████████████████████████████████████████████████████████████████████| 3769/3769 [15:10<00:00,  4.14it/s]

Out of 3769 Learning curves:
-----------3179 Non-monotonic LCs
-----------590 Monotonic LCs

 - 3140 Have been classified correctly as non-monotonic (98.77%)
 - 548 Have been classified correctly as monotonic (92.88%)
 - 42 Have been classified incorrectly as non-monotonic (Type I Error)
 - 39 Have been classified incorrectly as monotonic (Type II Error)





[1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0]