In this notebook I compare the models we selected by evaluating each of them on both the initial test set and the augmented (extended) test set.

In [1]:
import os
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier

from accuracy_info_df import accuracy_info_df

from joblib import load

In [2]:
# **************************************************************
# TRAIN MODELS
# **************************************************************
# Fits three random forests with identical hyperparameters
# (n_estimators=100, random_state=42):
#   rfc_spectral : initial training set, columns listed in `bands`
#   rfc_13x13    : initial training set, columns listed in `cols_13x13`
#   rfc_salt     : salt13_p30 training set, columns listed in `cols_salt`
# The evaluation cells below rely on `rfcs`, `bands`, `cols_13x13` and `cols_salt`.

root = '/home/jovyan/msai4earth-esa/iceplant_detection/data/INITIAL_DATASET/'
train_name = 'train_2500.csv'

# Parse the CSV once and take the 'iceplant' labels from the same frame
# (previously the file was read a second time just to extract this column).
# copy=True keeps the label array independent of the DataFrame's memory.
X_train = pd.read_csv(os.path.join(root, train_name))
y_train = X_train.loc[:, 'iceplant'].to_numpy(copy=True)

# ------------------------------
# spectral
bands = ['r','g','b','nir']
X_train_sub = X_train[bands].to_numpy()

rfc_spectral = RandomForestClassifier(n_estimators = 100, random_state = 42)
rfc_spectral.fit(X_train_sub, y_train)

# ------------------------------
# 13x13

box_s = 13
# Builds 'r_avg13', 'r_entr13', 'g_avg13', ... 'ndvi_entr13'.
# NOTE(review): presumably per-window average / entropy features for a
# 13x13 box -- confirm against the feature-extraction code.
window_features = [band + x + str(box_s) for band in bands+['ndvi'] for x in ['_avg', '_entr']]
cols_13x13 =  bands + ['ndvi'] + window_features + ['month', 'day_in_year']

X_train_sub = X_train[cols_13x13].to_numpy()

rfc_13x13 = RandomForestClassifier(n_estimators = 100, random_state = 42)
rfc_13x13.fit(X_train_sub, y_train)

# ------------------------------
# salt13_p30
root = '/home/jovyan/msai4earth-esa/iceplant_detection/data/EXTENDED_DATASET_salt13_p30/'
train_name = 'salt13_p30_train.csv'

# Same single-read pattern as for the initial training set.
X_train_salt = pd.read_csv(os.path.join(root, train_name))
y_train_salt = X_train_salt.loc[:, 'iceplant'].to_numpy(copy=True)

# Same 17 column names as cols_13x13 but in a different order. The models are
# fitted on plain numpy arrays (positional features), so each model must always
# be fed its own column list, in exactly this order.
cols_salt = ['r',
        'r_avg13', 'r_entr13',
        'g',
        'g_avg13', 'g_entr13',
        'b',
        'b_avg13', 'b_entr13',
        'nir',
        'nir_avg13', 'nir_entr13',
        'ndvi',
        'ndvi_avg13', 'ndvi_entr13',
        'month',
        'day_in_year']

X_train_sub = X_train_salt[cols_salt].to_numpy()

rfc_salt = RandomForestClassifier(n_estimators = 100, random_state = 42)
rfc_salt.fit(X_train_sub, y_train_salt)

# Order matters: the evaluation cells zip this list with
# [bands, cols_13x13, cols_salt].
rfcs = [rfc_spectral, rfc_13x13, rfc_salt]

In [3]:
# **************************************************************
# ACCURACIES WITH INITIAL TEST SET
# **************************************************************
# Scores each model in `rfcs` on the initial test set and displays the
# per-model accuracy tables stacked in the order of `rfcs`.
root = '/home/jovyan/msai4earth-esa/iceplant_detection/data/INITIAL_DATASET/'
test_name = 'test_2500.csv'

# Parse the CSV once and take the 'iceplant' labels from the same frame
# (previously the file was read a second time just to extract this column).
X_test = pd.read_csv(os.path.join(root, test_name))
y_test = X_test.loc[:, 'iceplant'].to_numpy(copy=True)

results = []
# Models and column lists are paired positionally: every model must be given
# exactly the columns (and column order) it was trained on.
for rfc, cols_names in zip(rfcs,[bands, cols_13x13, cols_salt]):

    X_test_sub = X_test[cols_names].to_numpy()
    preds = rfc.predict(X_test_sub)

    # accuracy_info_df is a project helper; its result is concatenated below.
    results.append(accuracy_info_df(y_test, preds))

# Last expression of the cell, so the combined table is rendered as its output.
pd.concat(results)

Unnamed: 0,acc,prod_acc_P,prod_acc_N,user_acc_P,user_acc_N,TP,TN,FP,FN
0,81.87,82.12,81.67,77.87,85.32,271,343,77,59
0,92.13,91.21,92.86,90.94,93.08,301,390,30,29
0,86.8,71.52,98.81,97.93,81.53,236,415,5,94


In [6]:
# **************************************************************
# ACCURACIES WITH EXTENDED TEST SET
# **************************************************************
# Scores each model in `rfcs` on the salt13_p30 test set and displays the
# per-model accuracy tables stacked in the order of `rfcs`.
# NOTE: this cell rebinds `root`, `test_name`, `X_test` and `y_test`.
root = '/home/jovyan/msai4earth-esa/iceplant_detection/data/EXTENDED_DATASET_salt13_p30/'
test_name = 'salt13_p30_test.csv'

# Parse the CSV once and take the 'iceplant' labels from the same frame
# (previously the file was read a second time just to extract this column).
X_test = pd.read_csv(os.path.join(root, test_name))
y_test = X_test.loc[:, 'iceplant'].to_numpy(copy=True)

results_extended = []
# Models and column lists are paired positionally: every model must be given
# exactly the columns (and column order) it was trained on.
for rfc, cols_names in zip(rfcs,[bands, cols_13x13, cols_salt]):

    X_test_sub = X_test[cols_names].to_numpy()
    preds = rfc.predict(X_test_sub)

    # accuracy_info_df is a project helper; its result is concatenated below.
    results_extended.append(accuracy_info_df(y_test, preds))

# Last expression of the cell, so the combined table is rendered as its output.
pd.concat(results_extended)

Unnamed: 0,acc,prod_acc_P,prod_acc_N,user_acc_P,user_acc_N,TP,TN,FP,FN
0,68.0,78.56,61.4,55.99,82.09,491,614,386,134
0,68.74,88.96,56.1,55.88,89.05,556,561,439,69
0,82.71,67.2,92.4,84.68,81.84,420,924,76,205
