This notebook computes the accuracies of random forest models trained on different feature sets, using points from the 2500 dataset.

In [2]:
import os
import time
import pandas as pd

from sklearn.ensemble import RandomForestClassifier

from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt

from joblib import dump

from accuracy_info_df import accuracy_info_df

In [3]:
# **************************************************************
# Configuration: dataset location, file names and label column.
# NOTE(review): `root` is an absolute, machine-specific path; adjust it
# when running this notebook in a different environment.
root = '/home/jovyan/msai4earth-esa/iceplant_detection/data/INITIAL_DATASET/'
train_name = 'train_2500.csv'
test_name = 'test_2500.csv'

label_name = 'iceplant'

# ------------------------------
# IMPORT TRAIN DATA
# Each CSV is parsed once; the label vector is taken from the same frame
# instead of re-reading the file.
# X_train keeps every column of the CSV (the label column included), so
# feature columns must always be selected explicitly before fitting.
X_train = pd.read_csv(os.path.join(root, train_name))
y_train = X_train[label_name].to_numpy()

# ------------------------------
# IMPORT TEST DATA
X_test = pd.read_csv(os.path.join(root, test_name))
y_test = X_test[label_name].to_numpy()

# ------------------------------
# Box-side values that are appended to the window-feature column names.
box_sides = [3,5,7,9,11,13,15,17,19,21,23,25,27]

In [None]:
# #********** checkpoints ************
# X_train
# print(X_test.columns == X_train.columns)
# print(X_train.columns)
# mpe.test_train_proportions(y_train, y_test)

In [4]:
# ---------------------------------------------------
# Feature-set experiments.
#
# Every experiment trains a random forest on a subset of the columns of
# X_train and scores it on the same columns of X_test. Experiments are
# declared as (label, columns) pairs so that the labels in `feats` and the
# metrics in `results` are always produced from the same list and cannot
# drift out of step.

spectral = ['r', 'g', 'b', 'nir']
bands = spectral + ['ndvi']
date_cols = ['month', 'day_in_year']


def window_columns(window_bands, suffixes, box_s):
    """Return the window-feature column names for one box side.

    Each name is built as band + suffix + str(box_s). Bands vary in the
    outer loop and suffixes in the inner loop, so for a given band all of
    its suffixes are listed before moving on to the next band.

    Parameters
    ----------
    window_bands : list of str
        Band names to build window features for.
    suffixes : list of str
        Feature suffixes, e.g. ['_avg', '_entr'].
    box_s : int
        Box side appended to every name.
    """
    return [band + sfx + str(box_s) for band in window_bands for sfx in suffixes]


def fit_and_score(cols):
    """Fit a random forest on `cols` and return its test-set accuracy info.

    The classifier always uses 100 trees and random_state=42, so repeated
    runs over the same columns are reproducible. Training data come from
    the notebook-level X_train / y_train and evaluation data from
    X_test / y_test.

    Parameters
    ----------
    cols : list of str
        Column names selected from both X_train and X_test.

    Returns
    -------
    The value returned by accuracy_info_df(y_test, predictions); it is
    later combined with the other experiments via pd.concat.
    """
    rfc = RandomForestClassifier(n_estimators=100, random_state=42)
    rfc.fit(X_train[cols].to_numpy(), y_train)
    preds = rfc.predict(X_test[cols].to_numpy())
    return accuracy_info_df(y_test, preds)


# ---------------------------------------------------
# spectral bands, with and without ndvi and date columns
experiments = [
    ('spectral', spectral),
    ('spectral+date', spectral + date_cols),
    ('spectral+ndvi', bands),
    ('spectral+ndvi+date', bands + date_cols),
]

# ---------------------------------------------------
# texture experiments: (label prefix, bands with window features, suffixes)
# Every texture experiment also includes all bands and the date columns.
texture_families = [
    # average + entropy textures
    ('avg_ent_', bands, ['_avg', '_entr']),
    # glcm contrast + correlation features (built for r, g, b, nir only)
    ('glcm_', spectral, ['_contN_', '_corrN_', '_contE_', '_corrE_']),
    # entropy textures only
    ('entr_', bands, ['_entr']),
    # average textures only
    ('avg_', bands, ['_avg']),
]

# One experiment per (family, box side); families are kept in the order
# above so the rows of the final table are grouped by family.
for prefix, window_bands, suffixes in texture_families:
    for box_s in box_sides:
        cols = bands + window_columns(window_bands, suffixes, box_s) + date_cols
        experiments.append((prefix + str(box_s), cols))

# `feats` holds the label of each model and `results` its accuracy info,
# both in experiment order.
feats = [label for label, _ in experiments]
results = [fit_and_score(cols) for _, cols in experiments]


In [5]:
# Stack the per-model accuracy tables into a single frame with a fresh
# 0..n-1 index, then put the model labels in the first column.
R = pd.concat(results, ignore_index=True)
R.insert(0, 'features', feats)
R

Unnamed: 0,features,acc,prod_acc_P,prod_acc_N,user_acc_P,user_acc_N,TP,TN,FP,FN
0,spectral,81.87,82.12,81.67,77.87,85.32,271,343,77,59
1,spectral+date,87.2,89.09,85.71,83.05,90.91,294,360,60,36
2,spectral+ndvi,82.27,83.03,81.67,78.06,85.96,274,343,77,56
3,spectral+ndvi+date,86.53,87.27,85.95,83.0,89.58,288,361,59,42
4,avg_ent_3,88.4,86.97,89.52,86.71,89.74,287,376,44,43
5,avg_ent_5,90.13,87.88,91.9,89.51,90.61,290,386,34,40
6,avg_ent_7,90.13,86.67,92.86,90.51,89.86,286,390,30,44
7,avg_ent_9,90.93,88.48,92.86,90.68,91.12,292,390,30,38
8,avg_ent_11,90.93,89.39,92.14,89.94,91.71,295,387,33,35
9,avg_ent_13,92.13,91.21,92.86,90.94,93.08,301,390,30,29


In [6]:
# Save the accuracy table to the working directory, without the row index.
output_csv = 'accuracies_feature_experiments.csv'
R.to_csv(output_csv, index=False)