In [1]:
import pickle
import pandas as pd
import xgboost as xgb
import numpy as np

from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras

2023-04-20 14:23:32.765746: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Model Comparison

This notebook serves to run a comparison of various models on the existing Proboscidia and Felid datasets in order to determine the most promising model, to be used. 

Models to be compared include: 
- Decision Tree
- Random Forest
- XG Boost
- AdaBoost
- Support Vector Machine
- Neural Network

Please note that the data transformation pipelines, make use of oversampling in order to reduce the dataset imbalance. And where available the below models will weight the classes during training.

In [2]:
report_collection_df = pd.DataFrame()

In [16]:
def add_model_report(report_collection_df: pd.DataFrame, y_val, y_pred, taxon_level:str, model: str):
    classes = y_val.nunique()
    report = classification_report(y_val, y_pred, output_dict=True)
    
    report_df = pd.DataFrame(report).transpose()
    report_df['taxon_level'] = taxon_level
    report_df['model_type'] = model
    report_df = report_df.head(classes)
    
    return pd.concat([report_collection_df, report_df], sort=False)

### Pre-required Methods

In [3]:
def prep_dt_validationSet(df:pd.DataFrame, taxon_target:str):
    taxon_y = df[taxon_target]
    
    if taxon_y.isnull().any():
        df = df.dropna(subset=[taxon_target])
    
    y = df[taxon_target]
    X = df.drop(columns=['taxon_kingdom_name', 'taxon_phylum_name',
                             'taxon_class_name', 'taxon_order_name', 'taxon_family_name',
                             'taxon_genus_name', 'taxon_species_name', 'sub_species', 'common_name'])

    return X, y

## Proboscidia Dataset

### Decision Tree

#### Genus Taxonomy

Load the model

In [69]:
dt_proboscidia_genus_model = pickle.load(open('model_comparison_cache/proboscidia_genus_dt_model.sav', 'rb'))

Import the evaluation Dataset.

In [70]:
dt_genus_df_val = pd.read_csv('model_comparison_cache/proboscidia_genus_dt_validation.csv', index_col=0)

Generate True labels and predicted labels

In [71]:
X_val, y_val = prep_dt_validationSet(dt_genus_df_val, 'taxon_genus_name')
y_pred = dt_proboscidia_genus_model.predict(X_val)

Classification report

In [72]:
proboscidia_genus_df_report = classification_report(y_val, y_pred)
print(proboscidia_genus_df_report)

              precision    recall  f1-score   support

     Elephas       1.00      1.00      1.00         5
   Loxodonta       1.00      1.00      1.00         5

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10



Save classification report for model comparison

In [73]:
report_collection_df = add_model_report(report_collection_df, y_val, y_pred, 'Genus', 'Decision tree')

In [74]:
report_collection_df

Unnamed: 0,precision,recall,f1-score,support,taxon_level,model_type
Elephas,1.0,1.0,1.0,5.0,Genus,Decision tree
Loxodonta,1.0,1.0,1.0,5.0,Genus,Decision tree


#### Species Taxonomy

Note, Elephas Genus only contains a single species, such that model would not be required, due to the single species possibility.

##### Loxodonta

In [75]:
dt_loxodonta_species_model = pickle.load(open('model_comparison_cache/loxodonta_species_dt_model.sav', 'rb'))

In [76]:
dt_species_df_val = pd.read_csv('model_comparison_cache/loxodonta_species_dt_validation.csv', index_col=0)

In [77]:
X_val, y_val = prep_dt_validationSet(dt_species_df_val, 'taxon_species_name')
y_pred = dt_loxodonta_species_model.predict(X_val)

In [78]:
loxodonta_species_dt_report = classification_report(y_val, y_pred)
print(loxodonta_species_dt_report)

                    precision    recall  f1-score   support

Loxodonta africana       0.71      1.00      0.83        10
Loxodonta cyclotis       1.00      0.60      0.75        10

          accuracy                           0.80        20
         macro avg       0.86      0.80      0.79        20
      weighted avg       0.86      0.80      0.79        20



In [79]:
report_collection_df = add_model_report(report_collection_df, y_val, y_pred, 'Species', 'Decision tree')

In [80]:
report_collection_df.head()

Unnamed: 0,precision,recall,f1-score,support,taxon_level,model_type
Elephas,1.0,1.0,1.0,5.0,Genus,Decision tree
Loxodonta,1.0,1.0,1.0,5.0,Genus,Decision tree
Loxodonta africana,0.714286,1.0,0.833333,10.0,Species,Decision tree
Loxodonta cyclotis,1.0,0.6,0.75,10.0,Species,Decision tree


#### Elephas Sub-species

In [81]:
dt_elephas_subspecies_model = pickle.load(open('model_comparison_cache/elephas_subspecies_dt_model.sav', 'rb'))

In [82]:
dt_subspecies_df_val = pd.read_csv('model_comparison_cache/elephas_subspecies_dt_validation.csv', index_col=0)

In [83]:
X_val, y_val = prep_dt_validationSet(dt_subspecies_df_val, 'sub_species')
y_pred = dt_elephas_subspecies_model.predict(X_val)

In [84]:
elephas_subspecies_report = classification_report(y_val, y_pred)
print(elephas_subspecies_report)

                            precision    recall  f1-score   support

Elephas maximus borneensis       1.00      1.00      1.00        10
   Elephas maximus indicus       0.77      1.00      0.87        10
   Elephas maximus maximus       1.00      1.00      1.00        10
Elephas maximus sumatranus       1.00      0.70      0.82        10

                  accuracy                           0.93        40
                 macro avg       0.94      0.93      0.92        40
              weighted avg       0.94      0.93      0.92        40



In [85]:
report_collection_df = add_model_report(report_collection_df, y_val, y_pred, 'Sub-species', 'Decision tree')

In [86]:
report_collection_df.head(10)

Unnamed: 0,precision,recall,f1-score,support,taxon_level,model_type
Elephas,1.0,1.0,1.0,5.0,Genus,Decision tree
Loxodonta,1.0,1.0,1.0,5.0,Genus,Decision tree
Loxodonta africana,0.714286,1.0,0.833333,10.0,Species,Decision tree
Loxodonta cyclotis,1.0,0.6,0.75,10.0,Species,Decision tree
Elephas maximus borneensis,1.0,1.0,1.0,10.0,Sub-species,Decision tree
Elephas maximus indicus,0.769231,1.0,0.869565,10.0,Sub-species,Decision tree
Elephas maximus maximus,1.0,1.0,1.0,10.0,Sub-species,Decision tree
Elephas maximus sumatranus,1.0,0.7,0.823529,10.0,Sub-species,Decision tree


### Random Forest

#### Genus Taxonomy

In [87]:
rf_proboscidia_genus_model = pickle.load(open('model_comparison_cache/proboscidia_genus_rf_model.sav', 'rb'))

In [88]:
rf_genus_df_val = pd.read_csv('model_comparison_cache/proboscidia_genus_rf_validation.csv', index_col=0)

In [89]:
X_val, y_val = prep_dt_validationSet(rf_genus_df_val, 'taxon_genus_name')
y_pred = rf_proboscidia_genus_model.predict(X_val)

In [90]:
proboscidia_genus_rf_report = classification_report(y_val, y_pred)
print(proboscidia_genus_rf_report)

              precision    recall  f1-score   support

     Elephas       1.00      1.00      1.00        10
   Loxodonta       1.00      1.00      1.00        10

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



In [91]:
report_collection_df = add_model_report(report_collection_df, y_val, y_pred, 'Genus', 'Random forest')

In [92]:
report_collection_df.head(15)

Unnamed: 0,precision,recall,f1-score,support,taxon_level,model_type
Elephas,1.0,1.0,1.0,5.0,Genus,Decision tree
Loxodonta,1.0,1.0,1.0,5.0,Genus,Decision tree
Loxodonta africana,0.714286,1.0,0.833333,10.0,Species,Decision tree
Loxodonta cyclotis,1.0,0.6,0.75,10.0,Species,Decision tree
Elephas maximus borneensis,1.0,1.0,1.0,10.0,Sub-species,Decision tree
Elephas maximus indicus,0.769231,1.0,0.869565,10.0,Sub-species,Decision tree
Elephas maximus maximus,1.0,1.0,1.0,10.0,Sub-species,Decision tree
Elephas maximus sumatranus,1.0,0.7,0.823529,10.0,Sub-species,Decision tree
Elephas,1.0,1.0,1.0,10.0,Genus,Random forest
Loxodonta,1.0,1.0,1.0,10.0,Genus,Random forest


#### Species Taxonomy

In [93]:
rf_loxodonta_species_model = pickle.load(open('model_comparison_cache/loxodonta_species_rf_model.sav', 'rb'))
rf_species_df_val = pd.read_csv('model_comparison_cache/loxodonta_species_rf_validation.csv', index_col=0)
X_val, y_val = prep_dt_validationSet(rf_species_df_val, 'taxon_species_name')
y_pred = rf_loxodonta_species_model.predict(X_val)

In [94]:
loxodonta_species_rf_report = classification_report(y_val, y_pred)
print(loxodonta_species_rf_report)

                    precision    recall  f1-score   support

Loxodonta africana       0.77      1.00      0.87        10
Loxodonta cyclotis       1.00      0.70      0.82        10

          accuracy                           0.85        20
         macro avg       0.88      0.85      0.85        20
      weighted avg       0.88      0.85      0.85        20



In [95]:
report_collection_df = add_model_report(report_collection_df, y_val, y_pred, 'Species', 'Random forest')

In [96]:
report_collection_df.tail()

Unnamed: 0,precision,recall,f1-score,support,taxon_level,model_type
Elephas maximus sumatranus,1.0,0.7,0.823529,10.0,Sub-species,Decision tree
Elephas,1.0,1.0,1.0,10.0,Genus,Random forest
Loxodonta,1.0,1.0,1.0,10.0,Genus,Random forest
Loxodonta africana,0.769231,1.0,0.869565,10.0,Species,Random forest
Loxodonta cyclotis,1.0,0.7,0.823529,10.0,Species,Random forest


#### Sub-species Taxonomy

In [97]:
rf_elephas_subspecies_model = pickle.load(open('model_comparison_cache/elephas_subspecies_rf_model.sav', 'rb'))
rf_subspecies_df_val = pd.read_csv('model_comparison_cache/elephas_subspecies_rf_validation.csv', index_col=0)
X_val, y_val = prep_dt_validationSet(rf_subspecies_df_val, 'sub_species')
y_pred = rf_elephas_subspecies_model.predict(X_val)

In [98]:
elephas_subspecies_rf_report = classification_report(y_val, y_pred)
print(elephas_subspecies_rf_report)

                            precision    recall  f1-score   support

Elephas maximus borneensis       1.00      0.90      0.95        10
   Elephas maximus indicus       0.62      1.00      0.77        10
   Elephas maximus maximus       0.75      0.90      0.82        10
Elephas maximus sumatranus       1.00      0.30      0.46        10

                  accuracy                           0.78        40
                 macro avg       0.84      0.77      0.75        40
              weighted avg       0.84      0.78      0.75        40



In [99]:
report_collection_df = add_model_report(report_collection_df, y_val, y_pred, 'Sub-species', 'Random forest')

In [100]:
report_collection_df.tail()

Unnamed: 0,precision,recall,f1-score,support,taxon_level,model_type
Loxodonta cyclotis,1.0,0.7,0.823529,10.0,Species,Random forest
Elephas maximus borneensis,1.0,0.9,0.947368,10.0,Sub-species,Random forest
Elephas maximus indicus,0.625,1.0,0.769231,10.0,Sub-species,Random forest
Elephas maximus maximus,0.75,0.9,0.818182,10.0,Sub-species,Random forest
Elephas maximus sumatranus,1.0,0.3,0.461538,10.0,Sub-species,Random forest


### XGBoost

In [4]:
def prep_xgb_validationSet(df:pd.DataFrame, taxon_target:str):
    taxon_y = df[taxon_target]
    
    if taxon_y.isnull().any():
        df = df.dropna(subset=[taxon_target])
    
    y = df[taxon_target]
    X = df.drop(columns=['taxon_kingdom_name', 'taxon_phylum_name',
                             'taxon_class_name', 'taxon_order_name', 'taxon_family_name',
                             'taxon_genus_name', 'taxon_species_name', 'sub_species', 'common_name'])
    classes = y.nunique()
    
    # Encode labels
    classes = y.nunique()
    lb = LabelBinarizer()
    lb.fit(y)
    y = lb.transform(y)
    
    if classes == 2:
        y = nn_binary_label_handling(y)
    
    return X, y, lb

In [5]:
def nn_binary_label_handling(y):
    return np.hstack((y, 1 - y))

In [6]:
def convert_to_labels(y_val, y_pred, lb):
    ohe_length = len(y_val[0])
    y_pred_ohe = np.zeros(shape=(len(y_val), ohe_length))
    
    y_val = pd.Series(lb.inverse_transform(y_val))
    y_pred_indices = np.argmax(y_pred, axis=1)
    
    for i in range(len(y_val)):
        y_pred_ohe[i, y_pred_indices[i]] = 1
    y_pred = lb.inverse_transform(y_pred_ohe)
    return y_val, y_pred

#### Proboscidia Genus

In [111]:
xgb_proboscidia_genus_model = xgb.XGBClassifier()
xgb_proboscidia_genus_model.load_model('model_comparison_cache/proboscidia_genus_xgb_model.json')

In [112]:
xgb_genus_df_val = pd.read_csv('model_comparison_cache/proboscidia_genus_xgb_validation.csv', index_col=0)

In [113]:
X_val, y_val, lb = prep_xgb_validationSet(xgb_genus_df_val, 'taxon_genus_name')
y_pred = xgb_proboscidia_genus_model.predict(X_val)

In [114]:
y_val, y_pred = convert_to_labels(y_val, y_pred, lb)

In [115]:
proboscidia_genus_xgb_report = classification_report(y_val, y_pred)
print(proboscidia_genus_xgb_report)

              precision    recall  f1-score   support

     Elephas       1.00      1.00      1.00        10
   Loxodonta       1.00      1.00      1.00        10

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



In [116]:
report_collection_df = add_model_report(report_collection_df, y_val, y_pred, 'Genus', 'Xgboost')

In [119]:
report_collection_df.tail()

Unnamed: 0,precision,recall,f1-score,support,taxon_level,model_type
Elephas maximus indicus,0.625,1.0,0.769231,10.0,Sub-species,Random forest
Elephas maximus maximus,0.75,0.9,0.818182,10.0,Sub-species,Random forest
Elephas maximus sumatranus,1.0,0.3,0.461538,10.0,Sub-species,Random forest
Elephas,1.0,1.0,1.0,10.0,Genus,Xgboost
Loxodonta,1.0,1.0,1.0,10.0,Genus,Xgboost


#### Species Taxonomy (Loxodonta)

In [120]:
xgb_loxodonta_species_model = xgb.XGBClassifier()
xgb_loxodonta_species_model.load_model('model_comparison_cache/loxodonta_species_xgb_model.json')

In [124]:
xgb_species_df_val = pd.read_csv('model_comparison_cache/loxodonta_species_xgb_validation.csv', index_col=0)
X_val, y_val, lb = prep_xgb_validationSet(xgb_species_df_val, 'taxon_species_name')
y_pred = xgb_loxodonta_species_model.predict(X_val)

In [125]:
y_val, y_pred = convert_to_labels(y_val, y_pred, lb)

In [126]:
report_collection_df = add_model_report(report_collection_df, y_val, y_pred, 'Species', 'Xgboost')

In [127]:
report_collection_df.tail()

Unnamed: 0,precision,recall,f1-score,support,taxon_level,model_type
Elephas maximus sumatranus,1.0,0.3,0.461538,10.0,Sub-species,Random forest
Elephas,1.0,1.0,1.0,10.0,Genus,Xgboost
Loxodonta,1.0,1.0,1.0,10.0,Genus,Xgboost
Loxodonta africana,1.0,0.8,0.888889,10.0,Species,Xgboost
Loxodonta cyclotis,0.833333,1.0,0.909091,10.0,Species,Xgboost


#### Sub-species Taxonomy (Elephas maximus)

In [134]:
xgb_elephas_subspecies_model = xgb.XGBClassifier()
xgb_elephas_subspecies_model.load_model('model_comparison_cache/elephas_subspecies_xgb_model.json')

In [142]:
xgb_subspecies_df_val = pd.read_csv('model_comparison_cache/elephas_subspecies_xgb_validation.csv', index_col=0)
X_val, y_val, lb = prep_xgb_validationSet(xgb_subspecies_df_val, 'sub_species')
y_pred = xgb_elephas_subspecies_model.predict(X_val)

In [143]:
y_val, y_pred = convert_to_labels(y_val, y_pred, lb)

In [144]:
elephas_subspecies_xgb_report = classification_report(y_val, y_pred)
print(elephas_subspecies_xgb_report)

                            precision    recall  f1-score   support

Elephas maximus borneensis       0.91      1.00      0.95        10
   Elephas maximus indicus       0.77      1.00      0.87        10
   Elephas maximus maximus       1.00      1.00      1.00        10
Elephas maximus sumatranus       1.00      0.60      0.75        10

                  accuracy                           0.90        40
                 macro avg       0.92      0.90      0.89        40
              weighted avg       0.92      0.90      0.89        40



In [145]:
report_collection_df = add_model_report(report_collection_df, y_val, y_pred, 'Sub-species', 'Xgboost')

In [147]:
report_collection_df.tail()

Unnamed: 0,precision,recall,f1-score,support,taxon_level,model_type
Loxodonta cyclotis,0.833333,1.0,0.909091,10.0,Species,Xgboost
Elephas maximus borneensis,0.909091,1.0,0.952381,10.0,Sub-species,Xgboost
Elephas maximus indicus,0.769231,1.0,0.869565,10.0,Sub-species,Xgboost
Elephas maximus maximus,1.0,1.0,1.0,10.0,Sub-species,Xgboost
Elephas maximus sumatranus,1.0,0.6,0.75,10.0,Sub-species,Xgboost


## Automate Collection

In [7]:
def prep_nn_validationSet(df:pd.DataFrame, taxon_target:str):
    taxon_y = df[taxon_target]
    
    if taxon_y.isnull().any():
        df = df.dropna(subset=[taxon_target])
    
    y = df[taxon_target]
    X = df.drop(columns=['taxon_kingdom_name', 'taxon_phylum_name',
                             'taxon_class_name', 'taxon_order_name', 'taxon_family_name',
                             'taxon_genus_name', 'taxon_species_name', 'sub_species', 'common_name'])
    
    classes = y.nunique()
    
    # Encode labels
    classes = y.nunique()
    lb = LabelBinarizer()
    lb.fit(y)
    y = lb.transform(y)
    
    if classes == 2:
        y = nn_binary_label_handling(y)
    
    
    # Normalize numerical columns in the dataset
    norm_columns = ['apparent_temperature', 'apparent_temperature_max', 'apparent_temperature_min',
                    'cloudcover', 'cloudcover_high', 'cloudcover_low', 'cloudcover_mid', 'dewpoint_2m',
                    'diffuse_radiation', 'direct_radiation', 'elevation', 'et0_fao_evapotranspiration_daily',
                    'et0_fao_evapotranspiration_hourly', 'precipitation', 'precipitation_hours',
                    'precipitation_sum', 'rain', 'rain_sum', 'relativehumidity_2m', 'shortwave_radiation',
                    'shortwave_radiation_sum', 'snowfall', 'snowfall_sum', 'soil_moisture_0_to_7cm',
                    'soil_moisture_28_to_100cm', 'soil_moisture_7_to_28cm', 'soil_temperature_0_to_7cm',
                    'soil_temperature_28_to_100cm', 'soil_temperature_7_to_28cm', 'surface_pressure',
                    'temperature_2m', 'temperature_2m_max', 'temperature_2m_min', 'vapor_pressure_deficit',
                    'winddirection_100m', 'winddirection_10m', 'winddirection_10m_dominant',
                    'windgusts_10m', 'windgusts_10m_max', 'windspeed_100m', 'windspeed_10m',
                    'windspeed_10m_max']

    X[norm_columns] = StandardScaler().fit_transform(X[norm_columns])
    return X, y, lb

In [8]:
model_abbreviations = {'Neural network': 'nn',
                       'Decision tree': 'dt',
                       'Random forest': 'rf',
                       'Xgboost': 'xgb',
                       'AdaBoost': 'ada'}

In [9]:
def model_selection(model_type: str, file_identifier: str):
    match model_type:
        case 'Neural network':
            model = keras.models.load_model('model_comparison_cache/' + file_identifier +  '_nn_model')
            return model
        case 'Decision tree':
            model = pickle.load(open('model_comparison_cache/' + file_identifier + '_dt_model.sav', 'rb'))
            return model
        case 'Random forest':
            model = pickle.load(open('model_comparison_cache/' + file_identifier + '_rf_model.sav', 'rb'))
            return model
        case 'Xgboost':
            model = xgb.XGBClassifier()
            model.load_model('model_comparison_cache/' + file_identifier + '_xgb_model.json')
            return model
        case 'AdaBoost':
            model = pickle.load(open('model_comparison_cache/' + file_identifier + '_ada_model.sav', 'rb'))
            return model

In [10]:
def validation_set_process(validation_type: str, taxon_target: str, validation_df: pd.DataFrame):
    match validation_type:
        case 'dt':
            X_val, y_val = prep_dt_validationSet(validation_df, taxon_target)
            return X_val, y_val, _
        case 'rf':
            X_val, y_val = prep_dt_validationSet(validation_df, taxon_target)
            return X_val, y_val, _
        case 'ada':
            X_val, y_val = prep_dt_validationSet(validation_df, taxon_target)
            return X_val, y_val, _
        case 'xgb':
            X_val, y_val, lb = prep_xgb_validationSet(validation_df, taxon_target)
            return X_val, y_val, lb
        case 'nn':
            X_val, y_val, lb = prep_nn_validationSet(validation_df, taxon_target)
            return X_val, y_val, lb

In [11]:
taxon_label = {'taxon_genus_name': 'Genus',
              'taxon_species_name': 'Species',
              'sub_species': 'Sub-species'}

In [12]:
def classification_report_collection(report_collection_df: pd.DataFrame, file_identifier: str, model_type: str, data_process: str, taxon_target: str, ohe_status: bool):
    # Create the model
    model = model_selection(model_type, file_identifier)
    
    # Generate validation dataset split
    validation_df = pd.read_csv('model_comparison_cache/' + 
                                file_identifier + '_' + 
                                model_abbreviations[model_type] + 
                                '_validation.csv', index_col=0)

    # Prepare the model's validation set
    X_val, y_val, lb = validation_set_process(data_process, taxon_target, validation_df)
    
    # Predict
    y_pred = model.predict(X_val)
    
    # Convert to labels if ohe
    if ohe_status:
        y_val, y_pred = convert_to_labels(y_val, y_pred, lb)
        
        
    # Add the report to the report collection
    report_collection_df = add_model_report(report_collection_df, y_val, y_pred, taxon_label[taxon_target], model_type)
    return report_collection_df

    

In [13]:
def collection_cycle(report_collection_df: pd.DataFrame, 
                     file_identifiers: list, 
                     model_types: list,
                     taxon_targets: list):
    for i in range(len(model_types)):
        print('------------------------')
        print(model_types[i])
        print('------------------------')
        
        
        for j in range(len(file_identifiers)):
            print(file_identifiers[j])
            report_collection_df = classification_report_collection(report_collection_df=report_collection_df,
                                                                     file_identifier=file_identifiers[j], 
                                                                     model_type=model_types[i], 
                                                                     data_process=data_processes[i],
                                                                     taxon_target=taxon_targets[j],
                                                                     ohe_status=ohe_status_list[i])
    return report_collection_df

In [14]:
report_collection_df = pd.DataFrame()

In [17]:
file_identifiers = ['Felidae_family', 'Lynx_genus', 'Panthera_genus', 'Caracal_genus', 'Leopardus_genus', 'Felis_genus', 'Prionailurus_genus', 'Lynx_rufus_species', 'Panthera_leo_species', 'Panthera_pardus_species', 'Acinonyx_jubatus_species', 'Panthera_tigris_species', 'Puma_concolor_species', 'Leopardus_pardalis_species', 'Felis_lybica_species', 'Leptailurus_serval_species', 'Prionailurus_bengalensis_species']
taxon_targets = ['taxon_genus_name', 'taxon_species_name', 'taxon_species_name', 'taxon_species_name', 'taxon_species_name', 'taxon_species_name', 'taxon_species_name', 'sub_species', 'sub_species', 'sub_species', 'sub_species', 'sub_species', 'sub_species', 'sub_species', 'sub_species', 'sub_species', 'sub_species']
model_types = ['Neural network', 'Decision tree', 'Random forest', 'Xgboost', 'AdaBoost']
data_processes = ['nn', 'dt', 'rf', 'xgb', 'ada']
ohe_status_list = [True, False, False, True, False]

report_collection_df = collection_cycle(report_collection_df,
                                        file_identifiers, 
                                        model_types,
                                        taxon_targets)

------------------------
Neural network
------------------------
Felidae_family
Lynx_genus
Panthera_genus


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Caracal_genus
Leopardus_genus
Felis_genus
Prionailurus_genus


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Lynx_rufus_species
Panthera_leo_species
Panthera_pardus_species
Acinonyx_jubatus_species
Panthera_tigris_species


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Puma_concolor_species
Leopardus_pardalis_species
Felis_lybica_species
Leptailurus_serval_species


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Prionailurus_bengalensis_species
------------------------
Decision tree
------------------------
Felidae_family
Lynx_genus
Panthera_genus
Caracal_genus
Leopardus_genus
Felis_genus
Prionailurus_genus
Lynx_rufus_species
Panthera_leo_species
Panthera_pardus_species
Acinonyx_jubatus_species
Panthera_tigris_species
Puma_concolor_species
Leopardus_pardalis_species
Felis_lybica_species
Leptailurus_serval_species
Prionailurus_bengalensis_species
------------------------
Random forest
------------------------
Felidae_family


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Lynx_genus
Panthera_genus
Caracal_genus
Leopardus_genus
Felis_genus
Prionailurus_genus
Lynx_rufus_species
Panthera_leo_species
Panthera_pardus_species


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Acinonyx_jubatus_species
Panthera_tigris_species
Puma_concolor_species
Leopardus_pardalis_species
Felis_lybica_species
Leptailurus_serval_species
Prionailurus_bengalensis_species
------------------------
Xgboost
------------------------
Felidae_family


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Lynx_genus
Panthera_genus
Caracal_genus
Leopardus_genus
Felis_genus
Prionailurus_genus
Lynx_rufus_species
Panthera_leo_species
Panthera_pardus_species
Acinonyx_jubatus_species
Panthera_tigris_species
Puma_concolor_species
Leopardus_pardalis_species
Felis_lybica_species
Leptailurus_serval_species
Prionailurus_bengalensis_species
------------------------
AdaBoost
------------------------
Felidae_family
Lynx_genus
Panthera_genus
Caracal_genus
Leopardus_genus
Felis_genus
Prionailurus_genus
Lynx_rufus_species
Panthera_leo_species
Panthera_pardus_species
Acinonyx_jubatus_species
Panthera_tigris_species
Puma_concolor_species
Leopardus_pardalis_species
Felis_lybica_species
Leptailurus_serval_species
Prionailurus_bengalensis_species


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

In [19]:
report_collection_df.tail(50)

Unnamed: 0,precision,recall,f1-score,support,taxon_level,model_type
Panthera tigris,0.526316,1.0,0.689655,10.0,Species,AdaBoost
Panthera uncia,1.0,1.0,1.0,10.0,Species,AdaBoost
Caracal aurata,1.0,0.4,0.571429,10.0,Species,AdaBoost
Caracal caracal,0.625,1.0,0.769231,10.0,Species,AdaBoost
Leopardus colocola,0.0,0.0,0.0,10.0,Species,AdaBoost
Leopardus emiliae,0.0,0.0,0.0,10.0,Species,AdaBoost
Leopardus garleppi,1.0,0.7,0.823529,10.0,Species,AdaBoost
Leopardus geoffroyi,0.0,0.0,0.0,10.0,Species,AdaBoost
Leopardus guigna,0.0,0.0,0.0,10.0,Species,AdaBoost
Leopardus guttulus,0.225806,0.7,0.341463,10.0,Species,AdaBoost


In [18]:
report_collection_df.shape

(350, 6)