In [None]:
from pycaret.classification import *

import pandas as pd
import numpy as np
import re

### Load your Data

In [None]:
# Location of the CSV file holding the samples to classify; edit it to point at your data.
data_path = "./your_directory/your_data.csv"
df = pd.read_csv(data_path)


### Inspect your data

Predictors must contain only float values, expressed as atomic percentages.

In [None]:
# List every column with its dtype and non-null count.
# `show_counts` replaces the `null_counts` argument, which was deprecated in
# pandas 1.2 and removed in pandas 2.0.
df.info(verbose=True, show_counts=True)

In [None]:
# Short keys accepted by `prediction_function`, each mapped to the model name
# that is handed to `load_model`.
MODEL_NAMES = dict(
    model1='strunz_classifier',
    model2='classifier',
)

def prediction_function(df, model_key):
    """
    Predict with one of the saved models and summarise the three highest class scores.

    This function performs the following operations:
    * Loads the pre-trained model registered under `model_key` in MODEL_NAMES.
    * Runs `predict_model` on the dataframe (df) with `raw_score=True`.
    * Collects the columns added by the prediction whose name contains
      'score' (case-insensitive). Columns that were already present in `df`
      are never treated as score columns, so a predictor whose name happens
      to contain 'score' is neither ranked nor removed.
    * For every row, keeps the three highest scores, sorted in descending
      order, as a `{column name: score}` dictionary.
    * Removes the individual score columns from the output.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to run the prediction on.
    model_key : str
        Key of MODEL_NAMES selecting which model to load.

    Returns
    -------
    pandas.DataFrame
        The frame returned by `predict_model`, without the per-class score
        columns and with an additional 'top_3_probs' column. The predicted
        label column is left exactly as `predict_model` produced it.

    Raises
    ------
    KeyError
        If `model_key` is not a key of MODEL_NAMES.
    """
    model = load_model(MODEL_NAMES[model_key])
    pred_prob = predict_model(estimator=model, data=df, raw_score=True)

    # Only columns created by the prediction can be class scores; skipping the
    # input columns protects predictors such as 'z_score' from being ranked
    # as probabilities and then dropped from the result.
    score_pattern = re.compile('.*Score.*', re.IGNORECASE)
    input_columns = set(df.columns)
    score_columns = [
        column
        for column in pred_prob.columns
        if column not in input_columns and score_pattern.match(column)
    ]

    def top_three_scores(row):
        # nlargest returns the values in descending order; the dict keeps that order.
        return row.nlargest(3).to_dict()

    top_three = pred_prob[score_columns].apply(top_three_scores, axis=1)

    # Replace the individual score columns with the single summary column.
    return pred_prob.drop(columns=score_columns).assign(top_3_probs=top_three)
    

A small adjustment is needed here: the prediction function is called twice, once for each model, so the output columns of the first run must be renamed to keep the two sets of results apart.

In [None]:
# Model 1
# Rename the generic output columns so they are not confused with the
# columns produced by the second model.
preds = prediction_function(df, 'model1').rename(
    columns={'prediction_label': 'Predicted_strunz', 'top_3_probs': 'top_3_probs_strunz'}
)
# Frame passed on to the second model: everything except the top-3 summary.
preds_model1 = preds.drop(columns=['top_3_probs_strunz'])

In [None]:
# Model 2
# Predict on the first model's output, then attach the first model's top-3
# summary again (aligned on the index) so both results sit side by side.
preds2 = prediction_function(preds_model1, 'model2').assign(
    top_3_probs_strunz=preds['top_3_probs_strunz']
)
preds2.head()

### Save your predictions for download

In [None]:
# Save the combined frame: `preds2` holds the output of both models, whereas
# `preds` only contains the results of the first one.
preds2.to_csv("./intermediate_data/your_predictions.csv")  # rename if necessary