In [9]:
import pandas as pd

# IMPORT DATA
We import the results from the SpatialRandomForest that was run in R, and add them to an object in Python.

In [10]:
def read_csv(run:int, object:str, dropIndex=True)->pd.DataFrame:
    """
    Read one exported CSV of a run, with or without its index column.

    The file is looked up at ``data/trees/{run}{object}.csv`` (relative to the
    current working directory).

    Parameters
    ----------
    run : int
        Run number; forms the first part of the file name.
    object : str
        Name of the exported object; forms the rest of the file name.
        The parameter shadows the builtin ``object`` inside this function;
        the name is kept so existing keyword callers keep working.
    dropIndex : bool, default True
        If True, remove the column pandas labels "Unnamed: 0" (an unnamed
        first column in the file). Files that do not contain such a column
        are returned unchanged instead of raising ``KeyError``.

    Returns
    -------
    pd.DataFrame
        The table read from the file.
    """
    frame = pd.read_csv(f"data/trees/{run}{object}.csv")
    if dropIndex:
        # errors="ignore": a file written without an index column simply has
        # nothing to drop, which should not be an error.
        frame = frame.drop(columns=["Unnamed: 0"], errors="ignore")
    return frame

In [11]:
"""
Some classes were made to reconstruct the repeated random forest model from R.
This is because we wanted to plot the results in Python and had trouble loading the model back into R.
"""

class residualsClass:
    """Residual tables of one run, each loaded from a CSV through ``read_csv``."""

    def __init__(self, run):
        raw_values = read_csv(run, "residuals_values")
        # Expose the column named "x" (if present) under the name "values".
        self.values = raw_values.rename(columns={"x": "values"})
        self.values_median = read_csv(run, "residuals_values_median")
        self.values_repetitions = read_csv(run, "residuals_values_repetitions")

class evaluationClass:
    """Stub for the evaluation results of a run; no data is loaded."""

    def __init__(self, run):
        # `run` is accepted but not used: only a placeholder attribute is set.
        self.PLACEHOLDER = 0

class predictionClass:
    """Prediction tables of one run, each loaded from a CSV through ``read_csv``."""

    def __init__(self, run):
        # NOTE(review): this file name starts with "predictions_" while the two
        # below start with "prediction_" — confirm against the exported files.
        raw_values = read_csv(run, "predictions_values")
        # Expose the column named "x" (if present) under the name "values".
        self.values = raw_values.rename(columns={"x": "values"})
        # The attribute is spelled "repetion" (sic); SpatialRandomForestRepeat.info
        # reads it under this spelling, so the name is kept.
        self.values_per_repetion = read_csv(run, "prediction_values_per_repetition")
        self.values_median = read_csv(run, "prediction_values_median")

class importanceClass:
    """Importance tables of one run, each loaded from a CSV through ``read_csv``."""

    def __init__(self, run):
        # Attribute names mirror the file-name suffix after "importance_".
        self.per_variable = read_csv(run, "importance_per_variable")
        self.per_repetition = read_csv(run, "importance_per_repetition")
        self.local = read_csv(run, "importance_local")



In [12]:
class SpatialRandomForestRepeat:
    """
    Python-side container for the exported results of one run.

    Every attribute is filled from a CSV loaded with ``read_csv`` for the
    given run. Single-value exports are reduced to the top-left cell of
    their table; the remaining exports are kept as DataFrames or wrapped in
    the small result classes (residuals, evaluation, predictions, importance).
    """

    def __init__(self, run):
        def first_cell(name):
            # Single-value export: the value sits in row 0, column 0.
            return read_csv(run, name).iloc[0, 0]

        self.run = run

        # Model settings and scores (read in the same order as before).
        self.num_trees = first_cell("num_trees")
        self.num_independent_variables = first_cell("num_independent_variables")
        self.mtry = first_cell("mtry")
        self.min_node_size = first_cell("min_node_size")
        self.variable_importance = read_csv(run, "variable_importance")
        self.variable_importance_local = read_csv(run, "variable_importance_local")
        self.prediction_error = first_cell("prediction_error")
        self.r_squared = first_cell("r_squared")
        self.num_samples = first_cell("num_samples")
        self.performance = read_csv(run, "performance")

        # Nested result groups, each loading its own set of files.
        self.residuals = residualsClass(run)
        self.evaluation = evaluationClass(run)
        self.predictions = predictionClass(run)
        self.importance = importanceClass(run)

    def info(self):
        """Print a short summary of the run: run id, num_trees, mtry, num_samples and repetitions."""
        summary_parts = [
            "---------------------------\n",
            f"Model from run: {self.run} \n",
            f"num_trees: {self.num_trees} \n",
            f"mtry: {self.mtry} \n",
            f"num_samples: {self.num_samples} \n",
            # Repetitions = number of columns in the per-repetition prediction table.
            f"repetitions: {len(self.predictions.values_per_repetion.columns)} \n",
        ]
        print("".join(summary_parts))

# Build the result object for run 10; the constructor loads that run's CSVs through read_csv.
treeResult = SpatialRandomForestRepeat(run=10)


# Retrieve importance scores
For the final report we decided to use only the importance scores, so we save these to a new file.

In [13]:
# Work on a copy so the changes made to `imp` below do not touch the frame stored on treeResult.
imp = treeResult.importance.per_variable.copy()

def keep(x: "pd.Series | list[str]") -> bool:
    """
    Decide whether a row should be kept, based on its first element.

    Parameters
    ----------
    x : pd.Series or list
        One row. When used with ``DataFrame.apply(..., axis=1)`` this is a
        Series labelled by column name; a plain list is accepted as well.

    Returns
    -------
    bool
        False when the first element contains the text "spatial_predictor",
        True otherwise.
    """
    # A row Series is labelled by column name, so `x[0]` would rely on the
    # positional fallback of Series.__getitem__, which pandas has deprecated.
    # Use explicit positional access for Series and plain indexing otherwise.
    first = x.iloc[0] if isinstance(x, pd.Series) else x[0]
    return "spatial_predictor" not in first

# Keep only the rows keep() accepts, then store importance as absolute values.
imp = (
    imp.loc[imp.apply(keep, axis=1)]
    .assign(importance=lambda frame: frame["importance"].abs())
)

# Save the filtered scores without the index.
imp.to_csv("data/Tree_importance.csv", index=False)