# A Demo of How To Use ELUC Predictors

In [1]:
import pandas as pd
from sklearn.metrics import mean_absolute_error

from predictors.predictor import Predictor
from predictors.NeuralNetwork.NeuralNetPredictor import NeuralNetPredictor
from predictors.LinearRegression.LinearRegressionPredictor import LinearRegressionPredictor
from predictors.RandomForest.RandomForestPredictor import RandomForestPredictor
from data.data import ELUCData
from data import constants

In [3]:
# Identifier of the ELUC dataset handed to the project's ELUCData loader.
# Constructing ELUCData is what triggers the download shown in the recorded
# output; the resulting object exposes the `train_df` / `test_df` frames
# used by every cell below.
ELUC_DATASET_ID = "projectresilience/ELUC-committed"
data = ELUCData(ELUC_DATASET_ID)

Downloading data:   0%|          | 0.00/220M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/220M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/222M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/228M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/229M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/226M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/232M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/233M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/232M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/232M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/236M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/236M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/231M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

### Initialize and fit models a la `sklearn` API

In [3]:
# Build the neural-net predictor with deliberately small settings
# (presumably so the demo trains quickly -- confirm before reusing them).
nn_settings = {"epochs": 1, "train_pct": 0.1, "hidden_sizes": [128]}
nnp = NeuralNetPredictor(constants.NN_FEATS, **nn_settings)

# Fit sklearn-style: every column except the "ELUC" target is passed as input.
nn_train_inputs = data.train_df.drop("ELUC", axis=1)
nn_train_labels = data.train_df["ELUC"]
nnp.fit(nn_train_inputs, nn_train_labels, verbose=True)

# Report mean absolute error on the held-out test frame.
nn_test_labels = data.test_df["ELUC"]
nn_test_preds = nnp.predict(data.test_df.drop("ELUC", axis=1))
print(mean_absolute_error(nn_test_labels, nn_test_preds))

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
100%|██████████| 1927/1927 [00:30<00:00, 63.14it/s]


0.061624523


In [8]:
# Linear regression restricted to the land-use-difference feature columns.
diff_cols = constants.DIFF_LAND_USE_COLS
linreg = LinearRegressionPredictor(features=diff_cols, n_jobs=-1)

# NOTE(review): the whole training frame (including "ELUC") is passed to fit,
# while predict below receives only `diff_cols`. This presumably relies on the
# predictor selecting its `features` internally -- confirm.
linreg.fit(data.train_df, data.train_df["ELUC"])

# Report mean absolute error on the held-out test frame.
linreg_test_labels = data.test_df["ELUC"]
linreg_test_preds = linreg.predict(data.test_df[diff_cols])
print(mean_absolute_error(linreg_test_labels, linreg_test_preds))

0.07567061


In [5]:
# Random forest with a fixed seed so repeated runs give the same model.
rf = RandomForestPredictor(n_jobs=-1, max_features="sqrt", random_state=42)

# Train only on rows whose index label is 2010 or later
# (presumably the index is keyed by year -- TODO confirm).
rf_train_rows = data.train_df.loc[2010:]
rf.fit(rf_train_rows[constants.NN_FEATS], rf_train_rows["ELUC"])

# Report mean absolute error on the held-out test frame.
rf_test_labels = data.test_df["ELUC"]
rf_test_preds = rf.predict(data.test_df[constants.NN_FEATS])
print(mean_absolute_error(rf_test_labels, rf_test_preds))

0.045818872993118626


### Save models

In [6]:
# Persist each trained predictor. The paths used here are the ones that must
# be passed to `load` later on.
save_targets = [
    (nnp, "predictors/NeuralNetwork/test"),
    (linreg, "predictors/LinearRegression/test.joblib"),
    (rf, "predictors/RandomForest/test.joblib"),
]
for trained_model, save_path in save_targets:
    trained_model.save(save_path)

### Load model back with exact same path used for save
Each loaded model is treated as a generic `Predictor` object, so a single helper can evaluate many different types of predictors.

In [6]:
def evaluate_model(model: Predictor, test_df: pd.DataFrame) -> float:
    """
    Score a predictor on a test dataframe using mean absolute error.

    :param model: object whose `predict` method accepts a dataframe of inputs.
    :param test_df: dataframe holding the "ELUC" target column; every other
        column is handed to the model as input.
    :return: mean absolute error between the "ELUC" column and the predictions.
    """
    actual = test_df["ELUC"]
    model_inputs = test_df.drop(columns="ELUC")
    predicted = model.predict(model_inputs)
    return mean_absolute_error(actual, predicted)

In [9]:
# Re-create each predictor empty, then restore it from the exact path that
# was passed to `save` earlier in the notebook.
nnp_test = NeuralNetPredictor()
nnp_test.load("predictors/NeuralNetwork/test")
linreg_test = LinearRegressionPredictor()
# Same ".joblib" path used when saving; the load path must match the save path.
linreg_test.load("predictors/LinearRegression/test.joblib")
rf_test = RandomForestPredictor()
rf_test.load("predictors/RandomForest/test.joblib")

# Evaluate all reloaded models through the shared `evaluate_model` helper.
# The printed list is ordered: neural net, linear regression, random forest.
models = [nnp_test, linreg_test, rf_test]

print([evaluate_model(model, data.test_df) for model in models])

0.07567061

With this flow we can see that each reloaded model evaluates to the same mean absolute error it had before being saved!