# Prediction with Sub-Catchment Areas For Each Site

## Imports

In [1]:
import pandas as pd
import prediction_of_H_indicator_with_subCatchmentData as prediction
import ipywidgets as widgets
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

## Variables

In [2]:
# Presumably the folder used for results; no cell visible in this notebook
# references it -- TODO confirm whether it is still needed.
RESULT_FOLDER = "data"
# Placeholder site identifier; it is reassigned further down (widget callback
# or manual cell) before the train/test split reads it.
site_number = 0

## Import and clean the data

In [3]:
# Load the raw input table through the project helper module.
# In the recorded output the last rows (site 40) have empty HError and Rate values.
input_data = prediction.import_input_data()
# Bare last expression so the notebook renders the table.
input_data

Unnamed: 0,Site,SubCatch,Slope,Elevation,LC,SAR,Area,CV,HV,HError,Rate
0,1,1,1.203898,8.936689,319.676414,1.000254,1648125,167,211,0.000000,1.0
1,1,1,1.203898,8.936689,319.676414,1.000254,1648125,167,211,0.003310,2.0
2,1,1,1.203898,8.936689,319.676414,1.000254,1648125,167,211,0.003310,7.0
3,1,1,1.203898,8.936689,319.676414,1.000254,1648125,167,211,0.003310,15.0
4,1,1,1.203898,8.936689,319.676414,1.000254,1648125,167,211,0.035656,21.0
...,...,...,...,...,...,...,...,...,...,...,...
6805,40,15,8.875044,224.490265,933.385135,1.006683,4387500,0,12,,
6806,40,16,6.585227,178.075058,700.135624,1.003876,2930625,0,20,,
6807,40,17,5.182087,173.802765,685.592278,1.002990,3661875,0,34,,
6808,40,18,4.617903,196.079285,597.577939,1.002033,5023125,0,68,,


In [4]:
# Clean the raw table into a new variable so the raw frame stays available.
# NOTE(review): the recorded output ends at site 39, so clean_data presumably
# drops rows with missing HError/Rate -- confirm in the helper module.
input_data_cleaned = prediction.clean_data(input_data)
# Bare last expression so the notebook renders the cleaned table.
input_data_cleaned

Unnamed: 0,Site,SubCatch,Slope,Elevation,LC,SAR,Area,CV,HV,HError,Rate
0,1,1,1.203898,8.936689,319.676414,1.000254,1648125,167,211,0.000000,1.0
1,1,1,1.203898,8.936689,319.676414,1.000254,1648125,167,211,0.003310,2.0
2,1,1,1.203898,8.936689,319.676414,1.000254,1648125,167,211,0.003310,7.0
3,1,1,1.203898,8.936689,319.676414,1.000254,1648125,167,211,0.003310,15.0
4,1,1,1.203898,8.936689,319.676414,1.000254,1648125,167,211,0.035656,21.0
...,...,...,...,...,...,...,...,...,...,...,...
6784,39,9,5.605823,217.943283,640.819977,1.001974,5394375,0,26,0.003682,2000.0
6785,39,9,5.605823,217.943283,640.819977,1.001974,5394375,0,26,0.003682,2250.0
6786,39,9,5.605823,217.943283,640.819977,1.001974,5394375,0,26,0.003682,3000.0
6787,39,9,5.605823,217.943283,640.819977,1.001974,5394375,0,26,0.003682,3182.0


## Choosing the new site to make predictions for

In [11]:
# CAUTION: when running from JupyterLab, the widgets extension must be installed first:
#   jupyter labextension install @jupyter-widgets/jupyterlab-manager
# If this cell does not run, skip it and set the site manually in the next cell.


from IPython.display import display, Javascript

# Site names in site-number order: the dropdown value attached to each name is
# its 1-based position in this list.
SITE_NAMES = [
    'Breville-Sur-Mer', 'Agon-Coutainville', 'Saint-Germain-Sur-Ay', 'Jullouville',
    'Etréham', 'Genêts', 'Saint-Pair-sur-Mer', 'Arromanches-les-Bains',
    'Port-en-Bessin', 'Courtils', 'Lessay', 'Doville',
    'Graye-sur-Mer', 'Saint-Potan', 'Saint-Vaast-la-Hougue', 'La_Pernelle',
    'Lestre', 'Banville', 'Isigny-sur-Mer', 'Saint-Malo',
    'Pierreville', 'La_Feuillie', 'Tourlaville', 'Octeville',
    'Saint-Briac-sur-Mer', 'Granville', 'Blainville-sur-Mer', 'Hauteville-sur-Mer',
    'Sainte-Marie-du-Mont', 'Vierville-sur-Mer', 'Cherrueix', 'Chef-du-Pont',
    'Saint-Lô', 'Couvains', 'Rocheville', 'Lison',
    'Marigny', 'Carville', 'Percy', 'Saint-Armand',
]

drop = widgets.Dropdown(
    options=[(name, number) for number, name in enumerate(SITE_NAMES, start=1)],
    value=7,
    description='Site:',
)
button = widgets.Button(description="Valid")
output = widgets.Output()

display(drop, button, output)

def on_button_clicked(b):
    """Store the dropdown selection in the global ``site_number`` and confirm it.

    Parameters
    ----------
    b : the button instance passed by ipywidgets to click handlers (unused).
    """
    global site_number
    site_number = drop.value
    # Echo the choice in the Output widget so the user can see that the click
    # was registered; without this the assignment happens silently.
    output.clear_output(wait=True)
    with output:
        print(f"Selected site: {drop.label} (site_number = {site_number})")

button.on_click(on_button_clicked)

Dropdown(description='Site:', index=6, options=(('Breville-Sur-Mer', 1), ('Agon-Coutainville', 2), ('Saint-Ger…

Button(description='Valid', style=ButtonStyle())

Output()

In [12]:
# Hard-coded site to predict the H values for; edit the number to pick another site.
# This assignment runs after the widget cell, so on "Run All" it is the value used
# by the cells below (clicking the widget button afterwards overwrites it again).

site_number = 3

## Prepare datasets for training the prediction model

In [13]:
# Separate the cleaned table into the feature columns and the target variable.
features, variable_to_predict = prediction.split_dataset_into_features_and_variable_to_predict(input_data_cleaned)
# Build the train/test partitions for the chosen site.
# NOTE(review): presumably the rows of `site_number` form the test set and all
# other sites the training set -- confirm in the helper module.
features_train, features_test, variable_train, variable_test = prediction.get_train_and_test_data(features, variable_to_predict, site_number)

## Training prediction model

In [14]:
def train_random_forest_model(X_train, y_train, n_estimators=1000, random_state=1, n_jobs=-1):
    """Fit a random-forest regressor on the training data.

    The split criterion is left at scikit-learn's default (squared error).
    The former spelling ``criterion="mse"`` was deprecated in scikit-learn 1.0
    and removed in 1.2, where passing it raises an error; the default selects
    the same criterion under every version.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
        Training features.
    y_train : pandas DataFrame/Series or array-like
        Training target; flattened to one dimension before fitting.
    n_estimators : int, default 1000
        Number of trees in the forest.
    random_state : int, default 1
        Seed that makes the fitted forest reproducible.
    n_jobs : int, default -1
        Number of parallel jobs (-1 uses all available cores).

    Returns
    -------
    RandomForestRegressor
        The fitted estimator.
    """
    forest = RandomForestRegressor(
        n_estimators=n_estimators, random_state=random_state, n_jobs=n_jobs
    )
    # The estimator expects a 1-D target; a single-column frame is 2-D.
    forest.fit(X_train, np.asarray(y_train).ravel())
    return forest

# Fit the forest on the training partition built in the previous section.
prediction_model = train_random_forest_model(features_train, variable_train)

## Prediction

In [15]:
def predict_with_trained_model(forest, X_train, X_test):
    """Run a fitted model on both partitions.

    Parameters
    ----------
    forest : object exposing ``predict(X)``
        The already-fitted model.
    X_train, X_test : feature sets for the training and test partitions.

    Returns
    -------
    tuple
        ``(train_predictions, test_predictions)``, in that order.
    """
    # Training partition first, then test partition -- same call order as the
    # returned pair.
    return tuple(forest.predict(partition) for partition in (X_train, X_test))

# Predict on both partitions; the test predictions feed the metrics and Pmax cells below.
variable_train_pred, variable_test_pred = predict_with_trained_model(prediction_model, features_train, features_test)

## Quality metrics for H values (MSE & NSE/R² scores)

In [16]:
# Sub-catchment identifiers of the selected site, looked up via the helper module.
subCatchment_numbers = prediction.get_subcatchment_numbers_for_a_site(site_number, variable_to_predict)
# Observed target values of the test partition, converted by the helper.
# NOTE(review): presumably a plain list of HError values -- confirm in the helper module.
liste_variable_test_HError = prediction.get_list_variable_test_Hind_Values(variable_test)
# Alias for the model's test predictions, named to mirror the observed list above.
liste_variable_test_pred_HError = variable_test_pred
# MSE and R² on the test partition; both are passed to the Pmax save call further down.
mse_test, r2_test = prediction.get_standard_quality_metrics(subCatchment_numbers, liste_variable_test_HError, liste_variable_test_pred_HError)



## Get real and predicted Pmax

In [17]:
# Rate values of the selected site, looked up in the feature table via the helper module.
rates = prediction.get_rates_for_a_site(site_number, features)
# Real and predicted Pmax derived from the observed and predicted test values.
# The recorded output of this cell prints one real and one predicted p value per sub-catchment.
pmax_test, pmax_pred = prediction.get_real_and_pred_pmax(subCatchment_numbers, rates, liste_variable_test_HError, variable_test_pred)

Real value of p:  {2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0}
Predicted value of p:  {2: 100.0, 3: 182.0, 4: 100.0, 5: 1.0}


## Save results into files

In [18]:
# Run parameters shared by both output files, kept in one place so the two
# save calls cannot drift apart.
run_parameters = dict(approx=0, chronicle=0, permeability=27.32)

# Write the observed and predicted H-indicator values for each sub-catchment and rate.
prediction.save_Hind_results_into_file(
    site_number,
    subCatchment_numbers,
    rates,
    liste_variable_test_HError,
    variable_test_pred,
    **run_parameters,
)
# Write the real and predicted Pmax together with the quality metrics.
prediction.save_Pmax_results_into_file(
    site_number,
    pmax_test,
    pmax_pred,
    mse_test,
    r2_test,
    **run_parameters,
)

File created here:  output/Approx0/Chronicle0/SiteTest3/Prediction_HErrorValues_SubCatch_Chronicle0_Approx0_K27.32_Slope_Elevation_LC_SAR_Area_CV_HV.csv
File created here: output/Approx0/Chronicle0/SiteTest3/Prediction_PMax_SubCatch_Chronicle0_Approx0_K27.32_Slope_Elevation_LC_SAR_Area_CV_HV.csv
