In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.impute import KNNImputer
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, median_absolute_error, mean_squared_error, mean_squared_log_error, r2_score

In [None]:
# Load every raw dataset into its own DataFrame.
# Forward slashes are used as the path separator: they resolve on Windows,
# Linux and macOS alike, whereas a backslash in a normal (non-raw) string
# starts an escape sequence ("\A", "\L", "\R", "\W" are invalid escapes that
# Python warns about) and only works as a separator on Windows.
aquifer_auser = pd.read_csv("Aquifer Dataset/Aquifer_Auser.csv")
aquifer_doganella = pd.read_csv("Aquifer Dataset/Aquifer_Doganella.csv")
aquifer_luco = pd.read_csv("Aquifer Dataset/Aquifer_Luco.csv")
aquifer_petrignano = pd.read_csv("Aquifer Dataset/Aquifer_Petrignano.csv")
lake_bilancino = pd.read_csv("Lake Dataset/Lake_Bilancino.csv")
river_arno = pd.read_csv("River Dataset/River_Arno.csv")
water_spring_amiata = pd.read_csv("Water Spring Dataset/Water_Spring_Amiata.csv")
water_spring_lupa = pd.read_csv("Water Spring Dataset/Water_Spring_Lupa.csv")
water_spring_madonna_di_canneto = pd.read_csv("Water Spring Dataset/Water_Spring_Madonna_di_Canneto.csv")

## Aquifer Auser 

In [None]:
# Share of missing entries per column of the Auser dataset, in percent.
auser_missing_counts = aquifer_auser.isnull().sum()
print("Missing Values Percentage:\n", auser_missing_counts * 100 / len(aquifer_auser))

In [None]:
# Feature (input) and target (output) columns of the Auser aquifer.
auser_features = [
    'Rainfall_Gallicano', 'Rainfall_Pontetetto', 'Rainfall_Monte_Serra',
    'Rainfall_Orentano', 'Rainfall_Borgo_a_Mozzano', 'Rainfall_Piaggione',
    'Rainfall_Calavorno', 'Rainfall_Croce_Arcana',
    'Rainfall_Tereglio_Coreglia_Antelminelli', 'Rainfall_Fabbriche_di_Vallico',
    'Temperature_Orentano', 'Temperature_Monte_Serra',
    'Temperature_Ponte_a_Moriano', 'Temperature_Lucca_Orto_Botanico',
    'Volume_POL', 'Volume_CC1', 'Volume_CC2', 'Volume_CSA', 'Volume_CSAL',
    'Hydrometry_Monte_S_Quirico', 'Hydrometry_Piaggione',
]
auser_targets = [
    'Depth_to_Groundwater_SAL', 'Depth_to_Groundwater_CoS',
    'Depth_to_Groundwater_LT2', 'Depth_to_Groundwater_PAG',
    'Depth_to_Groundwater_DIEC',
]
x = aquifer_auser[auser_features]
y = aquifer_auser[auser_targets]

# Neighbour counts tried for the KNN imputer, plus one list per metric that
# collects the score obtained with each neighbour count.
k_value = [1, 3, 5, 7, 9, 11, 13, 17]
mean_error, median_error, mse_error = [], [], []
rmse_error, rmsle_error, r2_error = [], [], []

for i in k_value:
    print("k = ", i)
    imputer = KNNImputer(n_neighbors = i)

    # The same imputer object is fitted on the features first and then
    # re-fitted on the targets; the two imputations are independent.
    x_transform = imputer.fit(x).transform(x)
    print("Missing values in 'x' features: ", np.isnan(x_transform).sum())

    y_transform = imputer.fit(y).transform(y)
    print("Missing values in 'y' features: ", np.isnan(y_transform).sum())

    # The forest is scored on the same rows it was trained on.
    random_forest = RandomForestRegressor()
    random_forest.fit(x_transform, y_transform)
    predict_random_forest = random_forest.predict(x_transform)

    mse = mean_squared_error(y_transform, predict_random_forest)
    mean_error.append(mean_absolute_error(y_transform, predict_random_forest))
    median_error.append(median_absolute_error(y_transform, predict_random_forest))
    mse_error.append(mse)
    rmse_error.append(np.sqrt(mse))
    # Absolute values are taken because the log error rejects negative inputs.
    rmsle_error.append(np.sqrt(mean_squared_log_error(np.abs(y_transform), np.abs(predict_random_forest))))
    r2_error.append(r2_score(y_transform, predict_random_forest))

In [None]:
# One panel per metric, each drawn against the imputer's neighbour count.
fig, axes = plt.subplots(3, 2, figsize = (10, 12))
fig.suptitle('Error Estimation')

# Name the panels instead of indexing the 3x2 axes grid.
(ax_mean, ax_median), (ax_mse, ax_rmse), (ax_rmsle, ax_r2) = axes

ax_mean.plot(k_value, mean_error)
ax_mean.set_title('Mean Error')
ax_median.plot(k_value, median_error)
ax_median.set_title('Median Error')
ax_mse.plot(k_value, mse_error)
ax_mse.set_title('Mean Square  Error')
ax_rmse.plot(k_value, rmse_error)
ax_rmse.set_title('Root Mean Square Error')
ax_rmsle.plot(k_value, rmsle_error)
ax_rmsle.set_title('Root Mean Square Log Error')
ax_r2.plot(k_value, r2_error)
ax_r2.set_title('R2 Error')

In [None]:
# Compare four regressors on the Auser rows whose targets are all observed,
# once for every candidate neighbour count of the feature imputer.
auser_features = [
    'Rainfall_Gallicano', 'Rainfall_Pontetetto', 'Rainfall_Monte_Serra',
    'Rainfall_Orentano', 'Rainfall_Borgo_a_Mozzano', 'Rainfall_Piaggione',
    'Rainfall_Calavorno', 'Rainfall_Croce_Arcana',
    'Rainfall_Tereglio_Coreglia_Antelminelli', 'Rainfall_Fabbriche_di_Vallico',
    'Temperature_Orentano', 'Temperature_Monte_Serra',
    'Temperature_Ponte_a_Moriano', 'Temperature_Lucca_Orto_Botanico',
    'Volume_POL', 'Volume_CC1', 'Volume_CC2', 'Volume_CSA', 'Volume_CSAL',
    'Hydrometry_Monte_S_Quirico', 'Hydrometry_Piaggione',
]
auser_targets = [
    'Depth_to_Groundwater_SAL', 'Depth_to_Groundwater_CoS',
    'Depth_to_Groundwater_LT2', 'Depth_to_Groundwater_PAG',
    'Depth_to_Groundwater_DIEC',
]

# Keep only the rows in which every target is present, so the targets need
# no imputation here.
filter_aquifer_auser = aquifer_auser[aquifer_auser[auser_targets].notna().all(axis = 1)]
x = filter_aquifer_auser[auser_features]
y = filter_aquifer_auser[auser_targets]

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
result_rf_median = []
result_knn_median = []
result_dt_median = []
result_lr_median = []
result_rf_r2 = []
result_knn_r2 = []
result_dt_r2 = []
result_lr_r2 = []

for i in k_value:
    # Always impute from the untouched feature frame `x`. Overwriting `x`
    # with the imputed array would leave nothing to impute from the second
    # iteration on, so every k would silently reuse the first imputation.
    imputer = KNNImputer(n_neighbors = i)
    x_transform = imputer.fit_transform(x)

    # Fresh models for every k, paired with the lists that collect their
    # scores. The tree-based models are seeded so that differences between
    # k values are not just random-number noise.
    candidates = [
        (RandomForestRegressor(random_state = 42), result_rf_median, result_rf_r2),
        (DecisionTreeRegressor(random_state = 42), result_dt_median, result_dt_r2),
        (LinearRegression(), result_lr_median, result_lr_r2),
        (KNeighborsRegressor(), result_knn_median, result_knn_r2),
    ]
    for model, median_scores, r2_scores in candidates:
        # Each model is scored on the same rows it was trained on.
        prediction = model.fit(x_transform, y).predict(x_transform)
        median_scores.append(median_absolute_error(y, prediction))
        r2_scores.append(r2_score(y, prediction))

In [None]:
# Median absolute error of each regressor against the imputer's neighbour count.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('Median Error Performance')

# Name the panels instead of indexing the 2x2 axes grid.
(ax_rf, ax_dt), (ax_lr, ax_knn) = axes

ax_rf.plot(k_value, result_rf_median)
ax_rf.set_title('Random Forest Regressor')
ax_dt.plot(k_value, result_dt_median)
ax_dt.set_title('Decision Tree Regressor')
ax_lr.plot(k_value, result_lr_median)
ax_lr.set_title('Linear Regression')
ax_knn.plot(k_value, result_knn_median)
ax_knn.set_title('KNeighbors Regressor')

In [None]:
# R2 score of each regressor against the imputer's neighbour count.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('R2 Score Error Performance')

# Name the panels instead of indexing the 2x2 axes grid.
(ax_rf, ax_dt), (ax_lr, ax_knn) = axes

ax_rf.plot(k_value, result_rf_r2)
ax_rf.set_title('Random Forest Regressor')
ax_dt.plot(k_value, result_dt_r2)
ax_dt.set_title('Decision Tree Regressor')
ax_lr.plot(k_value, result_lr_r2)
ax_lr.set_title('Linear Regression')
ax_knn.plot(k_value, result_knn_r2)
ax_knn.set_title('KNeighbors Regressor')

In [None]:
# Fit the final Auser model on the rows with complete targets and predict
# the targets for every row of the dataset.
auser_features = [
    'Rainfall_Gallicano', 'Rainfall_Pontetetto', 'Rainfall_Monte_Serra',
    'Rainfall_Orentano', 'Rainfall_Borgo_a_Mozzano', 'Rainfall_Piaggione',
    'Rainfall_Calavorno', 'Rainfall_Croce_Arcana',
    'Rainfall_Tereglio_Coreglia_Antelminelli', 'Rainfall_Fabbriche_di_Vallico',
    'Temperature_Orentano', 'Temperature_Monte_Serra',
    'Temperature_Ponte_a_Moriano', 'Temperature_Lucca_Orto_Botanico',
    'Volume_POL', 'Volume_CC1', 'Volume_CC2', 'Volume_CSA', 'Volume_CSAL',
    'Hydrometry_Monte_S_Quirico', 'Hydrometry_Piaggione',
]
auser_targets = [
    'Depth_to_Groundwater_SAL', 'Depth_to_Groundwater_CoS',
    'Depth_to_Groundwater_LT2', 'Depth_to_Groundwater_PAG',
    'Depth_to_Groundwater_DIEC',
]

# Impute the features once, with one imputer fitted on every row. Training
# and prediction then see identical feature values for the same row, and the
# two arrays are guaranteed to have the same number of columns.
imputer = KNNImputer(n_neighbors = 17)
x_transform = imputer.fit_transform(aquifer_auser[auser_features])
x = x_transform

# Training set: the rows in which every target is observed.
complete_targets = aquifer_auser[auser_targets].notna().all(axis = 1)
filter_aquifer_auser = aquifer_auser[complete_targets]
non_null_final_x = x[complete_targets.to_numpy()]
non_null_final_y = filter_aquifer_auser[auser_targets]

# Seeded so that the exported predictions are reproducible across runs.
random_forest = RandomForestRegressor(random_state = 42)
random_forest.fit(non_null_final_x, non_null_final_y)
final_prediction = random_forest.predict(x)

In [None]:
# Assemble the output table: imputed features, the original dates, and one
# predicted column per target (taken from final_prediction for every row).
auser_features = [
    'Rainfall_Gallicano', 'Rainfall_Pontetetto', 'Rainfall_Monte_Serra',
    'Rainfall_Orentano', 'Rainfall_Borgo_a_Mozzano', 'Rainfall_Piaggione',
    'Rainfall_Calavorno', 'Rainfall_Croce_Arcana',
    'Rainfall_Tereglio_Coreglia_Antelminelli', 'Rainfall_Fabbriche_di_Vallico',
    'Temperature_Orentano', 'Temperature_Monte_Serra',
    'Temperature_Ponte_a_Moriano', 'Temperature_Lucca_Orto_Botanico',
    'Volume_POL', 'Volume_CC1', 'Volume_CC2', 'Volume_CSA', 'Volume_CSAL',
    'Hydrometry_Monte_S_Quirico', 'Hydrometry_Piaggione',
]
auser_targets = [
    'Depth_to_Groundwater_SAL', 'Depth_to_Groundwater_CoS',
    'Depth_to_Groundwater_LT2', 'Depth_to_Groundwater_PAG',
    'Depth_to_Groundwater_DIEC',
]

final_aquifer_auser = pd.DataFrame(x, columns = auser_features)
final_aquifer_auser['Date'] = aquifer_auser['Date']

# Column `position` of final_prediction belongs to the target at the same
# position in auser_targets.
for position, target_name in enumerate(auser_targets):
    final_aquifer_auser[target_name] = final_prediction[:, position]

In [None]:
# Preview the first five rows of the assembled Auser table.
final_aquifer_auser.head(n = 5)

In [None]:
# Write the preprocessed Auser table to disk without the row index.
auser_output_path = 'Preprocess Datasets/final_aquifer_auser.csv'
final_aquifer_auser.to_csv(auser_output_path, index = False)

## Aquifer Doganella

In [None]:
# Share of missing entries per column of the Doganella dataset, in percent.
doganella_missing_counts = aquifer_doganella.isnull().sum()
print("Missing Values Percentage:\n", doganella_missing_counts * 100 / len(aquifer_doganella))

In [None]:
# Feature (input) and target (output) columns of the Doganella aquifer.
doganella_features = [
    'Rainfall_Monteporzio', 'Rainfall_Velletri',
    'Volume_Pozzo_1', 'Volume_Pozzo_2', 'Volume_Pozzo_3', 'Volume_Pozzo_4',
    'Volume_Pozzo_5+6', 'Volume_Pozzo_7', 'Volume_Pozzo_8', 'Volume_Pozzo_9',
    'Temperature_Monteporzio', 'Temperature_Velletri',
]
doganella_targets = [
    'Depth_to_Groundwater_Pozzo_1', 'Depth_to_Groundwater_Pozzo_2',
    'Depth_to_Groundwater_Pozzo_3', 'Depth_to_Groundwater_Pozzo_4',
    'Depth_to_Groundwater_Pozzo_5', 'Depth_to_Groundwater_Pozzo_6',
    'Depth_to_Groundwater_Pozzo_7', 'Depth_to_Groundwater_Pozzo_8',
    'Depth_to_Groundwater_Pozzo_9',
]
x = aquifer_doganella[doganella_features]
y = aquifer_doganella[doganella_targets]

# Neighbour counts tried for the KNN imputer, plus one list per metric that
# collects the score obtained with each neighbour count.
k_value = [1, 3, 5, 7, 9, 11, 13, 17]
median_error, rmsle_error, r2_error = [], [], []

for i in k_value:
    print("k = ", i)

    # The same imputer object is fitted on the features first and then
    # re-fitted on the targets; the two imputations are independent.
    imputer = KNNImputer(n_neighbors = i)
    x_transform = imputer.fit(x).transform(x)
    print("Missing values in 'x' features: ", np.isnan(x_transform).sum())

    y_transform = imputer.fit(y).transform(y)
    print("Missing values in 'y' features: ", np.isnan(y_transform).sum())

    # The forest is scored on the same rows it was trained on.
    random_forest = RandomForestRegressor()
    random_forest.fit(x_transform, y_transform)
    predict_random_forest = random_forest.predict(x_transform)

    median_error.append(median_absolute_error(y_transform, predict_random_forest))
    # Absolute values are taken because the log error rejects negative inputs.
    rmsle_error.append(np.sqrt(mean_squared_log_error(np.abs(y_transform), np.abs(predict_random_forest))))
    r2_error.append(r2_score(y_transform, predict_random_forest))

In [None]:
# One panel per metric, each drawn against the imputer's neighbour count.
fig, axes = plt.subplots(1, 3, figsize = (15, 6))
fig.suptitle('Error Estimation')

# Name the three panels instead of indexing the axes array.
ax_median, ax_rmsle, ax_r2 = axes

ax_median.plot(k_value, median_error)
ax_median.set_title('Median Error')
ax_rmsle.plot(k_value, rmsle_error)
ax_rmsle.set_title('Root Mean Square Log Error')
ax_r2.plot(k_value, r2_error)
ax_r2.set_title('R2 Error')

In [None]:
# Compare four regressors on the Doganella rows whose targets are all
# observed, once for every candidate neighbour count of the feature imputer.
doganella_features = [
    'Rainfall_Monteporzio', 'Rainfall_Velletri',
    'Volume_Pozzo_1', 'Volume_Pozzo_2', 'Volume_Pozzo_3', 'Volume_Pozzo_4',
    'Volume_Pozzo_5+6', 'Volume_Pozzo_7', 'Volume_Pozzo_8', 'Volume_Pozzo_9',
    'Temperature_Monteporzio', 'Temperature_Velletri',
]
doganella_targets = [
    'Depth_to_Groundwater_Pozzo_1', 'Depth_to_Groundwater_Pozzo_2',
    'Depth_to_Groundwater_Pozzo_3', 'Depth_to_Groundwater_Pozzo_4',
    'Depth_to_Groundwater_Pozzo_5', 'Depth_to_Groundwater_Pozzo_6',
    'Depth_to_Groundwater_Pozzo_7', 'Depth_to_Groundwater_Pozzo_8',
    'Depth_to_Groundwater_Pozzo_9',
]

# Keep only the rows in which every target is present, so the targets need
# no imputation here.
filter_aquifer_doganella = aquifer_doganella[aquifer_doganella[doganella_targets].notna().all(axis = 1)]
x = filter_aquifer_doganella[doganella_features]
y = filter_aquifer_doganella[doganella_targets]

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
result_rf_median = []
result_knn_median = []
result_dt_median = []
result_lr_median = []
result_rf_r2 = []
result_knn_r2 = []
result_dt_r2 = []
result_lr_r2 = []

for i in k_value:
    # Always impute from the untouched feature frame `x`. Overwriting `x`
    # with the imputed array would leave nothing to impute from the second
    # iteration on, so every k would silently reuse the first imputation.
    imputer = KNNImputer(n_neighbors = i)
    x_transform = imputer.fit_transform(x)

    # Fresh models for every k, paired with the lists that collect their
    # scores. The tree-based models are seeded so that differences between
    # k values are not just random-number noise.
    candidates = [
        (RandomForestRegressor(random_state = 42), result_rf_median, result_rf_r2),
        (DecisionTreeRegressor(random_state = 42), result_dt_median, result_dt_r2),
        (LinearRegression(), result_lr_median, result_lr_r2),
        (KNeighborsRegressor(), result_knn_median, result_knn_r2),
    ]
    for model, median_scores, r2_scores in candidates:
        # Each model is scored on the same rows it was trained on.
        prediction = model.fit(x_transform, y).predict(x_transform)
        median_scores.append(median_absolute_error(y, prediction))
        r2_scores.append(r2_score(y, prediction))

In [None]:
# Median absolute error of each regressor against the imputer's neighbour count.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('Median Error Performance')

# Name the panels instead of indexing the 2x2 axes grid.
(ax_rf, ax_dt), (ax_lr, ax_knn) = axes

ax_rf.plot(k_value, result_rf_median)
ax_rf.set_title('Random Forest Regressor')
ax_dt.plot(k_value, result_dt_median)
ax_dt.set_title('Decision Tree Regressor')
ax_lr.plot(k_value, result_lr_median)
ax_lr.set_title('Linear Regression')
ax_knn.plot(k_value, result_knn_median)
ax_knn.set_title('KNeighbors Regressor')

In [None]:
# R2 score of each regressor against the imputer's neighbour count.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('R2 Score Error Performance')

# Name the panels instead of indexing the 2x2 axes grid.
(ax_rf, ax_dt), (ax_lr, ax_knn) = axes

ax_rf.plot(k_value, result_rf_r2)
ax_rf.set_title('Random Forest Regressor')
ax_dt.plot(k_value, result_dt_r2)
ax_dt.set_title('Decision Tree Regressor')
ax_lr.plot(k_value, result_lr_r2)
ax_lr.set_title('Linear Regression')
ax_knn.plot(k_value, result_knn_r2)
ax_knn.set_title('KNeighbors Regressor')

In [None]:
# Fit the final Doganella model on the rows with complete targets and
# predict the targets for every row of the dataset.
doganella_features = [
    'Rainfall_Monteporzio', 'Rainfall_Velletri',
    'Volume_Pozzo_1', 'Volume_Pozzo_2', 'Volume_Pozzo_3', 'Volume_Pozzo_4',
    'Volume_Pozzo_5+6', 'Volume_Pozzo_7', 'Volume_Pozzo_8', 'Volume_Pozzo_9',
    'Temperature_Monteporzio', 'Temperature_Velletri',
]
doganella_targets = [
    'Depth_to_Groundwater_Pozzo_1', 'Depth_to_Groundwater_Pozzo_2',
    'Depth_to_Groundwater_Pozzo_3', 'Depth_to_Groundwater_Pozzo_4',
    'Depth_to_Groundwater_Pozzo_5', 'Depth_to_Groundwater_Pozzo_6',
    'Depth_to_Groundwater_Pozzo_7', 'Depth_to_Groundwater_Pozzo_8',
    'Depth_to_Groundwater_Pozzo_9',
]

# Impute the features once, with one imputer fitted on every row. Training
# and prediction then see identical feature values for the same row, and the
# two arrays are guaranteed to have the same number of columns.
# NOTE(review): this cell used to impute the prediction features with
# n_neighbors = 17 and the training features with n_neighbors = 13. A single
# imputer needs a single value; 13 (the one the model was trained with) is
# used here -- confirm this is the intended choice.
imputer = KNNImputer(n_neighbors = 13)
x_transform = imputer.fit_transform(aquifer_doganella[doganella_features])
x = x_transform

# Training set: the rows in which every target is observed.
complete_targets = aquifer_doganella[doganella_targets].notna().all(axis = 1)
filter_aquifer_doganella = aquifer_doganella[complete_targets]
non_null_final_x = x[complete_targets.to_numpy()]
non_null_final_y = filter_aquifer_doganella[doganella_targets]

# The final model for this dataset is a decision tree (the variable is named
# accordingly). Seeded so that the exported predictions are reproducible.
decision_tree = DecisionTreeRegressor(random_state = 42)
decision_tree.fit(non_null_final_x, non_null_final_y)
final_prediction = decision_tree.predict(x)

In [None]:
# Assemble the output table: imputed features, the original dates, and one
# predicted column per target (taken from final_prediction for every row).
doganella_features = [
    'Rainfall_Monteporzio', 'Rainfall_Velletri',
    'Volume_Pozzo_1', 'Volume_Pozzo_2', 'Volume_Pozzo_3', 'Volume_Pozzo_4',
    'Volume_Pozzo_5+6', 'Volume_Pozzo_7', 'Volume_Pozzo_8', 'Volume_Pozzo_9',
    'Temperature_Monteporzio', 'Temperature_Velletri',
]
doganella_targets = [
    'Depth_to_Groundwater_Pozzo_1', 'Depth_to_Groundwater_Pozzo_2',
    'Depth_to_Groundwater_Pozzo_3', 'Depth_to_Groundwater_Pozzo_4',
    'Depth_to_Groundwater_Pozzo_5', 'Depth_to_Groundwater_Pozzo_6',
    'Depth_to_Groundwater_Pozzo_7', 'Depth_to_Groundwater_Pozzo_8',
    'Depth_to_Groundwater_Pozzo_9',
]

final_aquifer_doganella = pd.DataFrame(x, columns = doganella_features)
final_aquifer_doganella['Date'] = aquifer_doganella['Date']

# Column `position` of final_prediction belongs to the target at the same
# position in doganella_targets.
for position, target_name in enumerate(doganella_targets):
    final_aquifer_doganella[target_name] = final_prediction[:, position]

In [None]:
# Write the preprocessed Doganella table to disk without the row index.
doganella_output_path = 'Preprocess Datasets/final_aquifer_doganella.csv'
final_aquifer_doganella.to_csv(doganella_output_path, index = False)

## Aquifer Luco

In [None]:
# Share of missing entries per column of the Luco dataset, in percent.
luco_missing_counts = aquifer_luco.isnull().sum()
print("Missing Values Percentage:\n", luco_missing_counts * 100 / len(aquifer_luco))

In [None]:
# Feature (input) and target (output) columns of the Luco aquifer.
luco_features = [
    'Rainfall_Simignano', 'Rainfall_Siena_Poggio_al_Vento',
    'Rainfall_Mensano', 'Rainfall_Montalcinello',
    'Rainfall_Monticiano_la_Pineta', 'Rainfall_Sovicille',
    'Rainfall_Ponte_Orgia', 'Rainfall_Scorgiano', 'Rainfall_Pentolina',
    'Rainfall_Monteroni_Arbia_Biena', 'Temperature_Siena_Poggio_al_Vento',
    'Temperature_Mensano', 'Temperature_Pentolina',
    'Temperature_Monteroni_Arbia_Biena',
    'Volume_Pozzo_1', 'Volume_Pozzo_3', 'Volume_Pozzo_4',
]
luco_targets = [
    'Depth_to_Groundwater_Podere_Casetta', 'Depth_to_Groundwater_Pozzo_1',
    'Depth_to_Groundwater_Pozzo_3', 'Depth_to_Groundwater_Pozzo_4',
]
x = aquifer_luco[luco_features]
y = aquifer_luco[luco_targets]

# Neighbour counts tried for the KNN imputer, plus one list per metric that
# collects the score obtained with each neighbour count.
k_value = [1, 3, 5, 7, 9, 11, 13, 17]
median_error, rmsle_error, r2_error = [], [], []

for i in k_value:
    print("k = ", i)

    # The same imputer object is fitted on the features first and then
    # re-fitted on the targets; the two imputations are independent.
    imputer = KNNImputer(n_neighbors = i)
    x_transform = imputer.fit(x).transform(x)
    print("Missing values in 'x' features: ", np.isnan(x_transform).sum())

    y_transform = imputer.fit(y).transform(y)
    print("Missing values in 'y' features: ", np.isnan(y_transform).sum())

    # The forest is scored on the same rows it was trained on.
    random_forest = RandomForestRegressor()
    random_forest.fit(x_transform, y_transform)
    predict_random_forest = random_forest.predict(x_transform)

    median_error.append(median_absolute_error(y_transform, predict_random_forest))
    # Absolute values are taken because the log error rejects negative inputs.
    rmsle_error.append(np.sqrt(mean_squared_log_error(np.abs(y_transform), np.abs(predict_random_forest))))
    r2_error.append(r2_score(y_transform, predict_random_forest))

In [None]:
# One panel per metric, each drawn against the imputer's neighbour count.
fig, axes = plt.subplots(1, 3, figsize = (15, 6))
fig.suptitle('Error Estimation')

# Name the three panels instead of indexing the axes array.
ax_median, ax_rmsle, ax_r2 = axes

ax_median.plot(k_value, median_error)
ax_median.set_title('Median Error')
ax_rmsle.plot(k_value, rmsle_error)
ax_rmsle.set_title('Root Mean Square Log Error')
ax_r2.plot(k_value, r2_error)
ax_r2.set_title('R2 Error')

In [None]:
# Compare four regressors on the Luco rows whose targets are all observed,
# once for every candidate neighbour count of the feature imputer.
luco_features = [
    'Rainfall_Simignano', 'Rainfall_Siena_Poggio_al_Vento',
    'Rainfall_Mensano', 'Rainfall_Montalcinello',
    'Rainfall_Monticiano_la_Pineta', 'Rainfall_Sovicille',
    'Rainfall_Ponte_Orgia', 'Rainfall_Scorgiano', 'Rainfall_Pentolina',
    'Rainfall_Monteroni_Arbia_Biena', 'Temperature_Siena_Poggio_al_Vento',
    'Temperature_Mensano', 'Temperature_Pentolina',
    'Temperature_Monteroni_Arbia_Biena',
    'Volume_Pozzo_1', 'Volume_Pozzo_3', 'Volume_Pozzo_4',
]
luco_targets = [
    'Depth_to_Groundwater_Podere_Casetta', 'Depth_to_Groundwater_Pozzo_1',
    'Depth_to_Groundwater_Pozzo_3', 'Depth_to_Groundwater_Pozzo_4',
]

# Keep only the rows in which every target is present, so the targets need
# no imputation here.
filter_aquifer_luco = aquifer_luco[aquifer_luco[luco_targets].notna().all(axis = 1)]
x = filter_aquifer_luco[luco_features]
y = filter_aquifer_luco[luco_targets]

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
result_rf_median = []
result_knn_median = []
result_dt_median = []
result_lr_median = []
result_rf_r2 = []
result_knn_r2 = []
result_dt_r2 = []
result_lr_r2 = []

for i in k_value:
    # Always impute from the untouched feature frame `x`. Overwriting `x`
    # with the imputed array would leave nothing to impute from the second
    # iteration on, so every k would silently reuse the first imputation.
    imputer = KNNImputer(n_neighbors = i)
    x_transform = imputer.fit_transform(x)

    # Fresh models for every k, paired with the lists that collect their
    # scores. The tree-based models are seeded so that differences between
    # k values are not just random-number noise.
    candidates = [
        (RandomForestRegressor(random_state = 42), result_rf_median, result_rf_r2),
        (DecisionTreeRegressor(random_state = 42), result_dt_median, result_dt_r2),
        (LinearRegression(), result_lr_median, result_lr_r2),
        (KNeighborsRegressor(), result_knn_median, result_knn_r2),
    ]
    for model, median_scores, r2_scores in candidates:
        # Each model is scored on the same rows it was trained on.
        prediction = model.fit(x_transform, y).predict(x_transform)
        median_scores.append(median_absolute_error(y, prediction))
        r2_scores.append(r2_score(y, prediction))

In [None]:
# Median absolute error of each regressor against the imputer's neighbour count.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('Median Error Performance')

# Name the panels instead of indexing the 2x2 axes grid.
(ax_rf, ax_dt), (ax_lr, ax_knn) = axes

ax_rf.plot(k_value, result_rf_median)
ax_rf.set_title('Random Forest Regressor')
ax_dt.plot(k_value, result_dt_median)
ax_dt.set_title('Decision Tree Regressor')
ax_lr.plot(k_value, result_lr_median)
ax_lr.set_title('Linear Regression')
ax_knn.plot(k_value, result_knn_median)
ax_knn.set_title('KNeighbors Regressor')

In [None]:
# R2 score of each regressor against the imputer's neighbour count.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('R2 Score Error Performance')

# Name the panels instead of indexing the 2x2 axes grid.
(ax_rf, ax_dt), (ax_lr, ax_knn) = axes

ax_rf.plot(k_value, result_rf_r2)
ax_rf.set_title('Random Forest Regressor')
ax_dt.plot(k_value, result_dt_r2)
ax_dt.set_title('Decision Tree Regressor')
ax_lr.plot(k_value, result_lr_r2)
ax_lr.set_title('Linear Regression')
ax_knn.plot(k_value, result_knn_r2)
ax_knn.set_title('KNeighbors Regressor')

In [None]:
# Fit the final Luco model on the rows with complete targets and predict
# the targets for every row of the dataset.
luco_features = [
    'Rainfall_Simignano', 'Rainfall_Siena_Poggio_al_Vento',
    'Rainfall_Mensano', 'Rainfall_Montalcinello',
    'Rainfall_Monticiano_la_Pineta', 'Rainfall_Sovicille',
    'Rainfall_Ponte_Orgia', 'Rainfall_Scorgiano', 'Rainfall_Pentolina',
    'Rainfall_Monteroni_Arbia_Biena', 'Temperature_Siena_Poggio_al_Vento',
    'Temperature_Mensano', 'Temperature_Pentolina',
    'Temperature_Monteroni_Arbia_Biena',
    'Volume_Pozzo_1', 'Volume_Pozzo_3', 'Volume_Pozzo_4',
]
luco_targets = [
    'Depth_to_Groundwater_Podere_Casetta', 'Depth_to_Groundwater_Pozzo_1',
    'Depth_to_Groundwater_Pozzo_3', 'Depth_to_Groundwater_Pozzo_4',
]

# Impute the features once, with one imputer fitted on every row. Training
# and prediction then see identical feature values for the same row, and the
# two arrays are guaranteed to have the same number of columns.
imputer = KNNImputer(n_neighbors = 13)
x_transform = imputer.fit_transform(aquifer_luco[luco_features])
x = x_transform

# Training set: the rows in which every target is observed.
complete_targets = aquifer_luco[luco_targets].notna().all(axis = 1)
filter_aquifer_luco = aquifer_luco[complete_targets]
non_null_final_x = x[complete_targets.to_numpy()]
non_null_final_y = filter_aquifer_luco[luco_targets]

# Seeded so that the exported predictions are reproducible across runs.
random_forest = RandomForestRegressor(random_state = 42)
random_forest.fit(non_null_final_x, non_null_final_y)
final_prediction = random_forest.predict(x)

In [None]:
# Assemble the output table: imputed features, the original dates, and one
# predicted column per target (taken from final_prediction for every row).
luco_features = [
    'Rainfall_Simignano', 'Rainfall_Siena_Poggio_al_Vento',
    'Rainfall_Mensano', 'Rainfall_Montalcinello',
    'Rainfall_Monticiano_la_Pineta', 'Rainfall_Sovicille',
    'Rainfall_Ponte_Orgia', 'Rainfall_Scorgiano', 'Rainfall_Pentolina',
    'Rainfall_Monteroni_Arbia_Biena', 'Temperature_Siena_Poggio_al_Vento',
    'Temperature_Mensano', 'Temperature_Pentolina',
    'Temperature_Monteroni_Arbia_Biena',
    'Volume_Pozzo_1', 'Volume_Pozzo_3', 'Volume_Pozzo_4',
]
luco_targets = [
    'Depth_to_Groundwater_Podere_Casetta', 'Depth_to_Groundwater_Pozzo_1',
    'Depth_to_Groundwater_Pozzo_3', 'Depth_to_Groundwater_Pozzo_4',
]

final_aquifer_luco = pd.DataFrame(x, columns = luco_features)
final_aquifer_luco['Date'] = aquifer_luco['Date']

# Column `position` of final_prediction belongs to the target at the same
# position in luco_targets.
for position, target_name in enumerate(luco_targets):
    final_aquifer_luco[target_name] = final_prediction[:, position]

In [None]:
# Write the preprocessed Luco table to disk without the row index.
luco_output_path = 'Preprocess Datasets/final_aquifer_luco.csv'
final_aquifer_luco.to_csv(luco_output_path, index = False)

## Aquifer Petrignano

In [None]:
# Share of missing entries per column of the Petrignano dataset, in percent.
petrignano_missing_counts = aquifer_petrignano.isnull().sum()
print("Missing Values Percentage:\n", petrignano_missing_counts * 100 / len(aquifer_petrignano))

In [None]:
# Feature (input) and target (output) columns of the Petrignano aquifer.
petrignano_features = [
    'Rainfall_Bastia_Umbra', 'Temperature_Bastia_Umbra',
    'Temperature_Petrignano', 'Volume_C10_Petrignano',
    'Hydrometry_Fiume_Chiascio_Petrignano',
]
petrignano_targets = ['Depth_to_Groundwater_P24', 'Depth_to_Groundwater_P25']
x = aquifer_petrignano[petrignano_features]
y = aquifer_petrignano[petrignano_targets]

# Neighbour counts tried for the KNN imputer, plus one list per metric that
# collects the score obtained with each neighbour count.
k_value = [1, 3, 5, 7, 9, 11, 13, 17]
median_error, rmsle_error, r2_error = [], [], []

for i in k_value:
    print("k = ", i)

    # The same imputer object is fitted on the features first and then
    # re-fitted on the targets; the two imputations are independent.
    imputer = KNNImputer(n_neighbors = i)
    x_transform = imputer.fit(x).transform(x)
    print("Missing values in 'x' features: ", np.isnan(x_transform).sum())

    y_transform = imputer.fit(y).transform(y)
    print("Missing values in 'y' features: ", np.isnan(y_transform).sum())

    # The forest is scored on the same rows it was trained on.
    random_forest = RandomForestRegressor()
    random_forest.fit(x_transform, y_transform)
    predict_random_forest = random_forest.predict(x_transform)

    median_error.append(median_absolute_error(y_transform, predict_random_forest))
    # Absolute values are taken because the log error rejects negative inputs.
    rmsle_error.append(np.sqrt(mean_squared_log_error(np.abs(y_transform), np.abs(predict_random_forest))))
    r2_error.append(r2_score(y_transform, predict_random_forest))

In [None]:
# One panel per metric, each drawn against the imputer's neighbour count.
fig, axes = plt.subplots(1, 3, figsize = (15, 6))
fig.suptitle('Error Estimation')

# Name the three panels instead of indexing the axes array.
ax_median, ax_rmsle, ax_r2 = axes

ax_median.plot(k_value, median_error)
ax_median.set_title('Median Error')
ax_rmsle.plot(k_value, rmsle_error)
ax_rmsle.set_title('Root Mean Square Log Error')
ax_r2.plot(k_value, r2_error)
ax_r2.set_title('R2 Error')

In [None]:
# Compare four regressors on the Petrignano rows whose targets are all
# observed, once for every candidate neighbour count of the feature imputer.
petrignano_features = [
    'Rainfall_Bastia_Umbra', 'Temperature_Bastia_Umbra',
    'Temperature_Petrignano', 'Volume_C10_Petrignano',
    'Hydrometry_Fiume_Chiascio_Petrignano',
]
petrignano_targets = ['Depth_to_Groundwater_P24', 'Depth_to_Groundwater_P25']

# Keep only the rows in which every target is present, so the targets need
# no imputation here.
filter_aquifer_petrignano = aquifer_petrignano[aquifer_petrignano[petrignano_targets].notna().all(axis = 1)]
x = filter_aquifer_petrignano[petrignano_features]
y = filter_aquifer_petrignano[petrignano_targets]

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
result_rf_median = []
result_knn_median = []
result_dt_median = []
result_lr_median = []
result_rf_r2 = []
result_knn_r2 = []
result_dt_r2 = []
result_lr_r2 = []

for i in k_value:
    # Always impute from the untouched feature frame `x`. Overwriting `x`
    # with the imputed array would leave nothing to impute from the second
    # iteration on, so every k would silently reuse the first imputation.
    imputer = KNNImputer(n_neighbors = i)
    x_transform = imputer.fit_transform(x)

    # Fresh models for every k, paired with the lists that collect their
    # scores. The tree-based models are seeded so that differences between
    # k values are not just random-number noise.
    candidates = [
        (RandomForestRegressor(random_state = 42), result_rf_median, result_rf_r2),
        (DecisionTreeRegressor(random_state = 42), result_dt_median, result_dt_r2),
        (LinearRegression(), result_lr_median, result_lr_r2),
        (KNeighborsRegressor(), result_knn_median, result_knn_r2),
    ]
    for model, median_scores, r2_scores in candidates:
        # Each model is scored on the same rows it was trained on.
        prediction = model.fit(x_transform, y).predict(x_transform)
        median_scores.append(median_absolute_error(y, prediction))
        r2_scores.append(r2_score(y, prediction))

In [None]:
# Median absolute error of each regressor against the imputer's neighbour count.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('Median Error Performance')

# Name the panels instead of indexing the 2x2 axes grid.
(ax_rf, ax_dt), (ax_lr, ax_knn) = axes

ax_rf.plot(k_value, result_rf_median)
ax_rf.set_title('Random Forest Regressor')
ax_dt.plot(k_value, result_dt_median)
ax_dt.set_title('Decision Tree Regressor')
ax_lr.plot(k_value, result_lr_median)
ax_lr.set_title('Linear Regression')
ax_knn.plot(k_value, result_knn_median)
ax_knn.set_title('KNeighbors Regressor')

In [None]:
# R2 score of each regressor against the imputer's neighbour count.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('R2 Score Error Performance')

# Name the panels instead of indexing the 2x2 axes grid.
(ax_rf, ax_dt), (ax_lr, ax_knn) = axes

ax_rf.plot(k_value, result_rf_r2)
ax_rf.set_title('Random Forest Regressor')
ax_dt.plot(k_value, result_dt_r2)
ax_dt.set_title('Decision Tree Regressor')
ax_lr.plot(k_value, result_lr_r2)
ax_lr.set_title('Linear Regression')
ax_knn.plot(k_value, result_knn_r2)
ax_knn.set_title('KNeighbors Regressor')

In [None]:
# Fit the final Petrignano model on the rows whose targets are observed and
# predict both groundwater depths for every row of the dataset.
feature_columns = ['Rainfall_Bastia_Umbra', 'Temperature_Bastia_Umbra',
                   'Temperature_Petrignano', 'Volume_C10_Petrignano',
                   'Hydrometry_Fiume_Chiascio_Petrignano']
target_columns = ['Depth_to_Groundwater_P24', 'Depth_to_Groundwater_P25']

# Impute the features once, over all rows. Fitting a second imputer on the
# labelled subset would give the same row different feature values at
# training time and at prediction time.
imputer = KNNImputer(n_neighbors = 17)
x_transform = imputer.fit_transform(aquifer_petrignano[feature_columns])

x = x_transform

# Training rows: those where every target is present.
has_all_targets = aquifer_petrignano[target_columns].notna().all(axis = 1)
filter_aquifer_petrignano = aquifer_petrignano[has_all_targets]

non_null_final_x = x[has_all_targets.to_numpy()]
non_null_final_y = filter_aquifer_petrignano[target_columns]

# The estimator is a single decision tree, so it is named accordingly (it was
# previously stored in a variable called `random_forest`).
# NOTE(review): the Lake Bilancino, River Arno and Water Spring sections use
# RandomForestRegressor for this step; confirm the decision tree is intended.
# Fixed seed so the exported predictions are reproducible.
decision_tree = DecisionTreeRegressor(random_state = 0)
decision_tree.fit(non_null_final_x, non_null_final_y)
final_prediction = decision_tree.predict(x)

In [None]:
# Assemble the output table: imputed features first, then the dates taken from
# the source frame, then one column per predicted groundwater depth.
final_aquifer_petrignano = pd.DataFrame(
    x,
    columns = ['Rainfall_Bastia_Umbra', 'Temperature_Bastia_Umbra',
               'Temperature_Petrignano', 'Volume_C10_Petrignano',
               'Hydrometry_Fiume_Chiascio_Petrignano'],
).assign(
    Date = aquifer_petrignano['Date'],
    Depth_to_Groundwater_P24 = final_prediction[:, 0],
    Depth_to_Groundwater_P25 = final_prediction[:, 1],
)

In [None]:
# Write the preprocessed Petrignano table to disk without the row index.
final_aquifer_petrignano.to_csv(
    path_or_buf = 'Preprocess Datasets/final_aquifer_petrignano.csv',
    index = False,
)

## Lake Bilancino

In [None]:
# Percentage of missing values in each Lake Bilancino column. (This cell
# previously reported the Aquifer Auser frame, a copy-paste slip.)
print("Missing Values Percentage:\n", lake_bilancino.isnull().sum() * 100 / len(lake_bilancino))

In [None]:
# Sweep the KNNImputer neighbourhood size on Lake Bilancino and record how
# well a random forest fits the imputed data for each k. Features and targets
# are both imputed, and every metric is in-sample (scored on the training rows).
x = lake_bilancino[['Rainfall_S_Piero', 'Rainfall_Mangona', 'Rainfall_S_Agata', 'Rainfall_Cavallina',
                    'Rainfall_Le_Croci', 'Temperature_Le_Croci']]
y = lake_bilancino[['Lake_Level', 'Flow_Rate']]

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
mean_error = []
median_error = []
mse_error = []
rmse_error = []
rmsle_error = []
r2_error = []

for i in k_value:
    print("k = ", i)
    imputer = KNNImputer(n_neighbors = i)

    x_transform = imputer.fit_transform(x)
    print("Missing values in 'x' features: ", np.isnan(x_transform).sum())

    # The same imputer object is re-fitted on the targets.
    y_transform = imputer.fit_transform(y)
    print("Missing values in 'y' features: ", np.isnan(y_transform).sum())

    # Fixed seed: otherwise run-to-run forest randomness is mixed into the
    # differences between k values.
    random_forest = RandomForestRegressor(random_state = 0)
    random_forest.fit(x_transform, y_transform)
    predict_random_forest = random_forest.predict(x_transform)

    mse = mean_squared_error(y_transform, predict_random_forest)

    mean_error.append(mean_absolute_error(y_transform, predict_random_forest))
    median_error.append(median_absolute_error(y_transform, predict_random_forest))
    mse_error.append(mse)
    rmse_error.append(np.sqrt(mse))
    # Absolute values are taken because mean_squared_log_error rejects negatives.
    rmsle_error.append(np.sqrt(mean_squared_log_error(abs(y_transform), abs(predict_random_forest))))
    r2_error.append(r2_score(y_transform, predict_random_forest))

In [None]:
# One panel per in-sample metric collected during the KNNImputer k sweep.
fig, axes = plt.subplots(3, 2, figsize = (10, 12))
fig.suptitle('Error Estimation')

panels = [('Mean Error', mean_error),
          ('Median Error', median_error),
          ('Mean Square  Error', mse_error),
          ('Root Mean Square Error', rmse_error),
          ('Root Mean Square Log Error', rmsle_error),
          ('R2 Error', r2_error)]

# axes.flat walks the grid row by row, which matches the panel order above.
for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('k (KNNImputer n_neighbors)')

# Keep headroom for the suptitle and stop axis labels colliding with titles.
fig.tight_layout(rect = (0, 0, 1, 0.96))

In [None]:
# Compare four regressors on Lake Bilancino for each KNNImputer k.
# Only rows where both targets are observed are used, and every score is
# in-sample (each model is evaluated on the rows it was fitted on).
filter_lake_bilancino = lake_bilancino[lake_bilancino[['Lake_Level', 'Flow_Rate']].notna().all(axis = 1)]

x = filter_lake_bilancino[['Rainfall_S_Piero', 'Rainfall_Mangona', 'Rainfall_S_Agata', 'Rainfall_Cavallina',
                           'Rainfall_Le_Croci', 'Temperature_Le_Croci']]
y = filter_lake_bilancino[['Lake_Level', 'Flow_Rate']]

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
result_rf_median = []
result_knn_median = []
result_dt_median = []
result_lr_median = []
result_rf_r2 = []
result_knn_r2 = []
result_dt_r2 = []
result_lr_r2 = []

for i in k_value:
    imputer = KNNImputer(n_neighbors = i)
    # Leave the raw `x` untouched. Overwriting it with the imputed array would
    # leave nothing to impute on later iterations, so every k after the first
    # would silently reuse the first k's result.
    x_transform = imputer.fit_transform(x)

    # (fresh estimator, median-error list, R2 list) for each model. The tree
    # models are seeded so differences between k values are not random noise.
    candidates = [
        (RandomForestRegressor(random_state = 0), result_rf_median, result_rf_r2),
        (DecisionTreeRegressor(random_state = 0), result_dt_median, result_dt_r2),
        (LinearRegression(), result_lr_median, result_lr_r2),
        (KNeighborsRegressor(), result_knn_median, result_knn_r2),
    ]

    for model, median_scores, r2_scores in candidates:
        model.fit(x_transform, y)
        prediction = model.predict(x_transform)
        median_scores.append(median_absolute_error(y, prediction))
        r2_scores.append(r2_score(y, prediction))

In [None]:
# Median absolute error of each regressor as a function of the KNNImputer k.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('Median Error Performance')

panels = [('Random Forest Regressor', result_rf_median),
          ('Decision Tree Regressor', result_dt_median),
          ('Linear Regression', result_lr_median),
          ('KNeighbors Regressor', result_knn_median)]

# axes.flat walks the grid row by row, which matches the panel order above.
for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('k (KNNImputer n_neighbors)')
    ax.set_ylabel('Median absolute error')

# Keep headroom for the suptitle and stop axis labels colliding with titles.
fig.tight_layout(rect = (0, 0, 1, 0.96))

In [None]:
# R2 score of each regressor as a function of the KNNImputer k.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('R2 Score Error Performance')

panels = [('Random Forest Regressor', result_rf_r2),
          ('Decision Tree Regressor', result_dt_r2),
          ('Linear Regression', result_lr_r2),
          ('KNeighbors Regressor', result_knn_r2)]

# axes.flat walks the grid row by row, which matches the panel order above.
for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('k (KNNImputer n_neighbors)')
    ax.set_ylabel('R2 score')

# Keep headroom for the suptitle and stop axis labels colliding with titles.
fig.tight_layout(rect = (0, 0, 1, 0.96))

In [None]:
# Fit the final Lake Bilancino model on the rows whose targets are observed
# and predict both targets for every row of the dataset.
feature_columns = ['Rainfall_S_Piero', 'Rainfall_Mangona', 'Rainfall_S_Agata', 'Rainfall_Cavallina',
                   'Rainfall_Le_Croci', 'Temperature_Le_Croci']
target_columns = ['Lake_Level', 'Flow_Rate']

# Impute the features once, over all rows. Fitting a second imputer on the
# labelled subset would give the same row different feature values at
# training time and at prediction time.
imputer = KNNImputer(n_neighbors = 17)
x_transform = imputer.fit_transform(lake_bilancino[feature_columns])

x = x_transform

# Training rows: those where every target is present.
has_all_targets = lake_bilancino[target_columns].notna().all(axis = 1)
filter_lake_bilancino = lake_bilancino[has_all_targets]

non_null_final_x = x[has_all_targets.to_numpy()]
non_null_final_y = filter_lake_bilancino[target_columns]

# Fixed seed so the exported predictions are reproducible.
random_forest = RandomForestRegressor(random_state = 0)
random_forest.fit(non_null_final_x, non_null_final_y)
final_prediction = random_forest.predict(x)

In [None]:
# Assemble the output table: imputed features first, then the dates taken from
# the source frame, then one column per predicted target.
final_lake_bilancino = pd.DataFrame(
    x,
    columns = ['Rainfall_S_Piero', 'Rainfall_Mangona', 'Rainfall_S_Agata',
               'Rainfall_Cavallina', 'Rainfall_Le_Croci', 'Temperature_Le_Croci'],
).assign(
    Date = lake_bilancino['Date'],
    Lake_Level = final_prediction[:, 0],
    Flow_Rate = final_prediction[:, 1],
)

In [None]:
# Preview the first five rows of the assembled Lake Bilancino table.
final_lake_bilancino.head(n = 5)

In [None]:
# Write the preprocessed Lake Bilancino table to disk without the row index.
final_lake_bilancino.to_csv(
    path_or_buf = 'Preprocess Datasets/final_lake_bilancino.csv',
    index = False,
)

## River Arno

In [None]:
# Percentage of missing entries in each River Arno column.
print("Missing Values Percentage:\n", river_arno.isna().sum().mul(100).div(river_arno.shape[0]))

In [None]:
# Sweep the KNNImputer neighbourhood size on River Arno and record how well a
# random forest fits the imputed data for each k. Features and the target are
# both imputed, and every metric is in-sample (scored on the training rows).
x = river_arno[['Rainfall_Le_Croci', 'Rainfall_Cavallina', 'Rainfall_S_Agata', 'Rainfall_Mangona', 'Rainfall_S_Piero',
                'Rainfall_Vernio', 'Rainfall_Stia', 'Rainfall_Consuma', 'Rainfall_Incisa', 'Rainfall_Montevarchi',
                'Rainfall_S_Savino', 'Rainfall_Laterina', 'Rainfall_Bibbiena', 'Rainfall_Camaldoli', 'Temperature_Firenze']]
y = river_arno[['Hydrometry_Nave_di_Rosano']]

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
mean_error = []
median_error = []
mse_error = []
rmse_error = []
rmsle_error = []
r2_error = []

for i in k_value:
    print("k = ", i)
    imputer = KNNImputer(n_neighbors = i)

    x_transform = imputer.fit_transform(x)
    print("Missing values in 'x' features: ", np.isnan(x_transform).sum())

    # The same imputer object is re-fitted on the target.
    y_transform = imputer.fit_transform(y)
    print("Missing values in 'y' features: ", np.isnan(y_transform).sum())

    # Fixed seed: otherwise run-to-run forest randomness is mixed into the
    # differences between k values.
    random_forest = RandomForestRegressor(random_state = 0)
    random_forest.fit(x_transform, y_transform)
    predict_random_forest = random_forest.predict(x_transform)

    mse = mean_squared_error(y_transform, predict_random_forest)

    mean_error.append(mean_absolute_error(y_transform, predict_random_forest))
    median_error.append(median_absolute_error(y_transform, predict_random_forest))
    mse_error.append(mse)
    rmse_error.append(np.sqrt(mse))
    # Absolute values are taken because mean_squared_log_error rejects negatives.
    rmsle_error.append(np.sqrt(mean_squared_log_error(abs(y_transform), abs(predict_random_forest))))
    r2_error.append(r2_score(y_transform, predict_random_forest))

In [None]:
# One panel per in-sample metric collected during the KNNImputer k sweep.
fig, axes = plt.subplots(3, 2, figsize = (10, 12))
fig.suptitle('Error Estimation')

panels = [('Mean Error', mean_error),
          ('Median Error', median_error),
          ('Mean Square  Error', mse_error),
          ('Root Mean Square Error', rmse_error),
          ('Root Mean Square Log Error', rmsle_error),
          ('R2 Error', r2_error)]

# axes.flat walks the grid row by row, which matches the panel order above.
for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('k (KNNImputer n_neighbors)')

# Keep headroom for the suptitle and stop axis labels colliding with titles.
fig.tight_layout(rect = (0, 0, 1, 0.96))

In [None]:
# Compare four regressors on River Arno for each KNNImputer k.
# Only rows where the target is observed are used, and every score is
# in-sample (each model is evaluated on the rows it was fitted on).
filter_river_arno = river_arno[river_arno[['Hydrometry_Nave_di_Rosano']].notna().all(axis = 1)]

x = filter_river_arno[['Rainfall_Le_Croci', 'Rainfall_Cavallina', 'Rainfall_S_Agata', 'Rainfall_Mangona', 'Rainfall_S_Piero',
                       'Rainfall_Vernio', 'Rainfall_Stia', 'Rainfall_Consuma', 'Rainfall_Incisa', 'Rainfall_Montevarchi',
                       'Rainfall_S_Savino', 'Rainfall_Laterina', 'Rainfall_Bibbiena', 'Rainfall_Camaldoli', 'Temperature_Firenze']]
y = filter_river_arno[['Hydrometry_Nave_di_Rosano']]

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
result_rf_median = []
result_knn_median = []
result_dt_median = []
result_lr_median = []
result_rf_r2 = []
result_knn_r2 = []
result_dt_r2 = []
result_lr_r2 = []

for i in k_value:
    imputer = KNNImputer(n_neighbors = i)
    # Leave the raw `x` untouched. Overwriting it with the imputed array would
    # leave nothing to impute on later iterations, so every k after the first
    # would silently reuse the first k's result.
    x_transform = imputer.fit_transform(x)

    # (fresh estimator, median-error list, R2 list) for each model. The tree
    # models are seeded so differences between k values are not random noise.
    candidates = [
        (RandomForestRegressor(random_state = 0), result_rf_median, result_rf_r2),
        (DecisionTreeRegressor(random_state = 0), result_dt_median, result_dt_r2),
        (LinearRegression(), result_lr_median, result_lr_r2),
        (KNeighborsRegressor(), result_knn_median, result_knn_r2),
    ]

    for model, median_scores, r2_scores in candidates:
        model.fit(x_transform, y)
        prediction = model.predict(x_transform)
        median_scores.append(median_absolute_error(y, prediction))
        r2_scores.append(r2_score(y, prediction))

In [None]:
# Median absolute error of each regressor as a function of the KNNImputer k.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('Median Error Performance')

panels = [('Random Forest Regressor', result_rf_median),
          ('Decision Tree Regressor', result_dt_median),
          ('Linear Regression', result_lr_median),
          ('KNeighbors Regressor', result_knn_median)]

# axes.flat walks the grid row by row, which matches the panel order above.
for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('k (KNNImputer n_neighbors)')
    ax.set_ylabel('Median absolute error')

# Keep headroom for the suptitle and stop axis labels colliding with titles.
fig.tight_layout(rect = (0, 0, 1, 0.96))

In [None]:
# R2 score of each regressor as a function of the KNNImputer k.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('R2 Score Error Performance')

panels = [('Random Forest Regressor', result_rf_r2),
          ('Decision Tree Regressor', result_dt_r2),
          ('Linear Regression', result_lr_r2),
          ('KNeighbors Regressor', result_knn_r2)]

# axes.flat walks the grid row by row, which matches the panel order above.
for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('k (KNNImputer n_neighbors)')
    ax.set_ylabel('R2 score')

# Keep headroom for the suptitle and stop axis labels colliding with titles.
fig.tight_layout(rect = (0, 0, 1, 0.96))

In [None]:
# Fit the final River Arno model on the rows whose target is observed and
# predict the target for every row of the dataset.
feature_columns = ['Rainfall_Le_Croci', 'Rainfall_Cavallina', 'Rainfall_S_Agata', 'Rainfall_Mangona', 'Rainfall_S_Piero',
                   'Rainfall_Vernio', 'Rainfall_Stia', 'Rainfall_Consuma', 'Rainfall_Incisa', 'Rainfall_Montevarchi',
                   'Rainfall_S_Savino', 'Rainfall_Laterina', 'Rainfall_Bibbiena', 'Rainfall_Camaldoli', 'Temperature_Firenze']
target_columns = ['Hydrometry_Nave_di_Rosano']

# Impute the features once, over all rows. Fitting a second imputer on the
# labelled subset would give the same row different feature values at
# training time and at prediction time.
imputer = KNNImputer(n_neighbors = 17)
x_transform = imputer.fit_transform(river_arno[feature_columns])

x = x_transform

# Training rows: those where the target is present.
has_all_targets = river_arno[target_columns].notna().all(axis = 1)
filter_river_arno = river_arno[has_all_targets]

non_null_final_x = x[has_all_targets.to_numpy()]
non_null_final_y = filter_river_arno[target_columns]

# Fixed seed so the exported predictions are reproducible.
random_forest = RandomForestRegressor(random_state = 0)
# Single target: pass a 1-D vector so scikit-learn does not warn about
# receiving a column vector.
random_forest.fit(non_null_final_x, non_null_final_y.to_numpy().ravel())
final_prediction = random_forest.predict(x)

In [None]:
# Assemble the output table: imputed features first, then the dates taken from
# the source frame, then the predicted hydrometry.
final_river_arno = pd.DataFrame(
    x,
    columns = ['Rainfall_Le_Croci', 'Rainfall_Cavallina', 'Rainfall_S_Agata', 'Rainfall_Mangona',
               'Rainfall_S_Piero', 'Rainfall_Vernio', 'Rainfall_Stia', 'Rainfall_Consuma',
               'Rainfall_Incisa', 'Rainfall_Montevarchi', 'Rainfall_S_Savino',
               'Rainfall_Laterina', 'Rainfall_Bibbiena', 'Rainfall_Camaldoli',
               'Temperature_Firenze'],
).assign(
    Date = river_arno['Date'],
    Hydrometry_Nave_di_Rosano = final_prediction,
)

In [None]:
# Preview the first five rows of the assembled River Arno table.
final_river_arno.head(n = 5)

In [None]:
# Write the preprocessed River Arno table to disk without the row index.
final_river_arno.to_csv(
    path_or_buf = 'Preprocess Datasets/final_river_arno.csv',
    index = False,
)

## Water Spring Amiata

In [None]:
# Percentage of missing entries in each Water Spring Amiata column.
print("Missing Values Percentage:\n", water_spring_amiata.isna().sum().mul(100).div(water_spring_amiata.shape[0]))

In [None]:
# Sweep the KNNImputer neighbourhood size on Water Spring Amiata and record
# how well a random forest fits the imputed data for each k. Features and
# targets are both imputed, and every metric is in-sample (scored on the
# training rows).
x = water_spring_amiata[['Rainfall_Castel_del_Piano', 'Rainfall_Abbadia_S_Salvatore', 'Rainfall_S_Fiora',
                         'Rainfall_Laghetto_Verde', 'Rainfall_Vetta_Amiata', 'Depth_to_Groundwater_S_Fiora_8',
                         'Depth_to_Groundwater_S_Fiora_11bis', 'Depth_to_Groundwater_David_Lazzaretti',
                         'Temperature_Abbadia_S_Salvatore', 'Temperature_S_Fiora', 'Temperature_Laghetto_Verde']]
y = water_spring_amiata[['Flow_Rate_Bugnano', 'Flow_Rate_Arbure', 'Flow_Rate_Ermicciolo', 'Flow_Rate_Galleria_Alta']]

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
mean_error = []
median_error = []
mse_error = []
rmse_error = []
rmsle_error = []
r2_error = []

for i in k_value:
    print("k = ", i)
    imputer = KNNImputer(n_neighbors = i)

    x_transform = imputer.fit_transform(x)
    print("Missing values in 'x' features: ", np.isnan(x_transform).sum())

    # The same imputer object is re-fitted on the targets.
    y_transform = imputer.fit_transform(y)
    print("Missing values in 'y' features: ", np.isnan(y_transform).sum())

    # Fixed seed: otherwise run-to-run forest randomness is mixed into the
    # differences between k values.
    random_forest = RandomForestRegressor(random_state = 0)
    random_forest.fit(x_transform, y_transform)
    predict_random_forest = random_forest.predict(x_transform)

    mse = mean_squared_error(y_transform, predict_random_forest)

    mean_error.append(mean_absolute_error(y_transform, predict_random_forest))
    median_error.append(median_absolute_error(y_transform, predict_random_forest))
    mse_error.append(mse)
    rmse_error.append(np.sqrt(mse))
    # Absolute values are taken because mean_squared_log_error rejects negatives.
    rmsle_error.append(np.sqrt(mean_squared_log_error(abs(y_transform), abs(predict_random_forest))))
    r2_error.append(r2_score(y_transform, predict_random_forest))

In [None]:
# One panel per in-sample metric collected during the KNNImputer k sweep.
fig, axes = plt.subplots(3, 2, figsize = (10, 12))
fig.suptitle('Error Estimation')

panels = [('Mean Error', mean_error),
          ('Median Error', median_error),
          ('Mean Square  Error', mse_error),
          ('Root Mean Square Error', rmse_error),
          ('Root Mean Square Log Error', rmsle_error),
          ('R2 Error', r2_error)]

# axes.flat walks the grid row by row, which matches the panel order above.
for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('k (KNNImputer n_neighbors)')

# Keep headroom for the suptitle and stop axis labels colliding with titles.
fig.tight_layout(rect = (0, 0, 1, 0.96))

In [None]:
# Compare four regressors on Water Spring Amiata for each KNNImputer k.
# Only rows where all four targets are observed are used, and every score is
# in-sample (each model is evaluated on the rows it was fitted on).
filter_water_spring_amiata = water_spring_amiata[water_spring_amiata[['Flow_Rate_Bugnano', 'Flow_Rate_Arbure',
                                                                      'Flow_Rate_Ermicciolo',
                                                                      'Flow_Rate_Galleria_Alta']].notna().all(axis = 1)]

x = filter_water_spring_amiata[['Rainfall_Castel_del_Piano', 'Rainfall_Abbadia_S_Salvatore', 'Rainfall_S_Fiora',
                                'Rainfall_Laghetto_Verde', 'Rainfall_Vetta_Amiata', 'Depth_to_Groundwater_S_Fiora_8',
                                'Depth_to_Groundwater_S_Fiora_11bis', 'Depth_to_Groundwater_David_Lazzaretti',
                                'Temperature_Abbadia_S_Salvatore', 'Temperature_S_Fiora', 'Temperature_Laghetto_Verde']]
y = filter_water_spring_amiata[['Flow_Rate_Bugnano', 'Flow_Rate_Arbure', 'Flow_Rate_Ermicciolo', 'Flow_Rate_Galleria_Alta']]

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
result_rf_median = []
result_knn_median = []
result_dt_median = []
result_lr_median = []
result_rf_r2 = []
result_knn_r2 = []
result_dt_r2 = []
result_lr_r2 = []

for i in k_value:
    imputer = KNNImputer(n_neighbors = i)
    # Leave the raw `x` untouched. Overwriting it with the imputed array would
    # leave nothing to impute on later iterations, so every k after the first
    # would silently reuse the first k's result.
    x_transform = imputer.fit_transform(x)

    # (fresh estimator, median-error list, R2 list) for each model. The tree
    # models are seeded so differences between k values are not random noise.
    candidates = [
        (RandomForestRegressor(random_state = 0), result_rf_median, result_rf_r2),
        (DecisionTreeRegressor(random_state = 0), result_dt_median, result_dt_r2),
        (LinearRegression(), result_lr_median, result_lr_r2),
        (KNeighborsRegressor(), result_knn_median, result_knn_r2),
    ]

    for model, median_scores, r2_scores in candidates:
        model.fit(x_transform, y)
        prediction = model.predict(x_transform)
        median_scores.append(median_absolute_error(y, prediction))
        r2_scores.append(r2_score(y, prediction))

In [None]:
# Median absolute error of each regressor as a function of the KNNImputer k.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('Median Error Performance')

panels = [('Random Forest Regressor', result_rf_median),
          ('Decision Tree Regressor', result_dt_median),
          ('Linear Regression', result_lr_median),
          ('KNeighbors Regressor', result_knn_median)]

# axes.flat walks the grid row by row, which matches the panel order above.
for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('k (KNNImputer n_neighbors)')
    ax.set_ylabel('Median absolute error')

# Keep headroom for the suptitle and stop axis labels colliding with titles.
fig.tight_layout(rect = (0, 0, 1, 0.96))

In [None]:
# R2 score of each regressor as a function of the KNNImputer k.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('R2 Score Error Performance')

panels = [('Random Forest Regressor', result_rf_r2),
          ('Decision Tree Regressor', result_dt_r2),
          ('Linear Regression', result_lr_r2),
          ('KNeighbors Regressor', result_knn_r2)]

# axes.flat walks the grid row by row, which matches the panel order above.
for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('k (KNNImputer n_neighbors)')
    ax.set_ylabel('R2 score')

# Keep headroom for the suptitle and stop axis labels colliding with titles.
fig.tight_layout(rect = (0, 0, 1, 0.96))

In [None]:
# Fit the final Water Spring Amiata model on the rows whose targets are
# observed and predict all four flow rates for every row of the dataset.
feature_columns = ['Rainfall_Castel_del_Piano', 'Rainfall_Abbadia_S_Salvatore', 'Rainfall_S_Fiora', 'Rainfall_Laghetto_Verde',
                   'Rainfall_Vetta_Amiata', 'Depth_to_Groundwater_S_Fiora_8', 'Depth_to_Groundwater_S_Fiora_11bis',
                   'Depth_to_Groundwater_David_Lazzaretti', 'Temperature_Abbadia_S_Salvatore', 'Temperature_S_Fiora',
                   'Temperature_Laghetto_Verde']
target_columns = ['Flow_Rate_Bugnano', 'Flow_Rate_Arbure', 'Flow_Rate_Ermicciolo',
                  'Flow_Rate_Galleria_Alta']

# Impute the features once, over all rows. Fitting a second imputer on the
# labelled subset would give the same row different feature values at
# training time and at prediction time.
imputer = KNNImputer(n_neighbors = 17)
x_transform = imputer.fit_transform(water_spring_amiata[feature_columns])

x = x_transform

# Training rows: those where every target is present.
has_all_targets = water_spring_amiata[target_columns].notna().all(axis = 1)
filter_water_spring_amiata = water_spring_amiata[has_all_targets]

non_null_final_x = x[has_all_targets.to_numpy()]
non_null_final_y = filter_water_spring_amiata[target_columns]

# Fixed seed so the exported predictions are reproducible.
random_forest = RandomForestRegressor(random_state = 0)
random_forest.fit(non_null_final_x, non_null_final_y)
final_prediction = random_forest.predict(x)

In [None]:
# Assemble the output table: imputed features first, then the dates taken from
# the source frame, then one column per predicted flow rate.
final_water_spring_amiata = pd.DataFrame(
    x,
    columns = ['Rainfall_Castel_del_Piano', 'Rainfall_Abbadia_S_Salvatore', 'Rainfall_S_Fiora',
               'Rainfall_Laghetto_Verde', 'Rainfall_Vetta_Amiata',
               'Depth_to_Groundwater_S_Fiora_8', 'Depth_to_Groundwater_S_Fiora_11bis',
               'Depth_to_Groundwater_David_Lazzaretti', 'Temperature_Abbadia_S_Salvatore',
               'Temperature_S_Fiora', 'Temperature_Laghetto_Verde'],
).assign(
    Date = water_spring_amiata['Date'],
    Flow_Rate_Bugnano = final_prediction[:, 0],
    Flow_Rate_Arbure = final_prediction[:, 1],
    Flow_Rate_Ermicciolo = final_prediction[:, 2],
    Flow_Rate_Galleria_Alta = final_prediction[:, 3],
)

In [None]:
# Preview the first five rows of the assembled Water Spring Amiata table.
final_water_spring_amiata.head(n = 5)

In [None]:
# Write the preprocessed Water Spring Amiata table to disk without the row index.
final_water_spring_amiata.to_csv(
    path_or_buf = 'Preprocess Datasets/final_water_spring_amiata.csv',
    index = False,
)

## Water Spring Lupa

In [None]:
# Percentage of missing entries in each Water Spring Lupa column.
print("Missing Values Percentage:\n", water_spring_lupa.isna().sum().mul(100).div(water_spring_lupa.shape[0]))

In [None]:
# Sweep the KNNImputer neighbourhood size on Water Spring Lupa and record how
# well a random forest fits the imputed data for each k. The feature and the
# target are both imputed, and every metric is in-sample (scored on the
# training rows).
x = water_spring_lupa[['Rainfall_Terni']]
y = water_spring_lupa[['Flow_Rate_Lupa']]

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
mean_error = []
median_error = []
mse_error = []
rmse_error = []
rmsle_error = []
r2_error = []

for i in k_value:
    print("k = ", i)
    imputer = KNNImputer(n_neighbors = i)

    x_transform = imputer.fit_transform(x)
    print("Missing values in 'x' features: ", np.isnan(x_transform).sum())

    # The same imputer object is re-fitted on the target.
    y_transform = imputer.fit_transform(y)
    print("Missing values in 'y' features: ", np.isnan(y_transform).sum())

    # Fixed seed: otherwise run-to-run forest randomness is mixed into the
    # differences between k values.
    random_forest = RandomForestRegressor(random_state = 0)
    random_forest.fit(x_transform, y_transform)
    predict_random_forest = random_forest.predict(x_transform)

    mse = mean_squared_error(y_transform, predict_random_forest)

    mean_error.append(mean_absolute_error(y_transform, predict_random_forest))
    median_error.append(median_absolute_error(y_transform, predict_random_forest))
    mse_error.append(mse)
    rmse_error.append(np.sqrt(mse))
    # Absolute values are taken because mean_squared_log_error rejects negatives.
    rmsle_error.append(np.sqrt(mean_squared_log_error(abs(y_transform), abs(predict_random_forest))))
    r2_error.append(r2_score(y_transform, predict_random_forest))

In [None]:
# One panel per in-sample metric collected during the KNNImputer k sweep.
fig, axes = plt.subplots(3, 2, figsize = (10, 12))
fig.suptitle('Error Estimation')

panels = [('Mean Error', mean_error),
          ('Median Error', median_error),
          ('Mean Square  Error', mse_error),
          ('Root Mean Square Error', rmse_error),
          ('Root Mean Square Log Error', rmsle_error),
          ('R2 Error', r2_error)]

# axes.flat walks the grid row by row, which matches the panel order above.
for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('k (KNNImputer n_neighbors)')

# Keep headroom for the suptitle and stop axis labels colliding with titles.
fig.tight_layout(rect = (0, 0, 1, 0.96))

In [None]:
# Compare four regressors on Water Spring Lupa for each KNNImputer k.
# Only rows where the target is observed are used, and every score is
# in-sample (each model is evaluated on the rows it was fitted on).
filter_water_spring_lupa = water_spring_lupa[water_spring_lupa[['Flow_Rate_Lupa']].notna().all(axis = 1)]

x = filter_water_spring_lupa[['Rainfall_Terni']]
y = filter_water_spring_lupa[['Flow_Rate_Lupa']]

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
result_rf_median = []
result_knn_median = []
result_dt_median = []
result_lr_median = []
result_rf_r2 = []
result_knn_r2 = []
result_dt_r2 = []
result_lr_r2 = []

for i in k_value:
    imputer = KNNImputer(n_neighbors = i)
    # Leave the raw `x` untouched. Overwriting it with the imputed array would
    # leave nothing to impute on later iterations, so every k after the first
    # would silently reuse the first k's result.
    x_transform = imputer.fit_transform(x)

    # (fresh estimator, median-error list, R2 list) for each model. The tree
    # models are seeded so differences between k values are not random noise.
    candidates = [
        (RandomForestRegressor(random_state = 0), result_rf_median, result_rf_r2),
        (DecisionTreeRegressor(random_state = 0), result_dt_median, result_dt_r2),
        (LinearRegression(), result_lr_median, result_lr_r2),
        (KNeighborsRegressor(), result_knn_median, result_knn_r2),
    ]

    for model, median_scores, r2_scores in candidates:
        model.fit(x_transform, y)
        prediction = model.predict(x_transform)
        median_scores.append(median_absolute_error(y, prediction))
        r2_scores.append(r2_score(y, prediction))

In [None]:
# Median absolute error of each regressor as a function of the KNNImputer k.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('Median Error Performance')

panels = [('Random Forest Regressor', result_rf_median),
          ('Decision Tree Regressor', result_dt_median),
          ('Linear Regression', result_lr_median),
          ('KNeighbors Regressor', result_knn_median)]

# axes.flat walks the grid row by row, which matches the panel order above.
for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('k (KNNImputer n_neighbors)')
    ax.set_ylabel('Median absolute error')

# Keep headroom for the suptitle and stop axis labels colliding with titles.
fig.tight_layout(rect = (0, 0, 1, 0.96))

In [None]:
# R2 score of each regressor as a function of the KNNImputer k.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('R2 Score Error Performance')

panels = [('Random Forest Regressor', result_rf_r2),
          ('Decision Tree Regressor', result_dt_r2),
          ('Linear Regression', result_lr_r2),
          ('KNeighbors Regressor', result_knn_r2)]

# axes.flat walks the grid row by row, which matches the panel order above.
for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('k (KNNImputer n_neighbors)')
    ax.set_ylabel('R2 score')

# Keep headroom for the suptitle and stop axis labels colliding with titles.
fig.tight_layout(rect = (0, 0, 1, 0.96))

In [None]:
# Fit the final Water Spring Lupa model on the rows whose target is observed
# and predict the flow rate for every row of the dataset.
feature_columns = ['Rainfall_Terni']
target_columns = ['Flow_Rate_Lupa']

# Impute the feature once, over all rows. Fitting a second imputer on the
# labelled subset would give the same row different feature values at
# training time and at prediction time.
imputer = KNNImputer(n_neighbors = 17)
x_transform = imputer.fit_transform(water_spring_lupa[feature_columns])

x = x_transform

# Training rows: those where the target is present.
has_all_targets = water_spring_lupa[target_columns].notna().all(axis = 1)
filter_water_spring_lupa = water_spring_lupa[has_all_targets]

non_null_final_x = x[has_all_targets.to_numpy()]
non_null_final_y = filter_water_spring_lupa[target_columns]

# Fixed seed so the exported predictions are reproducible.
random_forest = RandomForestRegressor(random_state = 0)
# Single target: pass a 1-D vector so scikit-learn does not warn about
# receiving a column vector.
random_forest.fit(non_null_final_x, non_null_final_y.to_numpy().ravel())
final_prediction = random_forest.predict(x)

In [None]:
# Assemble the output table: the imputed feature first, then the dates taken
# from the source frame, then the predicted flow rate.
final_water_spring_lupa = pd.DataFrame(
    x,
    columns = ['Rainfall_Terni'],
).assign(
    Date = water_spring_lupa['Date'],
    Flow_Rate_Lupa = final_prediction,
)

In [None]:
# Preview the first five rows of the assembled Water Spring Lupa table.
final_water_spring_lupa.head(n = 5)

In [None]:
# Write the preprocessed Water Spring Lupa table to disk without the row index.
final_water_spring_lupa.to_csv(
    path_or_buf = 'Preprocess Datasets/final_water_spring_lupa.csv',
    index = False,
)

## Water Spring Madonna di Canneto

In [None]:
# Percentage of missing entries in each Water Spring Madonna di Canneto column.
print(
    "Missing Values Percentage:\n",
    water_spring_madonna_di_canneto.isna().sum().mul(100).div(water_spring_madonna_di_canneto.shape[0]),
)

In [None]:
# Sweep the KNNImputer neighbourhood size on Water Spring Madonna di Canneto
# and record how well a random forest fits the imputed data for each k.
# Features and the target are both imputed, and every metric is in-sample
# (scored on the training rows).
x = water_spring_madonna_di_canneto[['Rainfall_Settefrati', 'Temperature_Settefrati']]
y = water_spring_madonna_di_canneto[['Flow_Rate_Madonna_di_Canneto']]

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
mean_error = []
median_error = []
mse_error = []
rmse_error = []
rmsle_error = []
r2_error = []

for i in k_value:
    print("k = ", i)
    imputer = KNNImputer(n_neighbors = i)

    x_transform = imputer.fit_transform(x)
    print("Missing values in 'x' features: ", np.isnan(x_transform).sum())

    # The same imputer object is re-fitted on the target.
    y_transform = imputer.fit_transform(y)
    print("Missing values in 'y' features: ", np.isnan(y_transform).sum())

    # Fixed seed: otherwise run-to-run forest randomness is mixed into the
    # differences between k values.
    random_forest = RandomForestRegressor(random_state = 0)
    random_forest.fit(x_transform, y_transform)
    predict_random_forest = random_forest.predict(x_transform)

    mse = mean_squared_error(y_transform, predict_random_forest)

    mean_error.append(mean_absolute_error(y_transform, predict_random_forest))
    median_error.append(median_absolute_error(y_transform, predict_random_forest))
    mse_error.append(mse)
    rmse_error.append(np.sqrt(mse))
    # Absolute values are taken because mean_squared_log_error rejects negatives.
    rmsle_error.append(np.sqrt(mean_squared_log_error(abs(y_transform), abs(predict_random_forest))))
    r2_error.append(r2_score(y_transform, predict_random_forest))

In [None]:
# One panel per metric list filled by the imputer sweep, each drawn against k.
fig, axes = plt.subplots(3, 2, figsize = (10, 12))
fig.suptitle('Error Estimation')

# (panel title, values) in row-major panel order.
panels = [
    ('Mean Error', mean_error),
    ('Median Error', median_error),
    ('Mean Square Error', mse_error),
    ('Root Mean Square Error', rmse_error),
    ('Root Mean Square Log Error', rmsle_error),
    ('R2 Score', r2_error),
]

for ax, (title, values) in zip(axes.flat, panels):
    ax.plot(k_value, values)
    ax.set_title(title)
    ax.set_xlabel('n_neighbors (k)')

# Leave room for the figure title and keep axis labels clear of the panels below.
fig.tight_layout(rect = [0, 0, 1, 0.97])

In [None]:
# Compare four regressors on the rows that have an observed flow rate, once per
# KNNImputer neighbour count. Each model is fitted and scored on the same rows,
# using the median absolute error and the R2 score.
filter_water_spring_madonna_di_canneto = water_spring_madonna_di_canneto[water_spring_madonna_di_canneto[['Flow_Rate_Madonna_di_Canneto']].notna().all(axis = 1)]

x = filter_water_spring_madonna_di_canneto[['Rainfall_Settefrati', 'Temperature_Settefrati']]
y = filter_water_spring_madonna_di_canneto[['Flow_Rate_Madonna_di_Canneto']]

# The single target column as a 1-D array, used for fitting and scoring.
y_target = y.to_numpy().ravel()

k_value = [1, 3, 5, 7, 9, 11, 13, 17]
result_rf_median = []
result_knn_median = []
result_dt_median = []
result_lr_median = []
result_rf_r2 = []
result_knn_r2 = []
result_dt_r2 = []
result_lr_r2 = []

for i in k_value:
    # Impute from the untouched `x` on every pass, so each neighbour count is
    # applied to the original missing values rather than to already-filled data.
    imputer = KNNImputer(n_neighbors = i)
    imputer.fit(x)
    x_transform = imputer.transform(x)

    # Fresh estimators per k, each paired with the two lists that collect its
    # scores. The stochastic models are seeded so only k varies between passes.
    candidates = [
        (RandomForestRegressor(random_state = 42), result_rf_median, result_rf_r2),
        (DecisionTreeRegressor(random_state = 42), result_dt_median, result_dt_r2),
        (LinearRegression(), result_lr_median, result_lr_r2),
        (KNeighborsRegressor(), result_knn_median, result_knn_r2),
    ]

    for model, median_scores, r2_scores in candidates:
        model.fit(x_transform, y_target)
        prediction = model.predict(x_transform)

        median_scores.append(median_absolute_error(y_target, prediction))
        r2_scores.append(r2_score(y_target, prediction))

In [None]:
# Median absolute error of each regressor, drawn against the imputer's k.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('Median Error Performance')

# (panel title, scores) in row-major panel order.
panels = [
    ('Random Forest Regressor', result_rf_median),
    ('Decision Tree Regressor', result_dt_median),
    ('Linear Regression', result_lr_median),
    ('KNeighbors Regressor', result_knn_median),
]

for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('n_neighbors (k)')
    ax.set_ylabel('Median absolute error')

# Leave room for the figure title and keep axis labels clear of neighbouring panels.
fig.tight_layout(rect = [0, 0, 1, 0.97])

In [None]:
# R2 score of each regressor, drawn against the imputer's k.
fig, axes = plt.subplots(2, 2, figsize = (10, 10))
fig.suptitle('R2 Score Performance')

# (panel title, scores) in row-major panel order.
panels = [
    ('Random Forest Regressor', result_rf_r2),
    ('Decision Tree Regressor', result_dt_r2),
    ('Linear Regression', result_lr_r2),
    ('KNeighbors Regressor', result_knn_r2),
]

for ax, (title, scores) in zip(axes.flat, panels):
    ax.plot(k_value, scores)
    ax.set_title(title)
    ax.set_xlabel('n_neighbors (k)')
    ax.set_ylabel('R2 score')

# Leave room for the figure title and keep axis labels clear of neighbouring panels.
fig.tight_layout(rect = [0, 0, 1, 0.97])

In [None]:
# Final model for this spring: impute the features of every row, train a random
# forest on the rows that have an observed flow rate, then predict a flow rate
# for every row of the data set.
x = water_spring_madonna_di_canneto[['Rainfall_Settefrati', 'Temperature_Settefrati']]

imputer = KNNImputer(n_neighbors = 17)
imputer.fit(x)
x_transform = imputer.transform(x)

# From here on `x` is the imputed feature array for all rows.
x = x_transform

# Rows whose flow rate is present form the training set.
filter_water_spring_madonna_di_canneto = water_spring_madonna_di_canneto[water_spring_madonna_di_canneto[['Flow_Rate_Madonna_di_Canneto']].notna().all(axis = 1)]

non_null_final_x = filter_water_spring_madonna_di_canneto[['Rainfall_Settefrati', 'Temperature_Settefrati']]
non_null_final_y = filter_water_spring_madonna_di_canneto[['Flow_Rate_Madonna_di_Canneto']]

# NOTE(review): the training features are filled by a second imputer fitted on
# the labelled rows only, so a labelled row with missing features may carry
# different imputed values here than in `x` — confirm this is intended.
imputer = KNNImputer(n_neighbors = 17)
imputer.fit(non_null_final_x)
non_null_final_x = imputer.transform(non_null_final_x)

# Seeded so the exported predictions are reproducible across runs; the single
# target column is passed as a 1-D array, the shape expected for one output.
random_forest = RandomForestRegressor(random_state = 42)
random_forest.fit(non_null_final_x, non_null_final_y.to_numpy().ravel())
final_prediction = random_forest.predict(x)

In [None]:
# Assemble the export table: the two feature columns held in `x`, followed by
# the dates of the raw frame and the model output stored in `final_prediction`
# as the flow-rate column.
final_water_spring_madonna_di_canneto = pd.DataFrame(
    x,
    columns = ['Rainfall_Settefrati', 'Temperature_Settefrati'],
).assign(
    Date = water_spring_madonna_di_canneto['Date'],
    Flow_Rate_Madonna_di_Canneto = final_prediction,
)

In [None]:
# Preview the first five rows of the assembled table.
final_water_spring_madonna_di_canneto.head(n = 5)

In [None]:
# Write the assembled table to disk; the row index is not written to the file.
final_water_spring_madonna_di_canneto.to_csv(path_or_buf = 'Preprocess Datasets/final_water_spring_madonna_di_canneto.csv', index = False)