In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, median_absolute_error, mean_squared_log_error, r2_score

In [None]:
# Read the four per-water-body feature tables, one CSV each, in a fixed order.
# NOTE(review): 'aquifier.csv' is spelled differently from the variable name;
# confirm it matches the file on disk.
aquifer, lake, river, water_spring = map(
    pd.read_csv,
    (
        'Feature Analysis/aquifier.csv',
        'Feature Analysis/lake.csv',
        'Feature Analysis/river.csv',
        'Feature Analysis/water_spring.csv',
    ),
)

# Aquifer

In [None]:
# Preview the first rows of the aquifer table.
aquifer.head()

In [None]:
# Min-max scale each listed column to [0, 1] using that column's own minimum
# and maximum over the whole table.
# NOTE(review): the extrema are computed before the train/test split, so they
# include rows that later land in the test set.
for column in ['mean_rainfall', 'mean_temperature', 'actual_depth', 'actual_volume', 'actual_hydrometry']:
    col_min = aquifer[column].min()
    col_max = aquifer[column].max()
    aquifer[column] = (aquifer[column] - col_min) / (col_max - col_min)

In [None]:
# Target: scaled depth. Features: every remaining column except the date.
y = aquifer['actual_depth']
X = aquifer.drop(columns=['Date', 'actual_depth'])

In [None]:
# Hold out 25% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible.
# NOTE(review): this is a random split; if the rows are chronological (the table
# has a 'Date' column), confirm that mixing past and future rows is intended.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

## KNN 

In [None]:
# K-nearest-neighbours regressor with default settings; it is the base estimator
# for the grid search below. Despite the variable name, this is a regressor.
knn_classifier = KNeighborsRegressor()

In [None]:
# Candidate hyper-parameters: neighbourhood size, neighbour weighting and
# distance metric.
param_grid = dict(
    n_neighbors=[3, 5, 7, 9, 11, 13],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

# Exhaustive 5-fold search scored on three metrics; the winning combination is
# selected (and refit on the whole training split) by negated MSLE.
grid_search_cv = GridSearchCV(
    knn_classifier,
    param_grid,
    scoring=['neg_mean_squared_log_error', 'neg_median_absolute_error', 'r2'],
    refit='neg_mean_squared_log_error',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

grid_search_cv_fit = grid_search_cv.fit(x_train, y_train)

In [None]:
# Mean cross-validated score of the winning combination under the refit metric
# (negated MSLE, so values closer to 0 are better), then the combination itself.
print('Best Score: ', grid_search_cv_fit.best_score_)
print('Best Params: ', grid_search_cv_fit.best_params_)

In [None]:
# Predict the held-out test features with the estimator refit on the best
# parameter combination.
y_predict = grid_search_cv_fit.best_estimator_.predict(x_test)

In [None]:
# Clamp predictions to the target's min-max-scaled [0, 1] range rather than
# re-scaling them by their own extrema: that transform alters the values being
# scored and divides by zero when every prediction is equal. Clamping also keeps
# the inputs non-negative, as mean_squared_log_error requires.
y_predict = np.clip(y_predict, 0.0, 1.0)

In [None]:
# Median absolute error of y_predict against the held-out targets.
median_absolute_error(y_test, y_predict)

In [None]:
# Root mean squared log error (square root of MSLE) on the held-out targets.
np.sqrt(mean_squared_log_error(y_test, y_predict))

In [None]:
# Coefficient of determination (R^2) on the held-out targets.
r2_score(y_test, y_predict)

## Linear Regression

In [None]:
# Ordinary least squares model with default settings; no grid search is run for it.
linear_regression = LinearRegression()

In [None]:
# Fit on the training split; fit() returns the estimator itself, so the name is
# simply rebound to the same object.
linear_regression = linear_regression.fit(x_train, y_train)

In [None]:
# Predict the held-out test features with the fitted linear model.
y_predict = linear_regression.predict(x_test)

In [None]:
# Clamp predictions to the target's min-max-scaled [0, 1] range rather than
# re-scaling them by their own extrema: that transform alters the values being
# scored and divides by zero when every prediction is equal. A linear model can
# extrapolate below 0, and mean_squared_log_error rejects negative inputs.
y_predict = np.clip(y_predict, 0.0, 1.0)

In [None]:
# Median absolute error of y_predict against the held-out targets.
median_absolute_error(y_test, y_predict)

In [None]:
# Root mean squared log error (square root of MSLE) on the held-out targets.
np.sqrt(mean_squared_log_error(y_test, y_predict))

In [None]:
# Coefficient of determination (R^2) on the held-out targets.
r2_score(y_test, y_predict)

## Decision Tree Regressor

In [None]:
# Seed the tree so the grid search and the refit model are reproducible:
# scikit-learn permutes candidate features at each split, so unseeded trees can
# differ between runs. 42 matches the seed used for the train/test split.
decision_tree = DecisionTreeRegressor(random_state = 42)

In [None]:
# Candidate hyper-parameters: split criterion, depth limit (10-19) and minimum
# samples needed to split a node (2-9).
param_grid = dict(
    criterion=['squared_error', 'friedman_mse'],
    max_depth=range(10, 20),
    min_samples_split=range(2, 10),
)

# Exhaustive 5-fold search scored on three metrics; the winning combination is
# selected (and refit on the whole training split) by negated MSLE.
grid_search_cv = GridSearchCV(
    decision_tree,
    param_grid,
    scoring=['neg_mean_squared_log_error', 'neg_median_absolute_error', 'r2'],
    refit='neg_mean_squared_log_error',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

grid_search_cv_fit = grid_search_cv.fit(x_train, y_train)

In [None]:
# Mean cross-validated score of the winning combination under the refit metric
# (negated MSLE, so values closer to 0 are better), then the combination itself.
print('Best Score: ', grid_search_cv_fit.best_score_)
print('Best Params: ', grid_search_cv_fit.best_params_)

In [None]:
# Predict the held-out test features with the estimator refit on the best
# parameter combination.
y_predict = grid_search_cv_fit.best_estimator_.predict(x_test)

In [None]:
# Clamp predictions to the target's min-max-scaled [0, 1] range rather than
# re-scaling them by their own extrema: that transform alters the values being
# scored and divides by zero when every prediction is equal. Clamping also keeps
# the inputs non-negative, as mean_squared_log_error requires.
y_predict = np.clip(y_predict, 0.0, 1.0)

In [None]:
# Median absolute error of y_predict against the held-out targets.
median_absolute_error(y_test, y_predict)

In [None]:
# Root mean squared log error (square root of MSLE) on the held-out targets.
np.sqrt(mean_squared_log_error(y_test, y_predict))

In [None]:
# Coefficient of determination (R^2) on the held-out targets.
r2_score(y_test, y_predict)

# Lakes

In [None]:
# Preview the first rows of the lake table.
lake.head()

In [None]:
# Min-max scale each listed column to [0, 1] using that column's own minimum
# and maximum over the whole table.
# NOTE(review): the extrema are computed before the train/test split, so they
# include rows that later land in the test set.
for column in ['mean_rainfall', 'mean_temperature', 'actual_flow_rate', 'actual_lake_level']:
    col_min = lake[column].min()
    col_max = lake[column].max()
    lake[column] = (lake[column] - col_min) / (col_max - col_min)

In [None]:
# Two targets, in this column order: flow rate (0) and lake level (1).
# Features: every remaining column except the date.
y = lake[['actual_flow_rate', 'actual_lake_level']]
X = lake.drop(columns=['Date', 'actual_flow_rate', 'actual_lake_level'])

In [None]:
# Hold out 25% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

## KNN

In [None]:
# K-nearest-neighbours regressor with default settings; it is the base estimator
# for the grid search below. Despite the variable name, this is a regressor.
knn_classifier = KNeighborsRegressor()

In [None]:
# Candidate hyper-parameters: neighbourhood size, neighbour weighting and
# distance metric.
param_grid = dict(
    n_neighbors=[11, 13, 17, 19, 21, 25],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

# Exhaustive 5-fold search scored on three metrics; the winning combination is
# selected (and refit on the whole training split) by negated MSLE.
grid_search_cv = GridSearchCV(
    knn_classifier,
    param_grid,
    scoring=['neg_mean_squared_log_error', 'neg_median_absolute_error', 'r2'],
    refit='neg_mean_squared_log_error',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

grid_search_cv_fit = grid_search_cv.fit(x_train, y_train)

In [None]:
# Mean cross-validated score of the winning combination under the refit metric
# (negated MSLE, so values closer to 0 are better), then the combination itself.
print('Best Score: ', grid_search_cv_fit.best_score_)
print('Best Params: ', grid_search_cv_fit.best_params_)

In [None]:
# Predict both targets for the held-out test features with the estimator refit
# on the best parameter combination.
y_predict = grid_search_cv_fit.best_estimator_.predict(x_test)

In [None]:
# Clamp the flow-rate predictions (column 0) to the target's min-max-scaled
# [0, 1] range rather than re-scaling them by their own extrema: that transform
# alters the values being scored and divides by zero when every prediction is
# equal. Clamping also keeps the inputs valid for mean_squared_log_error.
y_predict[:, 0] = np.clip(y_predict[:, 0], 0.0, 1.0)

In [None]:
# Clamp the lake-level predictions (column 1) to the target's min-max-scaled
# [0, 1] range rather than re-scaling them by their own extrema: that transform
# alters the values being scored and divides by zero when every prediction is
# equal. Clamping also keeps the inputs valid for mean_squared_log_error.
y_predict[:, 1] = np.clip(y_predict[:, 1], 0.0, 1.0)

In [None]:
# Median absolute error for the first target column (actual_flow_rate).
median_absolute_error(y_test.iloc[:, 0], y_predict[:, 0])

In [None]:
# Median absolute error for the second target column (actual_lake_level).
median_absolute_error(y_test.iloc[:, 1], y_predict[:, 1])

In [None]:
# Root mean squared log error for the first target column (actual_flow_rate).
np.sqrt(mean_squared_log_error(y_test.iloc[:, 0], y_predict[:, 0]))

In [None]:
# Root mean squared log error for the second target column (actual_lake_level).
np.sqrt(mean_squared_log_error(y_test.iloc[:, 1], y_predict[:, 1]))

In [None]:
# R^2 for the first target column (actual_flow_rate).
r2_score(y_test.iloc[:, 0], y_predict[:, 0])

In [None]:
# R^2 for the second target column (actual_lake_level).
r2_score(y_test.iloc[:, 1], y_predict[:, 1])

## Linear Regression

In [None]:
# Ordinary least squares model with default settings; no grid search is run for it.
linear_regression = LinearRegression()

In [None]:
# Fit on the training split (both targets at once); fit() returns the estimator
# itself, so the name is simply rebound to the same object.
linear_regression = linear_regression.fit(x_train, y_train)

In [None]:
# Predict both targets for the held-out test features.
y_predict = linear_regression.predict(x_test)

In [None]:
# Clamp the flow-rate predictions (column 0) to the target's min-max-scaled
# [0, 1] range rather than re-scaling them by their own extrema: that transform
# alters the values being scored and divides by zero when every prediction is
# equal. A linear model can extrapolate below 0, and mean_squared_log_error
# rejects negative inputs.
y_predict[:, 0] = np.clip(y_predict[:, 0], 0.0, 1.0)

In [None]:
# Clamp the lake-level predictions (column 1) to the target's min-max-scaled
# [0, 1] range rather than re-scaling them by their own extrema: that transform
# alters the values being scored and divides by zero when every prediction is
# equal. A linear model can extrapolate below 0, and mean_squared_log_error
# rejects negative inputs.
y_predict[:, 1] = np.clip(y_predict[:, 1], 0.0, 1.0)

In [None]:
# Median absolute error for the first target column (actual_flow_rate).
median_absolute_error(y_test.iloc[:, 0], y_predict[:, 0])

In [None]:
# Median absolute error for the second target column (actual_lake_level).
median_absolute_error(y_test.iloc[:, 1], y_predict[:, 1])

In [None]:
# Root mean squared log error for the first target column (actual_flow_rate).
# Both sides must use column 0; pairing the flow-rate targets with the
# lake-level predictions (column 1) scores the wrong variable.
np.sqrt(mean_squared_log_error(y_test.iloc[:, 0], y_predict[:, 0]))

In [None]:
# Root mean squared log error for the second target column (actual_lake_level).
np.sqrt(mean_squared_log_error(y_test.iloc[:, 1], y_predict[:, 1]))

In [None]:
# R^2 for the first target column (actual_flow_rate).
# Both sides must use column 0; pairing the flow-rate targets with the
# lake-level predictions (column 1) scores the wrong variable.
r2_score(y_test.iloc[:, 0], y_predict[:, 0])

In [None]:
# R^2 for the second target column (actual_lake_level).
r2_score(y_test.iloc[:, 1], y_predict[:, 1])

## Decision Tree Regressor

In [None]:
# Seed the tree so the grid search and the refit model are reproducible:
# scikit-learn permutes candidate features at each split, so unseeded trees can
# differ between runs. 42 matches the seed used for the train/test split.
decision_tree = DecisionTreeRegressor(random_state = 42)

In [None]:
# Candidate hyper-parameters: split criterion, depth limit (10-19) and minimum
# samples needed to split a node (2-9).
param_grid = dict(
    criterion=['squared_error', 'friedman_mse'],
    max_depth=range(10, 20),
    min_samples_split=range(2, 10),
)

# Exhaustive 5-fold search scored on three metrics; the winning combination is
# selected (and refit on the whole training split) by negated MSLE.
grid_search_cv = GridSearchCV(
    decision_tree,
    param_grid,
    scoring=['neg_mean_squared_log_error', 'neg_median_absolute_error', 'r2'],
    refit='neg_mean_squared_log_error',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

grid_search_cv_fit = grid_search_cv.fit(x_train, y_train)

In [None]:
# Mean cross-validated score of the winning combination under the refit metric
# (negated MSLE, so values closer to 0 are better), then the combination itself.
print('Best Score: ', grid_search_cv_fit.best_score_)
print('Best Params: ', grid_search_cv_fit.best_params_)

In [None]:
# Predict both targets for the held-out test features with the estimator refit
# on the best parameter combination.
y_predict = grid_search_cv_fit.best_estimator_.predict(x_test)

In [None]:
# Clamp the flow-rate predictions (column 0) to the target's min-max-scaled
# [0, 1] range rather than re-scaling them by their own extrema: that transform
# alters the values being scored and divides by zero when every prediction is
# equal. Clamping also keeps the inputs valid for mean_squared_log_error.
y_predict[:, 0] = np.clip(y_predict[:, 0], 0.0, 1.0)

In [None]:
# Clamp the lake-level predictions (column 1) to the target's min-max-scaled
# [0, 1] range rather than re-scaling them by their own extrema: that transform
# alters the values being scored and divides by zero when every prediction is
# equal. Clamping also keeps the inputs valid for mean_squared_log_error.
y_predict[:, 1] = np.clip(y_predict[:, 1], 0.0, 1.0)

In [None]:
# Median absolute error per target, in column order
# (actual_flow_rate, actual_lake_level). 'raw_values' keeps the two targets
# apart so the lake-level error is reported as well as the flow-rate error.
median_absolute_error(y_test, y_predict, multioutput = 'raw_values')

In [None]:
# Root mean squared log error per target, in column order
# (actual_flow_rate, actual_lake_level). 'raw_values' keeps the two targets
# apart so the lake-level error is reported as well as the flow-rate error.
np.sqrt(mean_squared_log_error(y_test, y_predict, multioutput = 'raw_values'))

In [None]:
# R^2 per target, in column order (actual_flow_rate, actual_lake_level).
# 'raw_values' reports each target separately instead of one averaged score,
# matching how the other lake models are evaluated.
r2_score(y_test, y_predict, multioutput = 'raw_values')

# River Arno

In [None]:
# Preview the first rows of the river table.
river.head()

In [None]:
# Min-max scale each listed column to [0, 1] using that column's own minimum
# and maximum over the whole table.
# NOTE(review): the extrema are computed before the train/test split, so they
# include rows that later land in the test set.
for column in ['mean_rainfall', 'mean_temperature', 'actual_hydrometry']:
    col_min = river[column].min()
    col_max = river[column].max()
    river[column] = (river[column] - col_min) / (col_max - col_min)

In [None]:
# Target: scaled hydrometry. Features: every remaining column except the date
# and the 'Source' column.
y = river['actual_hydrometry']
X = river.drop(columns=['Date', 'actual_hydrometry', 'Source'])

In [None]:
# Hold out 25% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

## KNN

In [None]:
# K-nearest-neighbours regressor with default settings; it is the base estimator
# for the grid search below. Despite the variable name, this is a regressor.
knn_classifier = KNeighborsRegressor()

In [None]:
# Candidate hyper-parameters: neighbourhood size, neighbour weighting and
# distance metric.
param_grid = dict(
    n_neighbors=[3, 5, 7, 9, 11, 13],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

# Exhaustive 5-fold search scored on three metrics; the winning combination is
# selected (and refit on the whole training split) by negated MSLE.
grid_search_cv = GridSearchCV(
    knn_classifier,
    param_grid,
    scoring=['neg_mean_squared_log_error', 'neg_median_absolute_error', 'r2'],
    refit='neg_mean_squared_log_error',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

grid_search_cv_fit = grid_search_cv.fit(x_train, y_train)

In [None]:
# Mean cross-validated score of the winning combination under the refit metric
# (negated MSLE, so values closer to 0 are better), then the combination itself.
print('Best Score: ', grid_search_cv_fit.best_score_)
print('Best Params: ', grid_search_cv_fit.best_params_)

In [None]:
# Predict the held-out test features with the estimator refit on the best
# parameter combination.
y_predict = grid_search_cv_fit.best_estimator_.predict(x_test)

In [None]:
# Clamp predictions to the target's min-max-scaled [0, 1] range rather than
# re-scaling them by their own extrema: that transform alters the values being
# scored and divides by zero when every prediction is equal. Clamping also keeps
# the inputs non-negative, as mean_squared_log_error requires.
y_predict = np.clip(y_predict, 0.0, 1.0)

In [None]:
# Median absolute error of y_predict against the held-out targets.
median_absolute_error(y_test, y_predict)

In [None]:
# Root mean squared log error (square root of MSLE) on the held-out targets.
np.sqrt(mean_squared_log_error(y_test, y_predict))

In [None]:
# Coefficient of determination (R^2) on the held-out targets.
r2_score(y_test, y_predict)

## Linear Regression

In [None]:
# Ordinary least squares model with default settings; no grid search is run for it.
linear_regression = LinearRegression()

In [None]:
# Fit on the training split; fit() returns the estimator itself, so the name is
# simply rebound to the same object.
linear_regression = linear_regression.fit(x_train, y_train)

In [None]:
# Predict the held-out test features with the fitted linear model.
y_predict = linear_regression.predict(x_test)

In [None]:
# Clamp predictions to the target's min-max-scaled [0, 1] range rather than
# re-scaling them by their own extrema: that transform alters the values being
# scored and divides by zero when every prediction is equal. A linear model can
# extrapolate below 0, and mean_squared_log_error rejects negative inputs.
y_predict = np.clip(y_predict, 0.0, 1.0)

In [None]:
# Median absolute error of y_predict against the held-out targets.
median_absolute_error(y_test, y_predict)

In [None]:
# Root mean squared log error (square root of MSLE) on the held-out targets.
np.sqrt(mean_squared_log_error(y_test, y_predict))

In [None]:
# Coefficient of determination (R^2) on the held-out targets.
r2_score(y_test, y_predict)

## Decision Tree Regressor

In [None]:
# Seed the tree so the grid search and the refit model are reproducible:
# scikit-learn permutes candidate features at each split, so unseeded trees can
# differ between runs. 42 matches the seed used for the train/test split.
decision_tree = DecisionTreeRegressor(random_state = 42)

In [None]:
# Candidate hyper-parameters: split criterion, depth limit (10-19) and minimum
# samples needed to split a node (2-9).
param_grid = dict(
    criterion=['squared_error', 'friedman_mse'],
    max_depth=range(10, 20),
    min_samples_split=range(2, 10),
)

# Exhaustive 5-fold search scored on three metrics; the winning combination is
# selected (and refit on the whole training split) by negated MSLE.
grid_search_cv = GridSearchCV(
    decision_tree,
    param_grid,
    scoring=['neg_mean_squared_log_error', 'neg_median_absolute_error', 'r2'],
    refit='neg_mean_squared_log_error',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

grid_search_cv_fit = grid_search_cv.fit(x_train, y_train)

In [None]:
# Mean cross-validated score of the winning combination under the refit metric
# (negated MSLE, so values closer to 0 are better), then the combination itself.
print('Best Score: ', grid_search_cv_fit.best_score_)
print('Best Params: ', grid_search_cv_fit.best_params_)

In [None]:
# Predict the held-out test features with the estimator refit on the best
# parameter combination.
y_predict = grid_search_cv_fit.best_estimator_.predict(x_test)

In [None]:
# Clamp predictions to the target's min-max-scaled [0, 1] range rather than
# re-scaling them by their own extrema: that transform alters the values being
# scored and divides by zero when every prediction is equal. Clamping also keeps
# the inputs non-negative, as mean_squared_log_error requires.
y_predict = np.clip(y_predict, 0.0, 1.0)

In [None]:
# Median absolute error of y_predict against the held-out targets.
median_absolute_error(y_test, y_predict)

In [None]:
# Root mean squared log error (square root of MSLE) on the held-out targets.
np.sqrt(mean_squared_log_error(y_test, y_predict))

In [None]:
# Coefficient of determination (R^2) on the held-out targets.
r2_score(y_test, y_predict)

# Water Spring

In [None]:
# Preview the first rows of the water-spring table before scaling.
water_spring.head()

In [None]:
# Min-max scale each listed column to [0, 1] using that column's own minimum
# and maximum over the whole table.
# NOTE(review): the extrema are computed before the train/test split, so they
# include rows that later land in the test set.
for column in ['mean_rainfall', 'mean_temperature', 'actual_depth', 'actual_flow_rate']:
    col_min = water_spring[column].min()
    col_max = water_spring[column].max()
    water_spring[column] = (water_spring[column] - col_min) / (col_max - col_min)

In [None]:
# Preview the table again to inspect the columns after min-max scaling.
water_spring.head()

In [None]:
# Target: scaled flow rate. Features: every remaining column except the date.
y = water_spring['actual_flow_rate']
X = water_spring.drop(columns=['Date', 'actual_flow_rate'])

In [None]:
# Hold out 25% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

## KNN

In [None]:
# K-nearest-neighbours regressor with default settings; it is the base estimator
# for the grid search below. Despite the variable name, this is a regressor.
knn_classifier = KNeighborsRegressor()

In [None]:
# Candidate hyper-parameters: neighbourhood size, neighbour weighting and
# distance metric.
param_grid = dict(
    n_neighbors=[3, 5, 7, 9, 11, 13],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

# Exhaustive 5-fold search scored on three metrics; the winning combination is
# selected (and refit on the whole training split) by negated MSLE.
grid_search_cv = GridSearchCV(
    knn_classifier,
    param_grid,
    scoring=['neg_mean_squared_log_error', 'neg_median_absolute_error', 'r2'],
    refit='neg_mean_squared_log_error',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

grid_search_cv_fit = grid_search_cv.fit(x_train, y_train)

In [None]:
# Mean cross-validated score of the winning combination under the refit metric
# (negated MSLE, so values closer to 0 are better), then the combination itself.
print('Best Score: ', grid_search_cv_fit.best_score_)
print('Best Params: ', grid_search_cv_fit.best_params_)

In [None]:
# Predict the held-out test features with the estimator refit on the best
# parameter combination.
y_predict = grid_search_cv_fit.best_estimator_.predict(x_test)

In [None]:
# Clamp predictions to the target's min-max-scaled [0, 1] range rather than
# re-scaling them by their own extrema: that transform alters the values being
# scored and divides by zero when every prediction is equal. Clamping also keeps
# the inputs non-negative, as mean_squared_log_error requires.
y_predict = np.clip(y_predict, 0.0, 1.0)

In [None]:
# Median absolute error of y_predict against the held-out targets.
median_absolute_error(y_test, y_predict)

In [None]:
# Root mean squared log error (square root of MSLE) on the held-out targets.
np.sqrt(mean_squared_log_error(y_test, y_predict))

In [None]:
# Coefficient of determination (R^2) on the held-out targets.
r2_score(y_test, y_predict)

## Linear Regression

In [None]:
# Ordinary least squares model with default settings; no grid search is run for it.
linear_regression = LinearRegression()

In [None]:
# Fit on the training split; fit() returns the estimator itself, so the name is
# simply rebound to the same object.
linear_regression = linear_regression.fit(x_train, y_train)

In [None]:
# Predict the held-out test features with the fitted linear model.
y_predict = linear_regression.predict(x_test)

In [None]:
# Clamp predictions to the target's min-max-scaled [0, 1] range rather than
# re-scaling them by their own extrema: that transform alters the values being
# scored and divides by zero when every prediction is equal. A linear model can
# extrapolate below 0, and mean_squared_log_error rejects negative inputs.
y_predict = np.clip(y_predict, 0.0, 1.0)

In [None]:
# Median absolute error of y_predict against the held-out targets.
median_absolute_error(y_test, y_predict)

In [None]:
# Root mean squared log error (square root of MSLE) on the held-out targets.
np.sqrt(mean_squared_log_error(y_test, y_predict))

In [None]:
# Coefficient of determination (R^2) on the held-out targets.
r2_score(y_test, y_predict)

## Decision Tree Regressor

In [None]:
# Seed the tree so the grid search and the refit model are reproducible:
# scikit-learn permutes candidate features at each split, so unseeded trees can
# differ between runs. 42 matches the seed used for the train/test split.
decision_tree = DecisionTreeRegressor(random_state = 42)

In [None]:
# Candidate hyper-parameters: split criterion, depth limit (10-19) and minimum
# samples needed to split a node (2-9).
param_grid = dict(
    criterion=['squared_error', 'friedman_mse'],
    max_depth=range(10, 20),
    min_samples_split=range(2, 10),
)

# Exhaustive 5-fold search scored on three metrics; the winning combination is
# selected (and refit on the whole training split) by negated MSLE.
grid_search_cv = GridSearchCV(
    decision_tree,
    param_grid,
    scoring=['neg_mean_squared_log_error', 'neg_median_absolute_error', 'r2'],
    refit='neg_mean_squared_log_error',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

grid_search_cv_fit = grid_search_cv.fit(x_train, y_train)

In [None]:
# Mean cross-validated score of the winning combination under the refit metric
# (negated MSLE, so values closer to 0 are better), then the combination itself.
print('Best Score: ', grid_search_cv_fit.best_score_)
print('Best Params: ', grid_search_cv_fit.best_params_)

In [None]:
# Predict the held-out test features with the estimator refit on the best
# parameter combination.
y_predict = grid_search_cv_fit.best_estimator_.predict(x_test)

In [None]:
# Clamp predictions to the target's min-max-scaled [0, 1] range rather than
# re-scaling them by their own extrema: that transform alters the values being
# scored and divides by zero when every prediction is equal. Clamping also keeps
# the inputs non-negative, as mean_squared_log_error requires.
y_predict = np.clip(y_predict, 0.0, 1.0)

In [None]:
# Median absolute error of y_predict against the held-out targets.
median_absolute_error(y_test, y_predict)

In [None]:
# Root mean squared log error (square root of MSLE) on the held-out targets.
np.sqrt(mean_squared_log_error(y_test, y_predict))

In [None]:
# Coefficient of determination (R^2) on the held-out targets.
r2_score(y_test, y_predict)