<a href="https://colab.research.google.com/github/ferdouszislam/Weather-WaterLevel-Prediction-ML/blob/main/Notebooks/apply_ml_algo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# random seed
RAND_SEED = 42

## Regression

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from math import sqrt

In [4]:
def train_regression(model, param_grid, X_train, y_train):

  # 10-fold cross validation
  cv = KFold(n_splits=10, random_state=RAND_SEED, shuffle= True)

  # use gridsearch to check all values in param_grid
  model = GridSearchCV(model, param_grid, scoring=['r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'], refit='r2', cv=cv)
  # fit model to data
  model.fit(X_train, y_train)

  selected_hyperparams = model.best_params_
  train_r2 = round(model.cv_results_['mean_test_r2'][model.best_index_], 3)
  train_mae = round(model.cv_results_['mean_test_neg_mean_absolute_error'][model.best_index_], 3)
  train_rmse = round(model.cv_results_['mean_test_neg_root_mean_squared_error'][model.best_index_], 3)

  return model, selected_hyperparams, train_r2, train_mae, train_rmse

def eval_regression(model, X_test, y_test):
  y_test_pred = model.predict(X_test)
  test_r2 = round(r2_score(y_test, y_test_pred), 3)
  test_mae = round(mean_absolute_error(y_test, y_test_pred), 3)
  test_rmse = round(sqrt(mean_squared_error(y_test, y_test_pred)), 3)

  return test_r2, test_mae, test_rmse

In [5]:
def showEvalutationGraph_regression(model, X, y, x_axis_param_name, x_axis_param_vals, selected_x_axis_param_val, other_model_params):
  # FUNCTION NOT TESTED!!!

  cv = StratifiedKFold(n_splits=10, random_state=RAND_SEED, shuffle= True)

  r2s = []
  maes = []
  rmses = []

  for x_axis_param_val in x_axis_param_vals:
    model_params = other_model_params
    model_params[x_axis_param_name] = x_axis_param_val

    model = model(**model_params)

    r2_segments = cross_val_score(model, X, y, scoring='r2',cv=cv, n_jobs=1)
    mae_segments = cross_val_score(model, X, y, scoring='neg_mean_absolute_error',cv=cv, n_jobs=1)
    rmse_segments = cross_val_score(model, X, y, scoring='neg_root_mean_squared_error',cv=cv, n_jobs=1)
    r2s.append(np.mean(r2_segments))
    maes.append(np.mean(mae_segments))
    rmses.append(np.mean(rmse_segments))

  plt.figure(figsize =(15,9))
  plt.plot(max_depth, accuracies, 'ro-',  max_depth, f1s ,'bv-', max_depth, aucs,'yo-')
  plt.axvline(x=selected_x_axis_param_val, color='k', linestyle='--')
  plt.legend(['R2','MAE','RMSE', f'selected value ({x_axis_param_name}={selected_x_axis_param_val})'], fontsize=16)
  plt.xlabel(x_axis_param_name, fontsize=18)
  plt.ylabel('R2, MAE, RMSE', fontsize=18)
  plt.xticks(fontsize=18)
  plt.yticks(fontsize=18)
  plt.show()

### Load the datasets

In [6]:
# Load the train dataset
train_df = pd.read_csv('https://raw.githubusercontent.com/ferdouszislam/Weather-WaterLevel-Prediction-ML/main/Datasets/brri-datasets/final-dataset/train/brri-weather_train_regression.csv')
X_train = train_df.drop(columns='Rainfall (mm)')
y_train = train_df['Rainfall (mm)']

# Load test set
test_df = pd.read_csv('https://raw.githubusercontent.com/ferdouszislam/Weather-WaterLevel-Prediction-ML/main/Datasets/brri-datasets/final-dataset/test/brri-weather_test_regression.csv')
X_test = test_df.drop(columns='Rainfall (mm)')
y_test = test_df['Rainfall (mm)']

### Apply Linear Regression

In [7]:
# Linear Regression
model = LinearRegression()
# dictionary of hyper-parameters
param_grid = {}

# train model
model, selected_hyperparams, train_r2, train_mae, train_rmse = train_regression(model, param_grid, X_train, y_train)
print(f'Selected hyperparameters: {selected_hyperparams}')
# performance on the train set
print(f'Train set performance: r2-score={train_r2}, mae={train_mae}, rmse={train_rmse}')

print()

# test model
test_r2, test_mae, test_rmse = eval_regression(model, X_test, y_test)
# performance on the test set
print(f'Test set performance: r2-score={test_r2}, mae={test_mae}, rmse={test_rmse}')

Selected hyperparameters: {}
Train set performance: r2-score=0.181, mae=-7.325, rmse=-13.543

Test set performance: r2-score=0.108, mae=7.822, rmse=16.334
