<a href="https://colab.research.google.com/github/ferdouszislam/Weather-WaterLevel-Prediction-ML/blob/main/apply_ml_algo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [15]:
# random seed
RAND_SEED = 42

## Regression

In [16]:
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score, GridSearchCV
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from math import sqrt

In [17]:
def train_regression(model, param_grid, X_train, y_train):

  # 10-fold cross validation
  cv = KFold(n_splits=10, random_state=RAND_SEED, shuffle= True)

  # use gridsearch to check all values in param_grid
  model = GridSearchCV(model, param_grid, scoring=['r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'], refit='r2', cv=cv)
  # fit model to data
  model.fit(X_train, y_train)

  selected_hyperparams = model.best_params_
  train_r2 = -1*round(model.cv_results_['mean_test_r2'][model.best_index_], 4)
  train_mae = -1*round(model.cv_results_['mean_test_neg_mean_absolute_error'][model.best_index_], 4)
  train_rmse = -1*round(model.cv_results_['mean_test_neg_root_mean_squared_error'][model.best_index_], 4)

  return model, selected_hyperparams, train_r2, train_mae, train_rmse

def eval_regression(model, X_test, y_test):
  y_test_pred = model.predict(X_test)
  test_r2 = round(r2_score(y_test, y_test_pred), 4)
  test_mae = round(mean_absolute_error(y_test, y_test_pred), 4)
  test_rmse = round(sqrt(mean_squared_error(y_test, y_test_pred)), 4)

  return test_r2, test_mae, test_rmse

### Load the datasets

In [19]:
# Load the train dataset
train_df = pd.read_csv('https://raw.githubusercontent.com/ferdouszislam/Weather-WaterLevel-Prediction-ML/main/Datasets/brri-datasets/final-dataset/train/brri-weather_train_regression.csv')
X_train = train_df.drop(columns='Rainfall (mm)')
y_train = train_df['Rainfall (mm)']

# Load test set
test_df = pd.read_csv('https://raw.githubusercontent.com/ferdouszislam/Weather-WaterLevel-Prediction-ML/main/Datasets/brri-datasets/final-dataset/test/brri-weather_test_regression.csv')
X_test = test_df.drop(columns='Rainfall (mm)')
y_test = test_df['Rainfall (mm)']

### Apply Linear Regression

In [20]:
# Linear Regression Model
model = LinearRegression()
# dictionary of hyper-parameters
param_grid = {}

# train model
model, selected_hyperparams, train_r2, train_mae, train_rmse = train_regression(model, param_grid, X_train, y_train)
print(f'Selected hyperparameters: {selected_hyperparams}')
# performance on the train set
print(f'Train set performance: r2-score={train_r2}, mae={train_mae}, rmse={train_rmse}')

print()

# test model
test_r2, test_mae, test_rmse = eval_regression(model, X_test, y_test)
# performance on the test set
print(f'Test set performance: r2-score={test_r2}, mae={test_mae}, rmse={test_rmse}')

Selected hyperparameters: {}
Train set performance: r2-score=-0.1808, mae=7.3246, rmse=13.5433

Test set performance: r2-score=0.1076, mae=7.8221, rmse=16.3342


### Apply Ridge Regression

In [29]:
# Ridge Regression Model
model = Ridge(random_state=RAND_SEED)
# dictionary of hyper-parameters
alpha_vals = [1.0]
for i in range(1, 4):
  b = 1.0/10**i
  for j in range(1, 10):
    alpha_vals.append(round(b*j, 3))
solvers = ['sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag', 'saga']
param_grid = {'alpha': alpha_vals, 'solver': solvers}

# train model
model, selected_hyperparams, train_r2, train_mae, train_rmse = train_regression(model, param_grid, X_train, y_train)
print(f'Selected hyperparameters: {selected_hyperparams}')
# performance on the train set
print(f'Train set performance: r2-score={train_r2}, mae={train_mae}, rmse={train_rmse}')

print()

# test model
test_r2, test_mae, test_rmse = eval_regression(model, X_test, y_test)
# performance on the test set
print(f'Test set performance: r2-score={test_r2}, mae={test_mae}, rmse={test_rmse}')

Selected hyperparameters: {'alpha': 1.0, 'solver': 'saga'}
Train set performance: r2-score=-0.1829, mae=7.3103, rmse=13.5292

Test set performance: r2-score=0.1124, mae=7.7839, rmse=16.2905


## Classification