<a href="https://colab.research.google.com/github/Maryam-Afshari/thesis/blob/master/MLR_Basel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# checking out the GPU
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Sat Jun 18 19:27:50 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# Report the runtime's total memory and whether it counts as "high-RAM".
from psutil import virtual_memory

# total memory in gigabytes (10^9 bytes)
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# anything at or above 20 GB is reported as a high-RAM runtime
if ram_gb >= 20:
  print('You are using a high-RAM runtime!')
else:
  print('Not using a high-RAM runtime')

Your runtime has 27.3 gigabytes of available RAM

You are using a high-RAM runtime!


# Running MLR on Basel station

In [4]:
# hydroeval is imported below as `he`; uncomment the next line on a runtime
# where the package is not installed yet.
#!pip install hydroeval

In [8]:
# required libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
import hydroeval as he
import matplotlib.pyplot as plt

In [5]:
# Mount Google Drive so the CSV files under /content/drive can be read.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [9]:
# Load the two input tables from Google Drive; the first CSV column becomes
# the index of each frame.
basel_csv = '/content/drive/MyDrive/Thesis-Afshari/data/basel.csv'
q_basel_csv = "/content/drive/MyDrive/Thesis-Afshari/data/q_basel.csv"

# predictors and target (later cells read the et, p, t, obs and datetime columns)
basel = pd.read_csv(basel_csv, index_col=0)
# reference series (a later cell reads its `pcr` column)
q_basel = pd.read_csv(q_basel_csv, index_col=0)


### Feature engineering

In [10]:
# making lagged variables
#
# Every row receives, for each predictor, the values of the previous
# TIME_STEPS - 1 rows as additional columns named '<var>_lag_<k>'.
# NOTE: this cell rebinds `basel` and `q_basel`; run it once after loading
# the data.

# define the number of lags (current value plus TIME_STEPS - 1 lagged values)
TIME_STEPS = 60

# predictors that receive lagged copies
LAGGED_VARS = ["et", "p", "t"]

# Build all lag columns first and attach them with a single concat.
# Calling DataFrame.insert once per lag (3 * 59 = 177 calls) fragments the
# frame and makes pandas emit a PerformanceWarning.
lag_columns = {}
column_order = list(basel.columns)
for i, var in enumerate(LAGGED_VARS):
  for lag in range(TIME_STEPS - 1, 0, -1):
    name = f'{var}_lag_{lag}'
    lag_columns[name] = basel[var].shift(lag)
    # same target position a per-column insert would use, so the final
    # column layout is identical to inserting the columns one by one
    column_order.insert(i * TIME_STEPS + 1, name)

basel = pd.concat([basel, pd.DataFrame(lag_columns)], axis=1)[column_order]

# remove the first TIME_STEPS - 1 rows since they will contain NA values;
# q_basel is trimmed the same way so both frames stay row-aligned
basel = basel.iloc[TIME_STEPS - 1:, :].reset_index(drop=True)
q_basel = q_basel.iloc[TIME_STEPS - 1:, :].reset_index(drop=True)

  if self.run_code(code, result):


### Train-test split and defining variables


In [11]:
# Chronological train-test split: the first 75 % of the rows are used for
# training, the remaining rows for testing.
len_ = int(0.75 * basel.shape[0])
df_train, df_test = basel[:len_], basel[len_:]

# predictors of the model without lagged variables
base_predictors = ["et", "t", "p"]
# columns that are not predictors (target and timestamp)
non_predictors = ["obs", "datetime"]

# training split
X_train = df_train[base_predictors]
X_train_lagged = df_train.drop(columns=non_predictors)
y_train = df_train["obs"]

# test split
X_test = df_test[base_predictors]
X_test_lagged = df_test.drop(columns=non_predictors)
y_test = df_test["obs"]

### Normalising data

In [None]:
# Min-max scale the predictors. Each scaler is fitted on the training split
# only and then applied unchanged to both splits.

# model without lagged variables
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# model with the lagged variables
scaler_lagged = MinMaxScaler()
scaler_lagged.fit(X_train_lagged)
X_train_lagged, X_test_lagged = (
    scaler_lagged.transform(X_train_lagged),
    scaler_lagged.transform(X_test_lagged),
)

### Model

In [12]:
# Linear regression on the non-lagged predictors: fit on the training split
# (fit returns the estimator itself).
lm = LinearRegression().fit(X_train, y_train)

# score of the fitted model on the test split, shown as the cell output
lm.score(X_test, y_test)

0.12075166081013522

In [13]:
# Linear regression on the predictors plus their lagged copies: fit on the
# training split (fit returns the estimator itself).
lm_lag = LinearRegression().fit(X_train_lagged, y_train)

# score of the fitted model on the test split, shown as the cell output
lm_lag.score(X_test_lagged, y_test)

0.6796257969847803

### Evaluation

In [14]:
# Compare three simulations of the test-period target using hydroeval's
# nse and kge functions: the `pcr` series from q_basel, the linear model, and
# the linear model with lagged variables.

# --- simulated series for the test period ---
streamflow_pcr = q_basel.pcr[len_:]
# model with only meteorological variables
y_pred = lm.predict(X_test)
# model with meteorological variables + lagged variables
y_pred_lagged = lm_lag.predict(X_test_lagged)

# --- scores (simulation first, observation second) ---
# he.kge output is unpacked into the kge value and its r, alpha, beta terms
nse_pcr = he.evaluator(he.nse, streamflow_pcr, y_test)
kge_pcr, r_pcr, alpha_pcr, beta_pcr = he.evaluator(he.kge, streamflow_pcr, y_test)

nse = he.evaluator(he.nse, y_pred, y_test)
kge, r, alpha, beta = he.evaluator(he.kge, y_pred, y_test)

nse_lag = he.evaluator(he.nse, y_pred_lagged, y_test)
kge_lag, r_lag, alpha_lag, beta_lag = he.evaluator(he.kge, y_pred_lagged, y_test)

# --- report, one line per model ---
reports = (
    ("The nse and kge of the PCR model are {:.2} and {:.2} respectively.", nse_pcr, kge_pcr),
    ("The nse and kge of the linear model are {:.2} and {:.2} respectively.", nse, kge),
    ("The nse and kge of the linear model including lagged variables are {:.2} and {:.2} respectively.", nse_lag, kge_lag),
)
for message, nse_score, kge_score in reports:
  print(message.format(nse_score[0], kge_score[0]))

The nse and kge of the PCR model are 0.22 and 0.64 respectively.
The nse and kge of the linear model are 0.12 and 0.12 respectively.
The nse and kge of the linear model including lagged variables are 0.68 and 0.72 respectively.
