In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from datetime import date
from datetime import timedelta
from datetime import datetime
from scipy.stats import t
from google.colab import files

In [None]:
targets = pd.read_csv('https://data.ecoforecast.org/neon4cast-targets/aquatics/aquatics-targets.csv.gz')
targets = targets.dropna()
targetsTemp = targets[targets.variable == "temperature"]
targetsOxygen = targets[targets.variable == "oxygen"]
targetsChla = targets[targets.variable == "chla"]

In [None]:
# defineing all the function we will use

#this function will turn our data in to a tensor for our model to able to use
def create_dataset(dataset, lb):
    X, y = [], []
    for i in range(len(dataset) - lb):
        x_i = dataset[i:i+lb]
        y_i = dataset[i+1:i+lb+1]
        X.append(x_i)
        y.append(y_i)
    return torch.tensor(X), torch.tensor(y)


# def the model
class WaterQmodel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=10, num_layers=1, batch_first=True)
        self.linear = nn.Linear(10, 1)
    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.linear(x)
        return x

#Train the data
def training(model,number_epochs):
  dataloads = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X_train, y_train), shuffle=True, batch_size=9)
  optimizer = optim.Adam(model.parameters())
  lossfn = nn.MSELoss()
  for epoch in range(number_epochs):
    model.train()
    for X_batch, y_batch in dataloads:
        y_pred = model(X_batch)
        loss = lossfn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    model.eval()
    if epoch % 100 != 0:
        continue
    with torch.no_grad():
        y_pred = model(X_train)
        train_rmse = np.sqrt(lossfn(y_pred, y_train))
    print("Epoch %d: train RMSE %.4f" % (epoch, train_rmse))

# prediction 
def predict(timeseries, data , types = ('temperature','oxygen','chla'),site_id = ('BARC','CRAM','LIRO','PRLA','PRPO','SUGG','TOOK')):
  edate = datetime.strptime(data.iloc[-1, 0], "%Y-%m-%d").date()
  date_left = (today - edate).days
  number_of_days_to_pred = date_left + 30
  input_seq = torch.Tensor(timeseries)
  output = model(input_seq)
  j = 0
  i = -1
  pred = [0]*number_of_days_to_pred

  while i > -1*number_of_days_to_pred-1:
    pred[j] = output[i].item()
    i -= 1
    j += 1

  ref_date = today - timedelta(days = (today - edate).days)

  date_range = [0]*number_of_days_to_pred

  for i in range (number_of_days_to_pred):
    date = ref_date + timedelta(days = i+1)
    date_range[i] = date.strftime("%Y-%m-%d")
  variable = [types]*number_of_days_to_pred
  site_id = [site_id]*number_of_days_to_pred

  dat = {'datetime':date_range, 'site_id': site_id,'variable':variable, 'prediction':pred}
  dataf = pd.DataFrame(dat, columns = ['datetime', 'site_id','variable', 'prediction'])
  dataf = dataf.iloc[date_left:, :].reset_index(drop=True)
  dataf.insert(2, 'family', 'ensemble')
  return dataf


BARC - Temperature,Oxygen,Chla

In [None]:
targets_BARCO_Temp = targetsTemp[targetsTemp['site_id'].str.contains('BARC')]
data_targets_BARCO_Temp = targets_BARCO_Temp[['datetime', 'observation', 'site_id']]
data_targets_BARCO_Temp = data_targets_BARCO_Temp.sort_values('datetime')
data_targets_BARCO_Temp = data_targets_BARCO_Temp.reset_index(drop=True)
targets_BARCO_Temp = data_targets_BARCO_Temp[["observation"]].values.astype('float32')


In [None]:
X_train, y_train = create_dataset(targets_BARCO_Temp, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_BARCO_Temp , data_targets_BARCO_Temp ,types = 'temperature' , site_id = 'BARC')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

BARC_Temp = results_df
BARC_Temp

  return torch.tensor(X), torch.tensor(y)


Epoch 0: train RMSE 22.6660
Epoch 100: train RMSE 0.4453
Epoch 200: train RMSE 0.4392
Epoch 0: train RMSE 0.4362
Epoch 100: train RMSE 0.4365
Epoch 200: train RMSE 0.4306
Epoch 0: train RMSE 0.4332
Epoch 100: train RMSE 0.4257
Epoch 200: train RMSE 0.4254


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,BARC,ensemble,1,temperature,21.162157
1,2023-04-13,BARC,ensemble,1,temperature,20.957037
2,2023-04-14,BARC,ensemble,1,temperature,20.958063
3,2023-04-15,BARC,ensemble,1,temperature,21.443325
4,2023-04-16,BARC,ensemble,1,temperature,21.758297
...,...,...,...,...,...,...
85,2023-05-07,BARC,ensemble,3,temperature,23.492336
86,2023-05-08,BARC,ensemble,3,temperature,23.511312
87,2023-05-09,BARC,ensemble,3,temperature,23.048660
88,2023-05-10,BARC,ensemble,3,temperature,22.862291


In [None]:
targets_BARCO_Oxy = targetsOxygen[targetsOxygen['site_id'].str.contains('BARC')]
data_targets_BARCO_Oxy = targets_BARCO_Oxy[['datetime', 'observation', 'site_id']]
data_targets_BARCO_Oxy = data_targets_BARCO_Oxy.sort_values('datetime')
data_targets_BARCO_Oxy = data_targets_BARCO_Oxy.reset_index(drop=True)
targets_BARCO_Oxy = data_targets_BARCO_Oxy[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_BARCO_Oxy, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_BARCO_Oxy , data_targets_BARCO_Oxy ,types = 'oxygen' , site_id = 'BARC')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

BARC_Oxy = results_df 
BARC_Oxy

Epoch 0: train RMSE 5.9290
Epoch 100: train RMSE 0.2616
Epoch 200: train RMSE 0.2561
Epoch 0: train RMSE 0.2524
Epoch 100: train RMSE 0.2490
Epoch 200: train RMSE 0.2441
Epoch 0: train RMSE 0.2417
Epoch 100: train RMSE 0.2396
Epoch 200: train RMSE 0.2386


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,BARC,ensemble,1,oxygen,8.131665
1,2023-04-13,BARC,ensemble,1,oxygen,8.120245
2,2023-04-14,BARC,ensemble,1,oxygen,8.068915
3,2023-04-15,BARC,ensemble,1,oxygen,8.001410
4,2023-04-16,BARC,ensemble,1,oxygen,8.131119
...,...,...,...,...,...,...
85,2023-05-07,BARC,ensemble,3,oxygen,8.073325
86,2023-05-08,BARC,ensemble,3,oxygen,8.073096
87,2023-05-09,BARC,ensemble,3,oxygen,8.100517
88,2023-05-10,BARC,ensemble,3,oxygen,8.196108


In [None]:
targets_BARCO_Chla = targetsChla[targetsChla['site_id'].str.contains('BARC')]
data_targets_BARCO_Chla = targets_BARCO_Chla[['datetime', 'observation', 'site_id']]
data_targets_BARCO_Chla = data_targets_BARCO_Chla.sort_values('datetime')
data_targets_BARCO_Chla = data_targets_BARCO_Chla.reset_index(drop=True)
targets_BARCO_Chla = data_targets_BARCO_Chla[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_BARCO_Chla, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_BARCO_Chla , data_targets_BARCO_Chla ,types = 'chla' , site_id = 'BARC')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

BARC_Chla = results_df 
BARC_Chla

Epoch 0: train RMSE 4.4422
Epoch 100: train RMSE 2.5130
Epoch 200: train RMSE 1.4285
Epoch 0: train RMSE 0.9838
Epoch 100: train RMSE 0.8813
Epoch 200: train RMSE 0.8606
Epoch 0: train RMSE 0.8559
Epoch 100: train RMSE 0.8482
Epoch 200: train RMSE 0.8403


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,BARC,ensemble,1,chla,1.085543
1,2023-04-13,BARC,ensemble,1,chla,1.192252
2,2023-04-14,BARC,ensemble,1,chla,1.328143
3,2023-04-15,BARC,ensemble,1,chla,1.146902
4,2023-04-16,BARC,ensemble,1,chla,1.019750
...,...,...,...,...,...,...
85,2023-05-07,BARC,ensemble,3,chla,1.298118
86,2023-05-08,BARC,ensemble,3,chla,1.353152
87,2023-05-09,BARC,ensemble,3,chla,1.518216
88,2023-05-10,BARC,ensemble,3,chla,1.548364


In [None]:
# download as csv
BARC = pd.concat([BARC_Temp, BARC_Oxy, BARC_Chla], axis=0, ignore_index=True)
BARC.to_csv('BARC.csv', encoding = 'utf-8-sig') 
files.download('BARC.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

CRAM - Temperature, Oxygen, Chla


In [None]:
targets_CRAM_Temp = targetsTemp[targetsTemp['site_id'].str.contains('CRAM')]
data_targets_CRAM_Temp = targets_CRAM_Temp[['datetime', 'observation', 'site_id']]
data_targets_CRAM_Temp = data_targets_CRAM_Temp.sort_values('datetime')
data_targets_CRAM_Temp = data_targets_CRAM_Temp.reset_index(drop=True)
targets_CRAM_Temp = data_targets_CRAM_Temp[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_CRAM_Temp, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_CRAM_Temp , data_targets_CRAM_Temp ,types = 'temperature' , site_id = 'CRAM')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

CRAM_Temp = results_df
CRAM_Temp

Epoch 0: train RMSE 18.2854
Epoch 100: train RMSE 0.8436
Epoch 200: train RMSE 0.7368
Epoch 0: train RMSE 0.7022
Epoch 100: train RMSE 0.6882
Epoch 200: train RMSE 0.6858
Epoch 0: train RMSE 0.6669
Epoch 100: train RMSE 0.6624
Epoch 200: train RMSE 0.6555


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,CRAM,ensemble,1,temperature,20.876564
1,2023-04-13,CRAM,ensemble,1,temperature,21.034580
2,2023-04-14,CRAM,ensemble,1,temperature,19.822609
3,2023-04-15,CRAM,ensemble,1,temperature,20.550926
4,2023-04-16,CRAM,ensemble,1,temperature,22.640251
...,...,...,...,...,...,...
85,2023-05-07,CRAM,ensemble,3,temperature,15.954968
86,2023-05-08,CRAM,ensemble,3,temperature,14.573168
87,2023-05-09,CRAM,ensemble,3,temperature,15.152077
88,2023-05-10,CRAM,ensemble,3,temperature,15.483828


In [None]:
targets_CRAM_Oxy = targetsOxygen[targetsOxygen['site_id'].str.contains('CRAM')]
data_targets_CRAM_Oxy = targets_CRAM_Oxy[['datetime', 'observation', 'site_id']]
data_targets_CRAM_Oxy = data_targets_CRAM_Oxy.sort_values('datetime')
data_targets_CRAM_Oxy = data_targets_CRAM_Oxy.reset_index(drop=True)
targets_CRAM_Oxy = data_targets_CRAM_Oxy[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_CRAM_Oxy, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_CRAM_Oxy , data_targets_CRAM_Oxy ,types = 'oxygen' , site_id = 'CRAM')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

CRAM_Oxy = results_df 
CRAM_Oxy

Epoch 0: train RMSE 8.2479
Epoch 100: train RMSE 0.9110
Epoch 200: train RMSE 0.3052
Epoch 0: train RMSE 0.2075
Epoch 100: train RMSE 0.1912
Epoch 200: train RMSE 0.1895
Epoch 0: train RMSE 0.1881
Epoch 100: train RMSE 0.1830
Epoch 200: train RMSE 0.1775


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,CRAM,ensemble,1,oxygen,6.956359
1,2023-04-13,CRAM,ensemble,1,oxygen,9.084452
2,2023-04-14,CRAM,ensemble,1,oxygen,9.290710
3,2023-04-15,CRAM,ensemble,1,oxygen,8.542192
4,2023-04-16,CRAM,ensemble,1,oxygen,8.481756
...,...,...,...,...,...,...
85,2023-05-07,CRAM,ensemble,3,oxygen,8.022076
86,2023-05-08,CRAM,ensemble,3,oxygen,8.137622
87,2023-05-09,CRAM,ensemble,3,oxygen,8.068714
88,2023-05-10,CRAM,ensemble,3,oxygen,8.440280


In [None]:
targets_CRAM_Chla = targetsChla[targetsChla['site_id'].str.contains('CRAM')]
data_targets_CRAM_Chla = targets_CRAM_Chla[['datetime', 'observation', 'site_id']]
data_targets_CRAM_Chla = data_targets_CRAM_Chla.sort_values('datetime')
data_targets_CRAM_Chla = data_targets_CRAM_Chla.reset_index(drop=True)
targets_CRAM_Chla = data_targets_CRAM_Chla[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_CRAM_Chla, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_CRAM_Chla , data_targets_CRAM_Chla ,types = 'chla' , site_id = 'CRAM')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

CRAM_Chla = results_df 
CRAM_Chla

Epoch 0: train RMSE 4.0717
Epoch 100: train RMSE 0.6866
Epoch 200: train RMSE 0.6594
Epoch 0: train RMSE 0.6405
Epoch 100: train RMSE 0.6201
Epoch 200: train RMSE 0.6067
Epoch 0: train RMSE 0.5999
Epoch 100: train RMSE 0.5931
Epoch 200: train RMSE 0.5879


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,CRAM,ensemble,1,chla,3.539690
1,2023-04-13,CRAM,ensemble,1,chla,5.269785
2,2023-04-14,CRAM,ensemble,1,chla,4.290708
3,2023-04-15,CRAM,ensemble,1,chla,4.223272
4,2023-04-16,CRAM,ensemble,1,chla,4.769316
...,...,...,...,...,...,...
85,2023-05-07,CRAM,ensemble,3,chla,4.129078
86,2023-05-08,CRAM,ensemble,3,chla,4.060746
87,2023-05-09,CRAM,ensemble,3,chla,3.717337
88,2023-05-10,CRAM,ensemble,3,chla,3.181904


In [None]:
# download as csv
CRAM = pd.concat([CRAM_Temp, CRAM_Oxy, CRAM_Chla], axis=0, ignore_index=True)
CRAM.to_csv('CRAM.csv', encoding = 'utf-8-sig') 
files.download('CRAM.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

LIRO - Temperature,Oxygen,Chla

In [None]:
targets_LIRO_Temp = targetsTemp[targetsTemp['site_id'].str.contains('LIRO')]
data_targets_LIRO_Temp = targets_LIRO_Temp[['datetime', 'observation', 'site_id']]
data_targets_LIRO_Temp = data_targets_LIRO_Temp.sort_values('datetime')
data_targets_LIRO_Temp = data_targets_LIRO_Temp.reset_index(drop=True)
targets_LIRO_Temp = data_targets_LIRO_Temp[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_LIRO_Temp, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_LIRO_Temp , data_targets_LIRO_Temp ,types = 'temperature' , site_id = 'LIRO')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

LIRO_Temp = results_df 
LIRO_Temp

Epoch 0: train RMSE 18.9618
Epoch 100: train RMSE 0.9341
Epoch 200: train RMSE 0.7796
Epoch 0: train RMSE 0.7568
Epoch 100: train RMSE 0.7427
Epoch 200: train RMSE 0.7315
Epoch 0: train RMSE 0.7271
Epoch 100: train RMSE 0.7311
Epoch 200: train RMSE 0.7223


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,LIRO,ensemble,1,temperature,14.132172
1,2023-04-13,LIRO,ensemble,1,temperature,15.816753
2,2023-04-14,LIRO,ensemble,1,temperature,16.186010
3,2023-04-15,LIRO,ensemble,1,temperature,14.668315
4,2023-04-16,LIRO,ensemble,1,temperature,15.702448
...,...,...,...,...,...,...
85,2023-05-07,LIRO,ensemble,3,temperature,8.516548
86,2023-05-08,LIRO,ensemble,3,temperature,8.729434
87,2023-05-09,LIRO,ensemble,3,temperature,9.423103
88,2023-05-10,LIRO,ensemble,3,temperature,9.543606


In [None]:
targets_LIRO_Oxy = targetsOxygen[targetsOxygen['site_id'].str.contains('LIRO')]
data_targets_LIRO_Oxy = targets_LIRO_Oxy[['datetime', 'observation', 'site_id']]
data_targets_LIRO_Oxy = data_targets_LIRO_Oxy.sort_values('datetime')
data_targets_LIRO_Oxy = data_targets_LIRO_Oxy.reset_index(drop=True)
targets_LIRO_Oxy = data_targets_LIRO_Oxy[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_LIRO_Oxy, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_LIRO_Oxy , data_targets_LIRO_Oxy ,types = 'oxygen' , site_id = 'LIRO')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

LIRO_Oxy = results_df 
LIRO_Oxy

Epoch 0: train RMSE 8.0892
Epoch 100: train RMSE 0.9654
Epoch 200: train RMSE 0.2931
Epoch 0: train RMSE 0.2045
Epoch 100: train RMSE 0.1907
Epoch 200: train RMSE 0.1884
Epoch 0: train RMSE 0.1837
Epoch 100: train RMSE 0.1812
Epoch 200: train RMSE 0.1807


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,LIRO,ensemble,1,oxygen,8.194643
1,2023-04-13,LIRO,ensemble,1,oxygen,8.088731
2,2023-04-14,LIRO,ensemble,1,oxygen,8.062041
3,2023-04-15,LIRO,ensemble,1,oxygen,8.102724
4,2023-04-16,LIRO,ensemble,1,oxygen,8.100031
...,...,...,...,...,...,...
85,2023-05-07,LIRO,ensemble,3,oxygen,7.257850
86,2023-05-08,LIRO,ensemble,3,oxygen,7.225492
87,2023-05-09,LIRO,ensemble,3,oxygen,7.323795
88,2023-05-10,LIRO,ensemble,3,oxygen,7.299452


In [None]:
targets_LIRO_Chla = targetsChla[targetsChla['site_id'].str.contains('LIRO')]
data_targets_LIRO_Chla = targets_LIRO_Chla[['datetime', 'observation', 'site_id']]
data_targets_LIRO_Chla = data_targets_LIRO_Chla.sort_values('datetime')
data_targets_LIRO_Chla = data_targets_LIRO_Chla.reset_index(drop=True)
targets_LIRO_Chla = data_targets_LIRO_Chla[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_LIRO_Chla, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_LIRO_Chla , data_targets_LIRO_Chla ,types = 'chla' , site_id = 'LIRO')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

LIRO_Chla = results_df 
LIRO_Chla

Epoch 0: train RMSE 7.2468
Epoch 100: train RMSE 3.1381
Epoch 200: train RMSE 2.7975
Epoch 0: train RMSE 2.4928
Epoch 100: train RMSE 2.4123
Epoch 200: train RMSE 2.2952
Epoch 0: train RMSE 2.1104
Epoch 100: train RMSE 2.0598
Epoch 200: train RMSE 1.9804


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,LIRO,ensemble,1,chla,4.951863
1,2023-04-13,LIRO,ensemble,1,chla,4.650382
2,2023-04-14,LIRO,ensemble,1,chla,3.874831
3,2023-04-15,LIRO,ensemble,1,chla,5.252534
4,2023-04-16,LIRO,ensemble,1,chla,3.849034
...,...,...,...,...,...,...
85,2023-05-07,LIRO,ensemble,3,chla,2.987589
86,2023-05-08,LIRO,ensemble,3,chla,2.928113
87,2023-05-09,LIRO,ensemble,3,chla,2.673027
88,2023-05-10,LIRO,ensemble,3,chla,2.366901


In [None]:
# download as csv
LIRO = pd.concat([LIRO_Temp, LIRO_Oxy, LIRO_Chla], axis=0, ignore_index=True)
LIRO.to_csv('LIRO.csv', encoding = 'utf-8-sig') 
files.download('LIRO.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

PRLA - Temperature,Oxygen,Chla

In [None]:
targets_PRLA_Temp = targetsTemp[targetsTemp['site_id'].str.contains('PRLA')]
data_targets_PRLA_Temp = targets_PRLA_Temp[['datetime', 'observation', 'site_id']]
data_targets_PRLA_Temp = data_targets_PRLA_Temp.sort_values('datetime')
data_targets_PRLA_Temp = data_targets_PRLA_Temp.reset_index(drop=True)
targets_PRLA_Temp = data_targets_PRLA_Temp[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_PRLA_Temp, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_PRLA_Temp , data_targets_PRLA_Temp ,types = 'temperature' , site_id = 'PRLA')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

PRLA_Temp = results_df 
PRLA_Temp

Epoch 0: train RMSE 18.0719
Epoch 100: train RMSE 1.4730
Epoch 200: train RMSE 1.2870
Epoch 0: train RMSE 1.2567
Epoch 100: train RMSE 1.2218
Epoch 200: train RMSE 1.2111
Epoch 0: train RMSE 1.1997
Epoch 100: train RMSE 1.2148
Epoch 200: train RMSE 1.1894


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,PRLA,ensemble,1,temperature,6.049641
1,2023-04-13,PRLA,ensemble,1,temperature,5.464489
2,2023-04-14,PRLA,ensemble,1,temperature,5.545524
3,2023-04-15,PRLA,ensemble,1,temperature,5.603086
4,2023-04-16,PRLA,ensemble,1,temperature,5.508148
...,...,...,...,...,...,...
85,2023-05-07,PRLA,ensemble,3,temperature,16.583250
86,2023-05-08,PRLA,ensemble,3,temperature,17.175831
87,2023-05-09,PRLA,ensemble,3,temperature,16.946301
88,2023-05-10,PRLA,ensemble,3,temperature,17.501850


In [None]:
targets_PRLA_Oxy = targetsOxygen[targetsOxygen['site_id'].str.contains('PRLA')]
data_targets_PRLA_Oxy = targets_PRLA_Oxy[['datetime', 'observation', 'site_id']]
data_targets_PRLA_Oxy = data_targets_PRLA_Oxy.sort_values('datetime')
data_targets_PRLA_Oxy = data_targets_PRLA_Oxy.reset_index(drop=True)
targets_PRLA_Oxy = data_targets_PRLA_Oxy[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_PRLA_Oxy, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_PRLA_Oxy , data_targets_PRLA_Oxy ,types = 'oxygen' , site_id = 'PRLA')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

PRLA_Oxy = results_df 
PRLA_Oxy

Epoch 0: train RMSE 8.2554
Epoch 100: train RMSE 1.0768
Epoch 200: train RMSE 1.0025
Epoch 0: train RMSE 0.9475
Epoch 100: train RMSE 0.9058
Epoch 200: train RMSE 0.8845
Epoch 0: train RMSE 0.8675
Epoch 100: train RMSE 0.8566
Epoch 200: train RMSE 0.8452


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,PRLA,ensemble,1,oxygen,3.589435
1,2023-04-13,PRLA,ensemble,1,oxygen,3.117079
2,2023-04-14,PRLA,ensemble,1,oxygen,2.835123
3,2023-04-15,PRLA,ensemble,1,oxygen,3.180810
4,2023-04-16,PRLA,ensemble,1,oxygen,2.996122
...,...,...,...,...,...,...
85,2023-05-07,PRLA,ensemble,3,oxygen,3.014874
86,2023-05-08,PRLA,ensemble,3,oxygen,4.619194
87,2023-05-09,PRLA,ensemble,3,oxygen,7.138300
88,2023-05-10,PRLA,ensemble,3,oxygen,7.751081


In [None]:
targets_PRLA_Chla = targetsChla[targetsChla['site_id'].str.contains('PRLA')]
data_targets_PRLA_Chla = targets_PRLA_Chla[['datetime', 'observation', 'site_id']]
data_targets_PRLA_Chla = data_targets_PRLA_Chla.sort_values('datetime')
data_targets_PRLA_Chla = data_targets_PRLA_Chla.reset_index(drop=True)
targets_PRLA_Chla = data_targets_PRLA_Chla[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_PRLA_Chla, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_PRLA_Chla , data_targets_PRLA_Chla ,types = 'chla' , site_id = 'PRLA')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

PRLA_Chla = results_df 
PRLA_Chla

Epoch 0: train RMSE 37.9291
Epoch 100: train RMSE 19.6159
Epoch 200: train RMSE 14.6318
Epoch 0: train RMSE 11.1564
Epoch 100: train RMSE 9.1690
Epoch 200: train RMSE 8.2396
Epoch 0: train RMSE 7.5874
Epoch 100: train RMSE 7.3830
Epoch 200: train RMSE 7.3149


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,PRLA,ensemble,1,chla,8.811110
1,2023-04-13,PRLA,ensemble,1,chla,9.113152
2,2023-04-14,PRLA,ensemble,1,chla,8.828966
3,2023-04-15,PRLA,ensemble,1,chla,12.110043
4,2023-04-16,PRLA,ensemble,1,chla,10.393335
...,...,...,...,...,...,...
85,2023-05-07,PRLA,ensemble,3,chla,37.317558
86,2023-05-08,PRLA,ensemble,3,chla,43.324371
87,2023-05-09,PRLA,ensemble,3,chla,52.102089
88,2023-05-10,PRLA,ensemble,3,chla,35.982727


In [None]:
# download as csv
PRLA = pd.concat([PRLA_Temp, PRLA_Oxy, PRLA_Chla], axis=0, ignore_index=True)
PRLA.to_csv('PRLA.csv', encoding = 'utf-8-sig') 
files.download('PRLA.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

PRPO - Temperature,Oxygen,Chla

In [None]:
targets_PRPO_Temp = targetsTemp[targetsTemp['site_id'].str.contains('PRPO')]
data_targets_PRPO_Temp = targets_PRPO_Temp[['datetime', 'observation', 'site_id']]
data_targets_PRPO_Temp = data_targets_PRPO_Temp.sort_values('datetime')
data_targets_PRPO_Temp = data_targets_PRPO_Temp.reset_index(drop=True)
targets_PRPO_Temp = data_targets_PRPO_Temp[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_PRPO_Temp, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_PRPO_Temp , data_targets_PRPO_Temp ,types = 'temperature' , site_id = 'PRPO')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

PRPO_Temp = results_df
PRPO_Temp

Epoch 0: train RMSE 18.4981
Epoch 100: train RMSE 1.1931
Epoch 200: train RMSE 1.1272
Epoch 0: train RMSE 1.0775
Epoch 100: train RMSE 1.0145
Epoch 200: train RMSE 0.9844
Epoch 0: train RMSE 0.9700
Epoch 100: train RMSE 0.9538
Epoch 200: train RMSE 0.9460


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,PRPO,ensemble,1,temperature,16.617283
1,2023-04-13,PRPO,ensemble,1,temperature,15.541849
2,2023-04-14,PRPO,ensemble,1,temperature,13.418247
3,2023-04-15,PRPO,ensemble,1,temperature,12.689926
4,2023-04-16,PRPO,ensemble,1,temperature,10.345356
...,...,...,...,...,...,...
85,2023-05-07,PRPO,ensemble,3,temperature,5.773472
86,2023-05-08,PRPO,ensemble,3,temperature,5.919339
87,2023-05-09,PRPO,ensemble,3,temperature,5.496747
88,2023-05-10,PRPO,ensemble,3,temperature,5.154621


In [None]:
targets_PRPO_Oxy = targetsOxygen[targetsOxygen['site_id'].str.contains('PRPO')]
data_targets_PRPO_Oxy = targets_PRPO_Oxy[['datetime', 'observation', 'site_id']]
data_targets_PRPO_Oxy = data_targets_PRPO_Oxy.sort_values('datetime')
data_targets_PRPO_Oxy = data_targets_PRPO_Oxy.reset_index(drop=True)
targets_PRPO_Oxy = data_targets_PRPO_Oxy[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_PRPO_Oxy, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_PRPO_Oxy , data_targets_PRPO_Oxy ,types = 'oxygen' , site_id = 'PRPO')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

PRPO_Oxy = results_df 
PRPO_Oxy

Epoch 0: train RMSE 7.9896
Epoch 100: train RMSE 0.9704
Epoch 200: train RMSE 0.9167
Epoch 0: train RMSE 0.8811
Epoch 100: train RMSE 0.8610
Epoch 200: train RMSE 0.8481
Epoch 0: train RMSE 0.8349
Epoch 100: train RMSE 0.8257
Epoch 200: train RMSE 0.8178


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,PRPO,ensemble,1,oxygen,10.992669
1,2023-04-13,PRPO,ensemble,1,oxygen,10.457038
2,2023-04-14,PRPO,ensemble,1,oxygen,10.139423
3,2023-04-15,PRPO,ensemble,1,oxygen,10.008495
4,2023-04-16,PRPO,ensemble,1,oxygen,9.814295
...,...,...,...,...,...,...
85,2023-05-07,PRPO,ensemble,3,oxygen,9.216803
86,2023-05-08,PRPO,ensemble,3,oxygen,9.469707
87,2023-05-09,PRPO,ensemble,3,oxygen,9.517960
88,2023-05-10,PRPO,ensemble,3,oxygen,9.544353


In [None]:
targets_PRPO_Chla = targetsChla[targetsChla['site_id'].str.contains('PRPO')]
data_targets_PRPO_Chla = targets_PRPO_Chla[['datetime', 'observation', 'site_id']]
data_targets_PRPO_Chla = data_targets_PRPO_Chla.sort_values('datetime')
data_targets_PRPO_Chla = data_targets_PRPO_Chla.reset_index(drop=True)
targets_PRPO_Chla = data_targets_PRPO_Chla[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_PRPO_Chla, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_PRPO_Chla , data_targets_PRPO_Chla ,types = 'chla' , site_id = 'PRPO')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

PRPO_Chla = results_df 
PRPO_Chla

Epoch 0: train RMSE 30.5569
Epoch 100: train RMSE 13.6820
Epoch 200: train RMSE 8.7719
Epoch 0: train RMSE 6.3862
Epoch 100: train RMSE 5.5984
Epoch 200: train RMSE 5.0567
Epoch 0: train RMSE 4.6501
Epoch 100: train RMSE 4.4777
Epoch 200: train RMSE 4.4264


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,PRPO,ensemble,1,chla,11.591595
1,2023-04-13,PRPO,ensemble,1,chla,26.913530
2,2023-04-14,PRPO,ensemble,1,chla,24.186258
3,2023-04-15,PRPO,ensemble,1,chla,30.406927
4,2023-04-16,PRPO,ensemble,1,chla,19.270712
...,...,...,...,...,...,...
85,2023-05-07,PRPO,ensemble,3,chla,48.296951
86,2023-05-08,PRPO,ensemble,3,chla,45.613102
87,2023-05-09,PRPO,ensemble,3,chla,29.046211
88,2023-05-10,PRPO,ensemble,3,chla,38.370472


In [None]:
# download as csv
PRPO = pd.concat([PRPO_Temp, PRPO_Oxy, PRPO_Chla], axis=0, ignore_index=True)
PRPO.to_csv('PRPO.csv', encoding = 'utf-8-sig') 
files.download('PRPO.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

SUGG - Temperature,Oxygen,Chla

In [None]:
targets_SUGG_Temp = targetsTemp[targetsTemp['site_id'].str.contains('SUGG')]
data_targets_SUGG_Temp = targets_SUGG_Temp[['datetime', 'observation', 'site_id']]
data_targets_SUGG_Temp = data_targets_SUGG_Temp.sort_values('datetime')
data_targets_SUGG_Temp = data_targets_SUGG_Temp.reset_index(drop=True)
targets_SUGG_Temp = data_targets_SUGG_Temp[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_SUGG_Temp, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_SUGG_Temp , data_targets_SUGG_Temp ,types = 'temperature' , site_id = 'SUGG')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

SUGG_Temp = results_df
SUGG_Temp

Epoch 0: train RMSE 21.5548
Epoch 100: train RMSE 0.6312
Epoch 200: train RMSE 0.6198
Epoch 0: train RMSE 0.6115
Epoch 100: train RMSE 0.6117
Epoch 200: train RMSE 0.6096
Epoch 0: train RMSE 0.6153
Epoch 100: train RMSE 0.6016
Epoch 200: train RMSE 0.6057


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,SUGG,ensemble,1,temperature,26.728796
1,2023-04-13,SUGG,ensemble,1,temperature,25.210363
2,2023-04-14,SUGG,ensemble,1,temperature,25.882429
3,2023-04-15,SUGG,ensemble,1,temperature,25.347649
4,2023-04-16,SUGG,ensemble,1,temperature,24.518965
...,...,...,...,...,...,...
85,2023-05-07,SUGG,ensemble,3,temperature,22.928715
86,2023-05-08,SUGG,ensemble,3,temperature,22.919071
87,2023-05-09,SUGG,ensemble,3,temperature,22.321630
88,2023-05-10,SUGG,ensemble,3,temperature,24.172655


In [None]:
targets_SUGG_Oxy = targetsOxygen[targetsOxygen['site_id'].str.contains('SUGG')]
data_targets_SUGG_Oxy = targets_SUGG_Oxy[['datetime', 'observation', 'site_id']]
data_targets_SUGG_Oxy = data_targets_SUGG_Oxy.sort_values('datetime')
data_targets_SUGG_Oxy = data_targets_SUGG_Oxy.reset_index(drop=True)
targets_SUGG_Oxy = data_targets_SUGG_Oxy[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_SUGG_Oxy, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_SUGG_Oxy , data_targets_SUGG_Oxy ,types = 'oxygen' , site_id = 'SUGG')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

SUGG_Oxy = results_df 
SUGG_Oxy

Epoch 0: train RMSE 4.2700
Epoch 100: train RMSE 0.3812
Epoch 200: train RMSE 0.3742
Epoch 0: train RMSE 0.3735
Epoch 100: train RMSE 0.3669
Epoch 200: train RMSE 0.3697
Epoch 0: train RMSE 0.3632
Epoch 100: train RMSE 0.3624
Epoch 200: train RMSE 0.3627


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,SUGG,ensemble,1,oxygen,6.874405
1,2023-04-13,SUGG,ensemble,1,oxygen,6.586783
2,2023-04-14,SUGG,ensemble,1,oxygen,6.837514
3,2023-04-15,SUGG,ensemble,1,oxygen,6.963411
4,2023-04-16,SUGG,ensemble,1,oxygen,6.840862
...,...,...,...,...,...,...
85,2023-05-07,SUGG,ensemble,3,oxygen,5.848862
86,2023-05-08,SUGG,ensemble,3,oxygen,6.158765
87,2023-05-09,SUGG,ensemble,3,oxygen,6.121847
88,2023-05-10,SUGG,ensemble,3,oxygen,6.064577


In [None]:
targets_SUGG_Chla = targetsChla[targetsChla['site_id'].str.contains('SUGG')]
data_targets_SUGG_Chla = targets_SUGG_Chla[['datetime', 'observation', 'site_id']]
data_targets_SUGG_Chla = data_targets_SUGG_Chla.sort_values('datetime')
data_targets_SUGG_Chla = data_targets_SUGG_Chla.reset_index(drop=True)
targets_SUGG_Chla = data_targets_SUGG_Chla[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(targets_SUGG_Chla, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 3

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=300)

    predictions = predict(targets_SUGG_Chla , data_targets_SUGG_Chla ,types = 'chla' , site_id = 'SUGG')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

SUGG_Chla = results_df 
SUGG_Chla

Epoch 0: train RMSE 19.2289
Epoch 100: train RMSE 4.6419
Epoch 200: train RMSE 3.7681
Epoch 0: train RMSE 3.6055
Epoch 100: train RMSE 3.5469
Epoch 200: train RMSE 3.5569
Epoch 0: train RMSE 3.5352
Epoch 100: train RMSE 3.4305
Epoch 200: train RMSE 3.4044


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,SUGG,ensemble,1,chla,31.393679
1,2023-04-13,SUGG,ensemble,1,chla,31.220612
2,2023-04-14,SUGG,ensemble,1,chla,28.042177
3,2023-04-15,SUGG,ensemble,1,chla,26.169359
4,2023-04-16,SUGG,ensemble,1,chla,22.053532
...,...,...,...,...,...,...
85,2023-05-07,SUGG,ensemble,3,chla,10.903906
86,2023-05-08,SUGG,ensemble,3,chla,23.248987
87,2023-05-09,SUGG,ensemble,3,chla,13.565275
88,2023-05-10,SUGG,ensemble,3,chla,16.543364


In [None]:
# download as csv
SUGG = pd.concat([SUGG_Temp, SUGG_Oxy, SUGG_Chla], axis=0, ignore_index=True)
SUGG.to_csv('SUGG.csv', encoding = 'utf-8-sig') 
files.download('SUGG.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

TOOK - Temperature,Oxygen,Chla

In [None]:
targets_TOOK_Temp = targetsTemp[targetsTemp['site_id'].str.contains('TOOK')]
data_targets_TOOK_Temp = targets_TOOK_Temp[['datetime', 'observation', 'site_id']]
data_targets_TOOK_Temp = data_targets_TOOK_Temp.sort_values('datetime')
data_targets_TOOK_Temp = data_targets_TOOK_Temp.reset_index(drop=True)
targets_TOOK_Temp = data_targets_TOOK_Temp[["observation"]].values.astype('float32')

In [None]:
# train the model to predict enough datas for future prediction
X_train, y_train = create_dataset(targets_TOOK_Temp, lb=14)
today =date.today()
model = WaterQmodel()
training(model,number_epochs=300)

Epoch 0: train RMSE 11.7590
Epoch 100: train RMSE 2.5627
Epoch 200: train RMSE 1.0275


In [None]:
## predict more data to predict next 30 days from today

edate = datetime.strptime(data_targets_TOOK_Temp.iloc[-1, 0], "%Y-%m-%d").date()
new = len(data_targets_TOOK_Temp)
input_seq = torch.Tensor(targets_TOOK_Temp)
output = model(input_seq)
j = 0
i = -1
pred = [0]*new
while i > -1*new - 1:
    pred[j] = output[i].item()
    i -= 1
    j += 1
ref_date = edate

date_range = [0]*new

for i in range (new):
    date = ref_date + timedelta(days = i+1)
    date_range[i] = date.strftime("%Y-%m-%d")
variable = ["temperature"]*new
site_id = ['TOOK']*new

dat = {'datetime':date_range, 'site_id': site_id, 'observation':pred}
new_Took_temperature = pd.DataFrame(dat, columns = ['datetime', 'observation' , 'site_id'])

In [None]:
# combine dataset
TOOK_Temperature = pd.concat([data_targets_TOOK_Temp,new_Took_temperature], ignore_index=True)
TOOK_Temperature

Unnamed: 0,datetime,observation,site_id
0,2019-07-24,17.255000,TOOK
1,2019-07-25,16.937692,TOOK
2,2019-07-26,16.882500,TOOK
3,2019-07-27,16.385000,TOOK
4,2019-07-30,15.249286,TOOK
...,...,...,...
327,2023-03-01,14.947304,TOOK
328,2023-03-02,15.998652,TOOK
329,2023-03-03,16.437168,TOOK
330,2023-03-04,16.238693,TOOK


In [None]:
Data_TOOK_Temp = TOOK_Temperature[['datetime', 'observation', 'site_id']]
Data_TOOK_Temp = Data_TOOK_Temp.sort_values('datetime')
Data_TOOK_Temp = Data_TOOK_Temp.reset_index(drop=True)
TOOK_Temperature = Data_TOOK_Temp[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(TOOK_Temperature, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 2

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=50)

    predictions = predict(TOOK_Temperature , Data_TOOK_Temp ,types = 'temperature' , site_id = 'TOOK')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

TOOK_Temp = results_df 
TOOK_Temp

Epoch 0: train RMSE 11.3701
Epoch 0: train RMSE 1.8862


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,TOOK,ensemble,1,temperature,11.878513
1,2023-04-13,TOOK,ensemble,1,temperature,11.877554
2,2023-04-14,TOOK,ensemble,1,temperature,11.868513
3,2023-04-15,TOOK,ensemble,1,temperature,11.860962
4,2023-04-16,TOOK,ensemble,1,temperature,11.839278
5,2023-04-17,TOOK,ensemble,1,temperature,11.817838
6,2023-04-18,TOOK,ensemble,1,temperature,11.806396
7,2023-04-19,TOOK,ensemble,1,temperature,11.790752
8,2023-04-20,TOOK,ensemble,1,temperature,11.817636
9,2023-04-21,TOOK,ensemble,1,temperature,11.841375


In [None]:
targets_TOOK_Oxy = targetsOxygen[targetsOxygen['site_id'].str.contains('TOOK')]
data_targets_TOOK_Oxy = targets_TOOK_Oxy[['datetime', 'observation', 'site_id']]
data_targets_TOOK_Oxy = data_targets_TOOK_Oxy.sort_values('datetime')
data_targets_TOOK_Oxy = data_targets_TOOK_Oxy.reset_index(drop=True)
targets_TOOK_Oxy = data_targets_TOOK_Oxy[["observation"]].values.astype('float32')

In [None]:
# train the model to predict enough datas for future prediction
X_train, y_train = create_dataset(targets_TOOK_Oxy, lb=14)
today =date.today()
model = WaterQmodel()
training(model,number_epochs=300)

Epoch 0: train RMSE 10.3266
Epoch 100: train RMSE 0.8794
Epoch 200: train RMSE 0.8339


In [None]:
## predict more data to predict next 30 days from today

edate = datetime.strptime(data_targets_TOOK_Oxy.iloc[-1, 0], "%Y-%m-%d").date()
new = len(data_targets_TOOK_Oxy)
input_seq = torch.Tensor(targets_TOOK_Oxy)
output = model(input_seq)
j = 0
i = -1
pred = [0]*new
while i > -1*new - 1:
    pred[j] = output[i].item()
    i -= 1
    j += 1
ref_date = edate

date_range = [0]*new

for i in range (new):
    date = ref_date + timedelta(days = i+1)
    date_range[i] = date.strftime("%Y-%m-%d")
variable = ["oxygen"]*new
site_id = ['TOOK']*new

dat = {'datetime':date_range, 'site_id': site_id, 'observation':pred}
new_Took_Oxy = pd.DataFrame(dat, columns = ['datetime', 'observation' , 'site_id'])

In [None]:
# combine dataset
TOOK_Oxy = pd.concat([data_targets_TOOK_Oxy,new_Took_Oxy], ignore_index=True)
TOOK_Oxy

Unnamed: 0,datetime,observation,site_id
0,2019-07-18,9.035748,TOOK
1,2019-07-19,9.038122,TOOK
2,2019-07-22,9.163333,TOOK
3,2019-07-23,9.173576,TOOK
4,2019-07-24,9.140769,TOOK
...,...,...,...
341,2023-03-07,10.441486,TOOK
342,2023-03-08,10.434928,TOOK
343,2023-03-09,10.391388,TOOK
344,2023-03-10,10.099438,TOOK


In [None]:
Data_TOOK_Oxy = TOOK_Oxy[['datetime', 'observation', 'site_id']]
Data_TOOK_Oxy = Data_TOOK_Oxy.sort_values('datetime')
Data_TOOK_Oxy = Data_TOOK_Oxy.reset_index(drop=True)
TOOK_Oxy = Data_TOOK_Oxy[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(TOOK_Oxy, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 2

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=50)

    predictions = predict(TOOK_Oxy , Data_TOOK_Oxy ,types = 'oxygen' , site_id = 'TOOK')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

TOOK_Oxy = results_df 
TOOK_Oxy

Epoch 0: train RMSE 9.8968
Epoch 0: train RMSE 0.7601


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,TOOK,ensemble,1,oxygen,10.516521
1,2023-04-13,TOOK,ensemble,1,oxygen,10.516521
2,2023-04-14,TOOK,ensemble,1,oxygen,10.516521
3,2023-04-15,TOOK,ensemble,1,oxygen,10.516521
4,2023-04-16,TOOK,ensemble,1,oxygen,10.516521
5,2023-04-17,TOOK,ensemble,1,oxygen,10.516521
6,2023-04-18,TOOK,ensemble,1,oxygen,10.516521
7,2023-04-19,TOOK,ensemble,1,oxygen,10.516521
8,2023-04-20,TOOK,ensemble,1,oxygen,10.516521
9,2023-04-21,TOOK,ensemble,1,oxygen,10.516521


In [None]:
targets_TOOK_Chla = targetsChla[targetsChla['site_id'].str.contains('TOOK')]
data_targets_TOOK_Chla = targets_TOOK_Chla[['datetime', 'observation', 'site_id']]
data_targets_TOOK_Chla = data_targets_TOOK_Chla.sort_values('datetime')
data_targets_TOOK_Chla = data_targets_TOOK_Chla.reset_index(drop=True)
targets_TOOK_Chla = data_targets_TOOK_Chla[["observation"]].values.astype('float32')

In [None]:
# train the model to predict enough datas for future prediction
X_train, y_train = create_dataset(targets_TOOK_Chla, lb=14)
today =date.today()
model = WaterQmodel()
training(model,number_epochs=300)


Epoch 0: train RMSE 1.9897
Epoch 100: train RMSE 0.3932
Epoch 200: train RMSE 0.3883


In [None]:
## predict more data to predict next 30 days from today

edate = datetime.strptime(data_targets_TOOK_Chla.iloc[-1, 0], "%Y-%m-%d").date()
new = len(data_targets_TOOK_Chla)
input_seq = torch.Tensor(targets_TOOK_Chla)
output = model(input_seq)
j = 0
i = -1
pred = [0]*new
while i > -1*new - 1:
    pred[j] = output[i].item()
    i -= 1
    j += 1
ref_date = edate

date_range = [0]*new

for i in range (new):
    date = ref_date + timedelta(days = i+1)
    date_range[i] = date.strftime("%Y-%m-%d")
variable = ["chla"]*new
site_id = ['TOOK']*new

dat = {'datetime':date_range, 'site_id': site_id, 'observation':pred}
new_Took_Chla = pd.DataFrame(dat, columns = ['datetime', 'observation' , 'site_id'])

In [None]:
# combine dataset
TOOK_Chla = pd.concat([data_targets_TOOK_Chla,new_Took_Chla], ignore_index=True)
TOOK_Chla

Unnamed: 0,datetime,observation,site_id
0,2019-07-18,1.582126,TOOK
1,2019-07-19,1.541602,TOOK
2,2019-07-22,0.603182,TOOK
3,2019-07-23,0.810937,TOOK
4,2019-07-24,0.848681,TOOK
...,...,...,...
341,2023-03-07,1.052465,TOOK
342,2023-03-08,1.000823,TOOK
343,2023-03-09,0.858354,TOOK
344,2023-03-10,1.634194,TOOK


In [None]:
Data_TOOK_Chla = TOOK_Chla[['datetime', 'observation', 'site_id']]
Data_TOOK_Chla = Data_TOOK_Chla.sort_values('datetime')
Data_TOOK_Chla = Data_TOOK_Chla.reset_index(drop=True)
TOOK_Chla = Data_TOOK_Chla[["observation"]].values.astype('float32')

In [None]:
X_train, y_train = create_dataset(TOOK_Chla, lb=14)
today =date.today()
model = WaterQmodel()

# Define the number of times to train the model
num_trainings = 2

# Initialize an DataFrame
results_df = pd.DataFrame()
for i in range(num_trainings):

    training(model,number_epochs=50)

    predictions = predict(TOOK_Chla , Data_TOOK_Chla ,types = 'chla' , site_id = 'TOOK')
    predictions.insert(3, 'parameters', i+1)
    results_i = pd.DataFrame(predictions)
    if i == 0:
        results_df = results_i
    else:
        results_df = pd.concat([results_df,results_i], ignore_index=True)

TOOK_Chla = results_df 
TOOK_Chla

Epoch 0: train RMSE 1.7115
Epoch 0: train RMSE 0.3183


Unnamed: 0,datetime,site_id,family,parameters,variable,prediction
0,2023-04-12,TOOK,ensemble,1,chla,2.261864
1,2023-04-13,TOOK,ensemble,1,chla,2.220779
2,2023-04-14,TOOK,ensemble,1,chla,2.262952
3,2023-04-15,TOOK,ensemble,1,chla,2.290476
4,2023-04-16,TOOK,ensemble,1,chla,2.298185
5,2023-04-17,TOOK,ensemble,1,chla,2.257091
6,2023-04-18,TOOK,ensemble,1,chla,2.303361
7,2023-04-19,TOOK,ensemble,1,chla,2.256515
8,2023-04-20,TOOK,ensemble,1,chla,2.53613
9,2023-04-21,TOOK,ensemble,1,chla,2.59733


In [None]:
# download as csv
TOOK = pd.concat([TOOK_Temp, TOOK_Oxy, TOOK_Chla], axis=0, ignore_index=True)
TOOK.to_csv('TOOK.csv', encoding = 'utf-8-sig') 
files.download('TOOK.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
csv_files = ['BARC.csv', 'CRAM.csv', 'LIRO.csv', 'PRLA.csv', 'SUGG.csv', 'PRPO.csv', 'TOOK.csv']

# Create an empty DataFrame to hold the merged data
merged_data = pd.DataFrame()

# Loop through the CSV files and append them to the merged_data DataFrame
for file in csv_files:
    data = pd.read_csv(file)
    merged_data = merged_data.append(data)

# Write the merged data to a new CSV file
merged_data.to_csv('lake.csv', index=False)

files.download('lake.csv')

  merged_data = merged_data.append(data)
  merged_data = merged_data.append(data)
  merged_data = merged_data.append(data)
  merged_data = merged_data.append(data)
  merged_data = merged_data.append(data)
  merged_data = merged_data.append(data)
  merged_data = merged_data.append(data)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>