In [76]:
# Imports
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
import optuna
import joblib

import torch
from torch.utils.data import TensorDataset, DataLoader

from darts import TimeSeries
from darts.models import  RandomForest, LinearRegressionModel, LightGBMModel, \
                        CatBoostModel,  BlockRNNModel, NBEATSModel, NHiTSModel, \
                        TCNModel, TFTModel
from darts.dataprocessing.transformers import Scaler

from darts.utils.likelihood_models import GaussianLikelihood

import warnings
warnings.filterwarnings('ignore')

# Configuration

In [5]:
prj_path = '../'
data_path = prj_path + "data/new_data/DH/squeezed/"
prj_path_opt= prj_path + "optimize_hyperparam/opt_results/"
output_process = prj_path + "data/new_data/DH/processed_data/"
output_featureselection = prj_path + "data/new_data/DH/feature_selection/"

In [74]:
all_cities = [
        'An Giang', 'BR Vũng Tàu', 'Bình Phước', 'Bình Thuận', 'Bình Định',
        'Bạc Liêu', 'Bắc Kạn', 'Bắc Giang', 'Cao Bằng', 'Cà Mau',
        'Cần Thơ', 'Gia Lai', 'Hà Giang', 'Hà Nội', 'Hà Tĩnh',
        'Hòa Bình','Hưng Yên', 'Hải Dương', 'Hải Phòng', 'Khánh Hòa', 'Kiên Giang',
        'Kon Tum', 'Lai Châu', 'Long An', 'Lào Cai', 'Lâm Đồng',
        'Lạng Sơn','Nam Định', 'Nghệ An', 'Ninh Bình', 'Ninh Thuận',
        'Phú Thọ', 'Phú Yên', 'Quảng Bình', 'Quảng Nam', 'Quảng Ngãi',
        'Quảng Ninh', 'Quảng Trị', 'Sóc Trăng', 'Sơn La', 'TT Huế',
        'Thanh Hóa', 'Thái Bình', 'Thái Nguyên', 'Tiền Giang', 'Trà Vinh',
        'Tuyên Quang', 'Tây Ninh', 'Vĩnh Phúc', 'Yên Bái', 'Điện Biên',
        'Đà Nẵng', 'Đắk Nông', 'Đắk Lắk', 'Đồng Tháp'
]
cities = ['An Giang', 'BR Vũng Tàu', 'Bình Phước', 'Bình Thuận', 'Bình Định']

In [63]:
# Set hyperparameters as args using the Configuration class
class Configuration():
    def __init__(self):
        # lấy bộ test dài 36 tháng = 3 năm
        self.test_size = 36
        # là nhìn vào dữ liệu trước 3 tháng và dự phóng        
        self.look_back = 3
        # dự phóng n-step trong 6 tháng
        self.n_predicted_period_months = 6
        self.n_features = 3
        self.seed = 42
        # mỗi phần tử x trong tập suppervise có độ lớn là 16 = 16 tháng
        self.batch_size = 16
        self.device = torch.device("cuda")
        self.epochs = 300
        #others
        self.labels = "Dengue_fever_rates"
        # Input param for Optimize Run
        self.ntry = 1
        self.njob = 1

args = Configuration()

In [None]:
# model1 = RandomForest(lags = args.look_back,
#                     lags_past_covariates = [-1,-2,-3],
#                     input_chunk_length = 5)
# model2 = RandomForest(lags = 4,
#                     lags_past_covariates = [-2,-3,-4])
model3 = RandomForest(lags = 5,
                    lags_past_covariates = [-3,-4,-5])
specific_data = pd.read_csv(output_process+city+'_train_preprocessed.csv', parse_dates=True, index_col= None, encoding = 'unicode_escape')
df_train, df_valid = split_data(specific_data, args.look_back,1)
x_train = TimeSeries.from_dataframe(df_train, "year_month", feature_list)
y_train = TimeSeries.from_dataframe(df_train, "year_month", labels)

x_test = TimeSeries.from_dataframe(df_valid, "year_month", feature_list)
y_test = TimeSeries.from_dataframe(df_valid, "year_month", labels)

model1.fit(y_train, past_covariates = x_train)
past_covariates_val = x_test
# prediction = model3.predict(len(y_test), past_covariates = past_covariates_val, num_samples=1)
prediction = model1.predict(len(y_test)-2+4, past_covariates = past_covariates_val, num_samples=1)
prediction

# Seeding

In [8]:
def seed_everything(seed: int):
    import random
    import numpy as np
    import torch
    
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

seed_everything(args.seed)

# Supporting functions

In [13]:
def get_dict_all_city_data():
  cities_data = {}  
  for city in cities:
    city_result = pd.read_excel(prj_path+'data/new_data/DH/squeezed/squeezed_'+city+'.xlsx')  
    """Get all data from all city in 1997 - 2016""" 
    city_result = city_result.loc[city_result['year_month'] < '2017-1-1'] 
    cities_data[city] = city_result
  return cities_data

In [14]:
# Define data (pre-)processing functions
# modification
def get_city_data(city_name, dict_full_data):
    """Returns Diarrhoea rate and climate data""" 
    city_data = dict_full_data[city_name].drop(columns=['Diarrhoea_cases','Diarrhoea_rates', 'province',
                                                        'Influenza_rates','Influenza_cases',
                                                        'Dengue_fever_cases', 'year', 'month'], 
                                                                  axis=1, 
                                                                  inplace=False)    
    return city_data

def convert_to_stationary(city_data):
    """Subtracts previous value for all cols except disease rates"""
    for col_name in city_data.columns:
        if col_name != 'Diarrhoea_rates':
            try:
                city_data[col_name] = city_data[col_name] - city_data[col_name].shift()
            except:
                print(col_name)
    return city_data

def impute_missing_value(city_data):
    """
    Imputes 0 for first 12 months, 
    last year's value for months 12-24, 
    and minimum value of last two years for months 25+
    """
    for col in city_data.columns:
        for index in range(len(city_data[col])):
            if np.isnan(city_data[col].iloc[index]):
                if index < 12:
                    city_data[col].iloc[index] = 0
                elif index >= 12 and index <= 24:
                    city_data[col].iloc[index] = city_data[col].iloc[index - 12]
                else:
                    city_data[col].iloc[index] = min(city_data[col].iloc[index - 12], city_data[col].iloc[index - 24])
    return city_data

In [15]:
def clean_full_data(dict_full_data):
    climate_and_disease_feats = ['Total_Evaporation',
       'Total_Rainfall', 'Max_Daily_Rainfall', 'n_raining_days',
       'Average_temperature', 'Max_Average_Temperature',
       'Min_Average_Temperature', 'Max_Absolute_Temperature',
       'Min_Absolute_Temperature', 'Average_Humidity', 'Min_Humidity',
       'n_hours_sunshine', 'Dengue_fever_rates']
    for city in cities:
        city_data = get_city_data(city_name=city,dict_full_data = dict_full_data)
        city_data_features = city_data[climate_and_disease_feats]
        city_data_features = impute_missing_value(city_data_features)
        city_data_features = convert_to_stationary(city_data_features)
        city_data_features.dropna(inplace=True)
        city_data_features.loc[:, "year_month"] = city_data["year_month"]
        dict_full_data[city] = city_data_features
    return dict_full_data


In [48]:
def split_data(data, look_back, n_nextstep = args.n_predicted_period_months):
    """Splits data into train and test sets based on args (Configuration class)"""
    train = data[: -args.test_size]    
    test = data[-args.test_size - look_back-(n_nextstep - 1): ]
    return train, test

to_supervised()

In [None]:
def to_supervised(data,  d_out, d_in, features_list=[]):
    """
    Frames time-series as supervised learning dataset.
    
    Args:
      d_in: lookback window
      d_out: number of predicted months
      features_list: list of all features **where last col is the disease incidence**

    Returns:
      Numpy arrays of disease incidence (y) and other predictors (X)
    """
    X, y = list(), list()
    for index, _ in enumerate(data):
        in_end = index + d_in
        out_end = in_end + d_out
        if out_end <= len(data):
            if len(features_list) == 0 :
                X.append(data[index: in_end, :])
            else:
                X.append(data[index: in_end, features_list])
            y.append(data[out_end-1: out_end, -1])
    return np.array(X), np.array(y)

In [250]:
model1 = RandomForest(lags = 3,
                    lags_past_covariates = [-1,-2,-3])
# model2 = RandomForest(lags = 4,
#                     lags_past_covariates = [-2,-3,-4])
# model3 = RandomForest(lags = 5,
#                     lags_past_covariates = [-3,-4,-5])
specific_data = pd.read_csv(output_process+city+'_train_preprocessed.csv', parse_dates=True, index_col= None, encoding = 'unicode_escape')
df_train, df_valid = split_data(specific_data, args.look_back,1)
x_train = TimeSeries.from_dataframe(df_train, "year_month", feature_list)
y_train = TimeSeries.from_dataframe(df_train, "year_month", labels)

x_test = TimeSeries.from_dataframe(df_valid, "year_month", feature_list)
y_test = TimeSeries.from_dataframe(df_valid, "year_month", labels)

model1.fit(y_train, past_covariates = x_train)
past_covariates_val = x_test
# prediction = model3.predict(len(y_test), past_covariates = past_covariates_val, num_samples=1)
prediction = model1.predict(len(y_test)-args.look_back, past_covariates = past_covariates_val, num_samples=1)


ValueError: setting an array element with a sequence. The requested array would exceed the maximum number of dimension of 32.

In [252]:
np.array(prediction._xa).squeeze()

array([0.5503514 , 0.7943897 , 0.54379149, 0.52382343, 0.54897979,
       0.55659009, 0.54082459, 0.54310631, 0.53979221, 0.54582229,
       0.54382109, 0.54297397, 0.45361153, 0.52414221, 0.5254382 ,
       0.58274941, 0.53918262, 0.54095845, 0.54509208, 0.53932759,
       0.54133891, 0.55079078, 0.54549301, 0.55991313, 0.75319625,
       0.50700563, 0.46905401, 0.57559314, 0.54916772, 0.55059619,
       0.55106765, 0.55230924, 0.54698121, 0.54069252, 0.54006857,
       0.54600497])

In [266]:
def train_and_evaluate(df_train, df_eval, model, feature_list , labels, scaler):
  """
  $df: pandas.DataFrame object containing data for training and testing model:
  $model: darts model object
  $feature_list: Names of the features used as model input
  $label: the value the model will be trained to predict
  $scaler: scaler object. Note: the scaler will be fitted on training data and applied to test data
  $lags: how much to look back into the past to output prediction
  $split_index: the point at which to divide train and test_data

  """

  x_train = TimeSeries.from_dataframe(df_train, "year_month", feature_list)
  y_train = TimeSeries.from_dataframe(df_train, "year_month", labels)

  x_test = TimeSeries.from_dataframe(df_eval, "year_month", feature_list)
  y_test = TimeSeries.from_dataframe(df_eval, "year_month", labels)

  model.fit(y_train, past_covariates = x_train)

  prediction = model.predict(len(y_test)-args.look_back, past_covariates = x_test, num_samples=1)

  y_true = scaler.inverse_transform(df_eval.iloc[:,:-1])[:,[-1]].reshape(len(df_eval))[args.look_back:]

  df_eval[labels][args.look_back:] = np.array(prediction._xa).squeeze()
  y_pred = scaler.inverse_transform(df_eval.iloc[args.look_back:,:-1])[:,[-1]].reshape(len(prediction))

  # df_compare_test_predict = pd.DataFrame({'y_true':y_true, 'y_pred':y_pred})
  # df_compare_test_predict.plot()
  # plt.legend()
  # plt.show()

  mse = mean_squared_error(y_true, y_pred)
  mae = mean_absolute_error(y_true, y_pred)
  rmse = mse**0.5
  mape = mean_absolute_percentage_error(y_true, y_pred)
  print(f"mean_squared_error: {mse:.4f}")
  print(f"rmse: {rmse}")
  print(f"mape: {mape}")
  return model, y_true, y_pred, mse, mae, rmse, mape

In [267]:
def output_prediction_for_location(df_train, df_eval, model, location, feature_list, 
                                                labels, scaler):
    """train and generate prediction for a province
    df: DataFrame object containing features and label(s) for training model
    localtion: location_name
    feature_list: list of features used as model input,  must be among the column names of df
    labels: the values model will be trained to predict
    scaler: sklearn scaler object
    lags: how long into the past to look back when making prediction
    split_index: the point at which to divide data into the train and test subsets.
    """
    model, y_true, prediction_inverse, mse, mae, rmse, mape = train_and_evaluate(df_train, df_eval, model, feature_list, labels, scaler)
    df_prediction = pd.DataFrame({"Date": df_eval["year_month"][-len(prediction_inverse):],
                                  "Observed": y_true[-len(prediction_inverse):],
                                  "1-month": prediction_inverse})
    
    df_prediction["City"] = location
    df_prediction["RMSE_1-month"] = rmse
    df_prediction["MAE_1-month"] = mae
    df_prediction["MAPE_1-month"] = mape

    return mae

In [268]:
def getDataWithSelectedFeature(city, next_predicted_month):
  selected_feature = []
  df = pd.read_csv(output_featureselection+str(next_predicted_month)+"step_feature_selection_3_most.csv", encoding = 'unicode_escape')
  for row in range(len(df)):
    if (df["City"][row] == city):
      selected_feature.append(df["1st_Feature"][row])
      selected_feature.append(df["2nd_Feature"][row])
      selected_feature.append(df["3rd_Feature"][row])
  return selected_feature

# Objective and Suggest Hyperparams of Darts Models


In [269]:
def objective(model_name, trial, city, nstep):   
    specific_data = pd.read_csv(output_process+city+'_train_preprocessed.csv', parse_dates=True, index_col= None, encoding = 'unicode_escape')
    scaler = joblib.load(output_process+city+'_train_scalerMinMaxNorm.save') #ok

    df_train, df_valid = split_data(specific_data, args.look_back,nstep)
    selected_features = getDataWithSelectedFeature(city, nstep)

    lags_by_nstep = args.look_back + nstep - 1
    lags_past_covariates_by_nstep = [-lags_by_nstep+2,-lags_by_nstep+1,-lags_by_nstep] #Mảng này chứa ba giá trị tương ứng cho args.lookback 3



    if model_name == "RandomForest":
      random_state = trial.suggest_int('random_state', 0, 42)
      n_estimators = trial.suggest_int('n_estimators', 50, 200)
      max_depth = trial.suggest_int('max_depth', 1, 15)
      # Create the RandomForest model
      model = RandomForest(
                    lags = lags_by_nstep,
                    lags_past_covariates = lags_past_covariates_by_nstep,
                    output_chunk_length = 1,
                    n_estimators = n_estimators,
                    max_depth = max_depth,
                    random_state=random_state)
    elif model_name == 'TFTModel':
      # Define the hyperparameters to optimize
      random_state = trial.suggest_int('random_state', 0, 42)
      dropout = trial.suggest_uniform('dropout', 0.01, 0.8)
      n_epochs = trial.suggest_int('n_epochs', 50, 200)

      # Create the TFTModel model
      model = TFTModel(
                    input_chunk_length = lags_by_nstep,
                    output_chunk_length = 1,
                    add_relative_index = True,
                    dropout = dropout,
                    n_epochs = n_epochs ,
                    random_state=random_state)
    elif model_name == 'NHiTSModel':
      #suggest hyperparams
      random_state = trial.suggest_int('random_state', 0, 42)
      dropout = trial.suggest_uniform('dropout', 0.01, 0.80)
      n_epochs = trial.suggest_int('n_epochs', 100, 500, step=10)
      MaxPool1d = trial.suggest_categorical('MaxPool1d', [True, False])

      model = NHiTSModel(
                          input_chunk_length = lags_by_nstep,
                          output_chunk_length = 1,
                          MaxPool1d = MaxPool1d,
                          dropout = dropout,
                          n_epochs = n_epochs ,
                          random_state=random_state)
    elif model_name == 'LinearRegressionModel':
      random_state = trial.suggest_int('random_state', 0, 43)
      
      # Create the  model
      model = LinearRegressionModel(
                      lags = lags_by_nstep,
                      lags_past_covariates = lags_past_covariates_by_nstep,
                      output_chunk_length = 1,
                      random_state=random_state)
    elif model_name == "BlockRNNModel":
      #suggest hyperparams
      random_state = trial.suggest_int('random_state', 0, 1000)
      n_rnn_layers = trial.suggest_int('n_rnn_layers', 1, 3)
      dropout = trial.suggest_uniform('dropout', 0.1, 0.5)
      hidden_dim = trial.suggest_int('n_rnn_layers', 5, 20)
      n_epochs = trial.suggest_int('n_epochs', 50, 200)

      model = BlockRNNModel(
                          input_chunk_length = lags_by_nstep,
                          output_chunk_length = 1,
                          hidden_dim = hidden_dim,
                          n_rnn_layers = n_rnn_layers,
                          dropout = dropout,
                          n_epochs = n_epochs,
                          random_state=random_state)
      
    elif model_name == "CatBoostModel":
      #suggest hyperparams
      learning_rate = trial.suggest_float('learning_rate', 0.001, 0.1)
      n_estimators = trial.suggest_int('n_estimators', 50, 200)
      max_depth = trial.suggest_int('max_depth', 1, 15)
      random_state = trial.suggest_int('random_state', 0, 1000)
      likelihood = trial.suggest_categorical('likelihood', ['quantile'])
      quantiles =  trial.suggest_categorical('quantiles', [None, [0.1, 0.5, 0.9]])
      bagging_temperature = trial.suggest_float('bagging_temperature', 0.01, 100.0)
      border_count = trial.suggest_int('border_count', 1, 255)
      l2_leaf_reg = trial.suggest_float('l2_leaf_reg', 0.1, 10)
      random_strength = trial.suggest_float('random_strength', 0.1, 10)
      model = CatBoostModel(
                            lags=lags_by_nstep,
                            lags_past_covariates=lags_past_covariates_by_nstep, 
                            learning_rate=learning_rate,
                            n_estimators=n_estimators,
                            max_depth=max_depth, 
                            output_chunk_length = 1,
                            likelihood = likelihood,
                            quantiles = quantiles,
                            bagging_temperature = bagging_temperature,
                            border_count = border_count,
                            l2_leaf_reg = l2_leaf_reg,
                            random_strength = random_strength,
                            random_state=random_state)
    
    elif model_name == "NBEATSModel":
      random_state = trial.suggest_int('random_state', 0, 42)
      dropout = trial.suggest_uniform('dropout', 0.01, 0.80)
      n_epochs = trial.suggest_int('n_epochs', 50, 200)

      pl_trainer_kwargs = {
              "accelerator": "gpu",
              "devices": -1,
              "auto_select_gpus": True,
          }
      model = NBEATSModel(
                            input_chunk_length = lags_by_nstep,
                            output_chunk_length = 1,
                            dropout = dropout,
                            n_epochs = n_epochs ,
                            pl_trainer_kwargs = pl_trainer_kwargs,
                            random_state=random_state)
  
    elif model_name == "TCNModel":
      params = {
        'kernel_size': trial.suggest_int("kernel_size", 2, 5),
        'num_filters': trial.suggest_int("num_filters", 1, 5),
        'weight_norm': trial.suggest_categorical("weight_norm", [False, True]),
        'dilation_base': trial.suggest_int("dilation_base", 2, 4),
        'dropout': trial.suggest_float("dropout", 0.0, 0.4),
        'learning_rate': trial.suggest_float("learning_rate", 5e-5, 1e-3, log=True),
        'include_year': trial.suggest_categorical("year", [False, True]),
        'n_epochs': trial.suggest_int("n_epochs", 100, 300),
      }

      # select input and output chunk lengths

      params['input_chunk_length'] = lags_by_nstep
      params['output_chunk_length'] = 1  
   

      # optionally also add the (scaled) year value as a past covariate
      if params['include_year']:
          encoders = {"datetime_attribute": {"past": ["year"]},
                      "transformer": Scaler()}
      else:
          encoders = None
      params['encoders'] = encoders

      pl_trainer_kwargs = {
            "accelerator": "gpu",
            "devices": -1,
            "auto_select_gpus": True,
        }
     

      param = params
      model = TCNModel(
          input_chunk_length=param['input_chunk_length'],
          output_chunk_length=param['output_chunk_length'],
          batch_size=16,
          n_epochs=param['n_epochs'],
          nr_epochs_val_period=1,
          kernel_size=param['kernel_size'],
          num_filters=param['num_filters'],
          weight_norm=param['weight_norm'],
          dilation_base=param['dilation_base'],
          dropout=param['dropout'],
          optimizer_kwargs={"lr": param['learning_rate']},
          add_encoders=param['encoders'],
          likelihood=GaussianLikelihood(),
          pl_trainer_kwargs=pl_trainer_kwargs,
          model_name="tcn_model",
          force_reset=True,
          save_checkpoints=True,
      )
    elif model_name == "LightGBMModel":
      params = {
        "lags": lags_by_nstep,
        "lags_past_covariates": lags_past_covariates_by_nstep,
        "random_state": trial.suggest_int("random_state", 0, 999),
        "multi_models": trial.suggest_categorical("multi_models", [True, False]),
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.005, 0.5),
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.1, 1.0),
        'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.1, 1.0),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
        'verbose': -1,
        'likelihood' : trial.suggest_categorical("likelihood", ["quantile"])
      }

      param = params
      model = LightGBMModel(
          lags = param['lags'],
          lags_past_covariates = param['lags_past_covariates'],
          output_chunk_length = 1,
          random_state = param['random_state'],
          multi_models = param['multi_models'],
          likelihood = param['likelihood'],
          num_leaves = param['num_leaves'],
          learning_rate = param['learning_rate'],
          feature_fraction = param['feature_fraction'],
          bagging_fraction = param['bagging_fraction'],
          min_child_samples = param['min_child_samples'],
          lambda_l1 = param['lambda_l1'],
          verbose = param['verbose']
      )
    
    mae_error = output_prediction_for_location(df_train, df_valid, model, location=city, feature_list=selected_features,
                                                labels=args.labels, scaler=scaler)

    return mae_error

# Main run optimize and save

In [271]:
#########################
# Main Cell for optimize
#########################

model_name_list = [
     "RandomForest",
     "LinearRegressionModel",
     "LightGBMModel",
     "CatBoostModel",
     "BlockRNNModel",
     "NBEATSModel",
     "NHiTSModel",
     "TCNModel",
     "TFTModel"
]

# Lưu thông tin traceback study và error city trong quá trình optimize
l_study_city ={}
l_errCity =[]

if __name__ == '__main__':  
  nstep = 1
  for model_name in model_name_list: 
    print("🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁: ",model_name)
    best_param = pd.DataFrame()
    for city_index in range(len(cities)):
      # Use Tree-structured Parzen Estimator sampler to minimise RMSE
      sampler = optuna.samplers.TPESampler()
      study = optuna.create_study(sampler=sampler, direction='minimize', study_name = model_name)

      # truyền multiple param vào trong biến trial
      print("🦁 1")
      obj_func = lambda trial: objective(model_name, trial, cities[city_index], nstep = nstep)
      print("🦁 2")
      try:
        # Optimise over 100 trials
        print("🦁 3")
        study.optimize(obj_func, n_trials=args.ntry, n_jobs=args.njob)
        print("🦁 4")

        # Print results
        print("Study statistics for : ")
        print("  Number of finished trials: ", len(study.trials))
    
        print("🦁 4")
        print("Best trial of city: ",cities[city_index])

        best_trial = study.best_trial
        print("  Value: ", best_trial.value)   
        print("🦁 6")
        # lưu best param vào trong biến toàn cục

        if model_name == "LinearRegressionModel": 
          one_city_param = pd.DataFrame({
                              'City':  cities[city_index],
                              'Alg_name': 'LinearRegressionModel',
                              'Best_value': best_trial.value,
                              'n_try_opt': args.ntry,
                              'lags' : best_trial.params['output_chunk_length'],
                              'lags_past_covariates': 3,
                              'output_chunk_length': best_trial.params['output_chunk_length'],
                              'random_state':best_trial.params['random_state'],
                              }, index=[0])


        elif model_name == "LightGBMModel":
          one_city_param = pd.DataFrame({
                              'City':  cities[city_index],
                              'Alg_name': 'LightGBMModel',
                              'Best_value': best_trial.value,
                              'n_try_opt': args.ntry,
                              'lags': best_trial.params['lags'],
                              'lags_past_covariates': best_trial.params['lags_past_covariates'],
                              'multi_models': best_trial.params['multi_models'],
                              'num_leaves': best_trial.params['num_leaves'], 
                              'feature_fraction': best_trial.params['feature_fraction'], 
                              'min_child_samples': best_trial.params['min_child_samples'], 
                              'lambda_l1': best_trial.params['lambda_l1'], 
                              'lambda_l2': best_trial.params['lambda_l2'], 
                              'likelihood': best_trial.params['likelihood'], 
                              'learning_rate': best_trial.params['learning_rate']}, index=[0])

           
        elif model_name == "CatBoostModel":
          one_city_param = pd.DataFrame({
                              'City':  cities[city_index],
                              'Alg_name': 'CatBoost',
                              'Best_value': best_trial.value,
                              'n_try_opt': args.ntry,
                              'lags' : 3,
                              'lags_past_covariates': 3,
                              'output_chunk_length': best_trial.params['output_chunk_length'],
                              'likelihood': best_trial.params['likelihood'],
                              'learning_rate': best_trial.params['learning_rate'],
                              'n_estimators': best_trial.params['n_estimators'],
                              'max_depth': best_trial.params['max_depth'],
                              'bagging_temperature': best_trial.params['bagging_temperature'],
                              'l2_leaf_reg': best_trial.params['l2_leaf_reg'],
                              'random_strength':best_trial.params['random_strength'],
                              }, index=[0])


          
        elif model_name == "NHiTSModel":
          one_city_param = pd.DataFrame({
                              'City':  cities[city_index],
                              'Alg_name': 'N-HiTS',
                              'Best_value': best_trial.value,
                              'n_try_opt': args.ntry,
                              'input_chunk_length' : 3,
                              'output_chunk_length' : 1,
                              'MaxPool1d' : best_trial.params['MaxPool1d'],
                              'dropout' : best_trial.params['dropout'],
                              'n_epochs' : best_trial.params['n_epochs'],
                              'random_state' : best_trial.params['random_state'],
                              }, index=[0])


        elif model_name == "TCNModel":
          one_city_param = pd.DataFrame({
                              'City':  cities[city_index],
                              'Alg_name': 'TCNModel',
                              'Best_value': best_trial.value,
                              'n_try_opt': args.ntry,
                              'lags' : best_trial.params['input_chunk_length'],
                              'input_chunk_length': best_trial.params['input_chunk_length'],
                              'output_chunk_length': best_trial.params['output_chunk_length'],
                              'n_epochs':best_trial.params['n_epochs'],
                              'num_filters':best_trial.params['num_filters'],
                              'weight_norm':best_trial.params['weight_norm'],
                              'dilation_base':best_trial.params['dilation_base'],
                              'dropout':best_trial.params['dropout'],
                              'learning_rate':best_trial.params['learning_rate'],
                              'year':best_trial.params['year'],
                              }, index=[0])

          
        elif model_name == "NBEATSModel":
          one_city_param = pd.DataFrame({
                              'City':  cities[city_index],
                              'Alg_name': 'NBeatsModel',
                              'Best_value': best_trial.value,
                              'n_try_opt': args.ntry,
                              'output_chunk_length': 3,
                              'input_chunk_length': 1,
                              'n_epochs':best_trial.params['n_epochs'],
                              'dropout':best_trial.params['dropout'],
                              'random_state':best_trial.params['random_state'],
                              }, index=[0])    

        elif model_name == "TFTModel":
          one_city_param = pd.DataFrame({
                              'City':  cities[city_index],
                              'Alg_name': 'TFTModel',
                              'Best_value': best_trial.value,
                              'n_try_opt': args.ntry,
                              'output_chunk_length': 3,
                              'input_chunk_length': 1,
                              'add_relative_index': True,
                              'random_state':best_trial.params['random_state'],
                              'n_epochs':best_trial.params['n_epochs'],
                              'dropout':best_trial.params['dropout']
                              }, index=[0])

        elif model_name == "RandomForest":
          one_city_param = pd.DataFrame({
                              'City':  cities[city_index],
                              'Alg_name': 'RandomForest',
                              'Best_value': best_trial.value,
                              'n_try_opt': args.ntry,
                              'lags' : best_trial.params['output_chunk_length'],
                              'lags_past_covariates': 3,
                              'output_chunk_length': best_trial.params['output_chunk_length'],
                              'n_estimators': best_trial.params['n_estimators'],
                              'max_depth': best_trial.params['max_depth'],
                              'random_state':best_trial.params['random_state'],
                              }, index=[0])

           
        elif model_name == "BlockRNNModel":
          one_city_param = pd.DataFrame({
                              'City':  cities[city_index],
                              'Alg_name': 'BlockRNNModel',
                              'Best_value': best_trial.value,
                              'n_try_opt': args.ntry,
                              'input_chunk_length': 3,
                              'output_chunk_length': 1,
                              'random_state':best_trial.params['random_state'],
                              'n_epochs':best_trial.params['n_epochs'],
                              'hidden_dim': best_trial.params['hidden_dim'],
                              'n_rnn_layers': best_trial.params['n_rnn_layers'],
                              'dropout':best_trial.params['dropout']
                              }, index=[0])


        file_path = '261023_denguefever_opt_hyperparam_'+ model_name + '_'+nstep+'-nstep.xlsx'
        if(os.path.isfile(file_path)):
            print("🍉🍉🍉🍉🍉🍉🍉🍉🍉🍉")
            with pd.ExcelWriter(file_path,mode="a",engine="openpyxl",if_sheet_exists="overlay") as writer:
                one_city_param.to_excel(writer, header=None, startrow=city_index+2,index=False)
        else:
            with pd.ExcelWriter(file_path,engine="openpyxl") as writer:
                one_city_param.to_excel(writer, startrow=city_index+1,index=False)
      except:# có error thì lưu vào l_errCity để check lại sau 
        l_errCity.append(cities[city_index])

[I 2023-10-26 16:04:57,729] A new study created in memory with name: RandomForest


🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁:  RandomForest
🦁 1
🦁 2
🦁 3


[I 2023-10-26 16:04:58,086] Trial 0 finished with value: 5.481145948175719 and parameters: {'random_state': 39, 'n_estimators': 199, 'max_depth': 10}. Best is trial 0 with value: 5.481145948175719.
[I 2023-10-26 16:04:58,086] A new study created in memory with name: RandomForest


mean_squared_error: 88.5245
rmse: 9.408747750316342
mape: 101066.75640689167
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  An Giang
  Value:  5.481145948175719
🦁 6
🦁 1
🦁 2
🦁 3


[I 2023-10-26 16:04:58,339] Trial 0 finished with value: 13.62379197559468 and parameters: {'random_state': 21, 'n_estimators': 198, 'max_depth': 12}. Best is trial 0 with value: 13.62379197559468.
[I 2023-10-26 16:04:58,340] A new study created in memory with name: RandomForest
[I 2023-10-26 16:04:58,516] Trial 0 finished with value: 11.46031320098124 and parameters: {'random_state': 30, 'n_estimators': 160, 'max_depth': 4}. Best is trial 0 with value: 11.46031320098124.
[I 2023-10-26 16:04:58,516] A new study created in memory with name: RandomForest


mean_squared_error: 299.6381
rmse: 17.310057511410204
mape: 12.815059325701913
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  BR Vũng Tàu
  Value:  13.62379197559468
🦁 6
🦁 1
🦁 2
🦁 3
mean_squared_error: 253.7539
rmse: 15.929655927551336
mape: 2.084366909131627
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  Bình Phước
  Value:  11.46031320098124
🦁 6
🦁 1
🦁 2
🦁 3


[I 2023-10-26 16:04:58,700] Trial 0 finished with value: 3.451767034831164 and parameters: {'random_state': 18, 'n_estimators': 133, 'max_depth': 15}. Best is trial 0 with value: 3.451767034831164.
[I 2023-10-26 16:04:58,700] A new study created in memory with name: RandomForest
[I 2023-10-26 16:04:58,897] Trial 0 finished with value: 3.3625040016424865 and parameters: {'random_state': 35, 'n_estimators': 148, 'max_depth': 2}. Best is trial 0 with value: 3.3625040016424865.
[I 2023-10-26 16:04:58,898] A new study created in memory with name: LinearRegressionModel


mean_squared_error: 25.7075
rmse: 5.070257752102603
mape: 2.463305359888167
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  Bình Thuận
  Value:  3.451767034831164
🦁 6
🦁 1
🦁 2
🦁 3
mean_squared_error: 28.9705
rmse: 5.3824206706847
mape: 10282.48690668497
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  Bình Định
  Value:  3.3625040016424865
🦁 6
🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁:  LinearRegressionModel
🦁 1
🦁 2
🦁 3


[W 2023-10-26 16:04:58,913] Trial 0 failed with parameters: {'random_state': 16} because of the following error: ValueError("Input X contains infinity or a value too large for dtype('float64').").
Traceback (most recent call last):
  File "/Users/trinhtruc/Library/Python/3.9/lib/python/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/var/folders/r_/lbw3rw192wl9sx9vtc1c2_xc0000gn/T/ipykernel_36494/2087632013.py", line 33, in <lambda>
    obj_func = lambda trial: objective(model_name, trial, cities[city_index], nstep = nstep)
  File "/var/folders/r_/lbw3rw192wl9sx9vtc1c2_xc0000gn/T/ipykernel_36494/719169775.py", line 211, in objective
    mae_error = output_prediction_for_location(df_train, df_valid, model, location=city, feature_list=selected_features,
  File "/var/folders/r_/lbw3rw192wl9sx9vtc1c2_xc0000gn/T/ipykernel_36494/1283319320.py", line 12, in output_prediction_for_location
    model, y_true, prediction_inverse, mse, ma

🦁 1
🦁 2
🦁 3
mean_squared_error: 130.3747
rmse: 11.418172286162022
mape: 0.95713966553233
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  BR Vũng Tàu
  Value:  7.790132031540103
🦁 6
🦁 1
🦁 2
🦁 3
mean_squared_error: 248.3009
rmse: 15.757566771422649
mape: 1.014835580385045
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  Bình Phước
  Value:  11.026946160116264
🦁 6
🦁 1
🦁 2
🦁 3
mean_squared_error: 20.5256
rmse: 4.530513352751685
mape: 0.9967861034495081
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  Bình Thuận
  Value:  3.0097119332291222
🦁 6
🦁 1
🦁 2
🦁 3
mean_squared_error: 28.4965
rmse: 5.338211788213013
mape: 1063.295144598981
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  Bình Định
  Value:  3.243043277998413
🦁 6
🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁:  LightGBMModel
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁:  CatBoostModel
🦁 1
🦁 2
🦁 3


[I 2023-10-26 16:04:59,135] Trial 0 finished with value: 3.547325498416681 and parameters: {'learning_rate': 0.007130152982699675, 'n_estimators': 84, 'max_depth': 3, 'random_state': 792, 'likelihood': 'quantile', 'quantiles': [0.1, 0.5, 0.9], 'bagging_temperature': 24.421724135286755, 'border_count': 220, 'l2_leaf_reg': 1.8659657808399648, 'random_strength': 3.7531903974349885}. Best is trial 0 with value: 3.547325498416681.
[I 2023-10-26 16:04:59,136] A new study created in memory with name: CatBoostModel


mean_squared_error: 20.8554
rmse: 4.56676944758353
mape: 6750.120081992567
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  An Giang
  Value:  3.547325498416681
🦁 6
🦁 1
🦁 2
🦁 3


[I 2023-10-26 16:04:59,381] Trial 0 finished with value: 7.695300799655218 and parameters: {'learning_rate': 0.010760117280047134, 'n_estimators': 104, 'max_depth': 8, 'random_state': 168, 'likelihood': 'quantile', 'quantiles': [0.1, 0.5, 0.9], 'bagging_temperature': 57.819153395967085, 'border_count': 201, 'l2_leaf_reg': 0.39133464552737274, 'random_strength': 4.275903603107939}. Best is trial 0 with value: 7.695300799655218.
[I 2023-10-26 16:04:59,382] A new study created in memory with name: CatBoostModel


mean_squared_error: 127.5239
rmse: 11.292649576813576
mape: 1.4138755093168855
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  BR Vũng Tàu
  Value:  7.695300799655218
🦁 6
🦁 1
🦁 2
🦁 3


[I 2023-10-26 16:04:59,711] Trial 0 finished with value: 11.215490335731008 and parameters: {'learning_rate': 0.009465599162139009, 'n_estimators': 138, 'max_depth': 4, 'random_state': 830, 'likelihood': 'quantile', 'quantiles': None, 'bagging_temperature': 82.94154958591312, 'border_count': 175, 'l2_leaf_reg': 0.3214675080614175, 'random_strength': 3.7335529868700665}. Best is trial 0 with value: 11.215490335731008.
[I 2023-10-26 16:04:59,711] A new study created in memory with name: CatBoostModel
[I 2023-10-26 16:04:59,847] Trial 0 finished with value: 3.0773434052949944 and parameters: {'learning_rate': 0.08284795678062556, 'n_estimators': 192, 'max_depth': 1, 'random_state': 387, 'likelihood': 'quantile', 'quantiles': None, 'bagging_temperature': 91.25465546761718, 'border_count': 2, 'l2_leaf_reg': 2.9210292216914833, 'random_strength': 9.253412053174813}. Best is trial 0 with value: 3.0773434052949944.
[I 2023-10-26 16:04:59,847] A new study created in memory with name: CatBoostMo

mean_squared_error: 248.6194
rmse: 15.76766914787379
mape: 1.6675578317964488
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  Bình Phước
  Value:  11.215490335731008
🦁 6
🦁 1
🦁 2
🦁 3
mean_squared_error: 20.6879
rmse: 4.548396807136015
mape: 1.3243675770466223
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  Bình Thuận
  Value:  3.0773434052949944
🦁 6
🦁 1
🦁 2
🦁 3


[I 2023-10-26 16:05:00,437] Trial 0 finished with value: 4.493529693204887 and parameters: {'learning_rate': 0.06616008520434953, 'n_estimators': 113, 'max_depth': 10, 'random_state': 796, 'likelihood': 'quantile', 'quantiles': [0.1, 0.5, 0.9], 'bagging_temperature': 28.927172173613798, 'border_count': 225, 'l2_leaf_reg': 2.6685231627912898, 'random_strength': 5.403838370035645}. Best is trial 0 with value: 4.493529693204887.
[I 2023-10-26 16:05:00,438] A new study created in memory with name: BlockRNNModel
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
[W 2023-10-26 16:05:00,492] Trial 0 failed with parameters: {'random_state': 530, 'n_rnn_layers': 1, 'dropout': 0.11076941152779357, 'n_epochs': 60} because of the following error: TypeError("Cannot convert a MPS Tensor to float64 dtype as the MPS framework doesn't support float64. Please use float32 instead.").
Traceback (most recent

mean_squared_error: 42.6576
rmse: 6.531277207875631
mape: 10492.00274510713
🦁 4
Study statistics for : 
  Number of finished trials:  1
🦁 4
Best trial of city:  Bình Định
  Value:  4.493529693204887
🦁 6
🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁:  BlockRNNModel
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
[W 2023-10-26 16:05:00,655] Trial 0 failed with parameters: {'random_state': 402, 'n_rnn_layers': 1, 'dropout': 0.4549471692403151, 'n_epochs': 94} because of the following error: TypeError("Cannot convert a MPS Tensor to float64 dtype as the MPS framework doesn't support float64. Please use float32 instead.").
Traceback (most recent call last):
  File "/Users/trinhtruc/Library/Python/3.9/lib/python/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/var/folders/r_/lbw3rw192wl9sx9vtc1c2_xc0000gn/T/ipykernel_36494/2087632013.py", line 33, in <lambda>
    obj_func = lambda trial: objective(model_name, trial, cities[city_index], nstep = nstep)
  File "/var/folders/r_/lbw3rw192wl9sx9vtc1c2_xc0000gn/T/ipykernel_36494/719169775.py", line 211, in objective
    mae_error = output_predictio

🦁 1
🦁 2
🦁 3
🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁:  NBEATSModel
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3


[W 2023-10-26 16:05:00,869] Trial 0 failed with parameters: {'random_state': 14, 'dropout': 0.2553464742828824, 'n_epochs': 156} because of the following error: TypeError("__init__() got an unexpected keyword argument 'auto_select_gpus'").
Traceback (most recent call last):
  File "/Users/trinhtruc/Library/Python/3.9/lib/python/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/var/folders/r_/lbw3rw192wl9sx9vtc1c2_xc0000gn/T/ipykernel_36494/2087632013.py", line 33, in <lambda>
    obj_func = lambda trial: objective(model_name, trial, cities[city_index], nstep = nstep)
  File "/var/folders/r_/lbw3rw192wl9sx9vtc1c2_xc0000gn/T/ipykernel_36494/719169775.py", line 211, in objective
    mae_error = output_prediction_for_location(df_train, df_valid, model, location=city, feature_list=selected_features,
  File "/var/folders/r_/lbw3rw192wl9sx9vtc1c2_xc0000gn/T/ipykernel_36494/1283319320.py", line 12, in output_prediction_for_location
   

🦁 1
🦁 2
🦁 3
🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁:  NHiTSModel
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁:  TCNModel
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3


[W 2023-10-26 16:05:01,089] Trial 0 failed with parameters: {'kernel_size': 2, 'num_filters': 3, 'weight_norm': False, 'dilation_base': 3, 'dropout': 0.39742912012605147, 'learning_rate': 0.00016712828180728985, 'year': True, 'n_epochs': 196} because of the following error: TypeError("__init__() got an unexpected keyword argument 'auto_select_gpus'").
Traceback (most recent call last):
  File "/Users/trinhtruc/Library/Python/3.9/lib/python/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/var/folders/r_/lbw3rw192wl9sx9vtc1c2_xc0000gn/T/ipykernel_36494/2087632013.py", line 33, in <lambda>
    obj_func = lambda trial: objective(model_name, trial, cities[city_index], nstep = nstep)
  File "/var/folders/r_/lbw3rw192wl9sx9vtc1c2_xc0000gn/T/ipykernel_36494/719169775.py", line 211, in objective
    mae_error = output_prediction_for_location(df_train, df_valid, model, location=city, feature_list=selected_features,
  File "/var/folders/

🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁🦁:  TFTModel
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3
🦁 1
🦁 2
🦁 3


In [99]:
cities

['An Giang', 'BR Vũng Tàu', 'Bình Phước', 'Bình Thuận', 'Bình Định']

In [98]:
file_path = 'diarrhoea_opt_hyperparam_RNN_v5.xlsx'
for city_index in range(len(cities)):
        one_city_param = pd.DataFrame({
                                    'City': cities[city_index],
                                    'Alg_name': 'BlockRNNModel',
                                    'Best_value': 2,
                                    'n_try_opt': 2,
                                    'input_chunk_length': 2,
                                    'output_chunk_length': 1,
                                    'random_state': 3,
                                    'n_epochs': 3,
                                    'hidden_dim': 3,
                                    'n_rnn_layers': 3,
                                    'dropout': 3
                                    }, index=[city_index])
        
        if(os.path.isfile(file_path)):
            with pd.ExcelWriter(file_path,mode="a",engine="openpyxl",if_sheet_exists="overlay") as writer:
                one_city_param.to_excel(writer, header=None, startrow=city_index+2,index=False)
        else:
            with pd.ExcelWriter(file_path,engine="openpyxl") as writer:
                one_city_param.to_excel(writer, startrow=city_index+1,index=False)

In [82]:
pip show pandas

Name: pandas
Version: 2.1.0
Summary: Powerful data structures for data analysis, time series, and statistics
Home-page: https://pandas.pydata.org
Author: 
Author-email: The Pandas Development Team <pandas-dev@python.org>
License: BSD 3-Clause License
        
        Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
        All rights reserved.
        
        Copyright (c) 2011-2023, Open source contributors.
        
        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions are met:
        
        * Redistributions of source code must retain the above copyright notice, this
          list of conditions and the following disclaimer.
        
        * Redistributions in binary form must reproduce the above copyright notice,
          this list of conditions and the following disclaimer in the documentation
          and/or other materials 

In [None]:
# import requests

# def send_to_telegram(message):

#     apiToken = '5908735099:AAGVSLrW62aXPBP-GrMvxoVgMsuJxXJpP1Q'
#     chatID = '@ptn_announcement'
#     apiURL = f'https://api.telegram.org/bot{apiToken}/sendMessage'

#     try:
#         response = requests.post(apiURL, json={'chat_id': chatID, 'text': message})
#         print(response.text)
#     except Exception as e:
#         print(e)

# send_to_telegram("Server Chạy Xong optimize" )

In [143]:
from darts.datasets import WeatherDataset
from darts.models import RandomForest
series = WeatherDataset().load()
# predicting atmospheric pressure
series

In [150]:
from darts.datasets import WeatherDataset
from darts.models import RegressionModel
from sklearn.linear_model import Ridge
series = WeatherDataset().load()
# predicting atmospheric pressure
target = series['p (mbar)'][:100]
# optionally, use past observed rainfall (pretending to be unknown beyond index 100)
past_cov = series['rain (mm)'][:100]
# optionally, use future temperatures (pretending this component is a forecast)
future_cov = series['T (degC)'][:106]
# wrap around the sklearn Ridge model
model = RegressionModel(
    model=Ridge(),
    lags=12,
    lags_past_covariates=4,
    lags_future_covariates=(0,6),
    output_chunk_length=6
)
model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
pred = model.predict(6)
pred.values()

array([[1005.73340676],
       [1005.71159051],
       [1005.7322616 ],
       [1005.76314504],
       [1005.82204348],
       [1005.89100967]])

In [153]:
target = series['p (mbar)'][:100]
# optionally, use past observed rainfall (pretending to be unknown beyond index 100)
past_cov = series['rain (mm)'][:100]
# optionally, use future temperatures (pretending this component is a forecast)
future_cov = series['T (degC)'][:106]
# random forest with 200 trees trained with MAE
model = RandomForest(
    lags=2,
    lags_past_covariates=2,
    lags_future_covariates=[0,1,2,3,4,5],
    output_chunk_length=6,
    n_estimators=200,
    criterion="absolute_error",
)
model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
pred = model.predict(1)
pred.values()

array([[1006.4309]])