In [1]:
import os
import sys

In [2]:
# Put the parent of the current working directory first on the import path so that
# the `src` package can be imported from this notebook.
# (os.path.dirname('__file__') on a literal string is always '', so the original
# expression boils down to the absolute path of '..'.)
sys.path.insert(0, os.path.abspath(os.pardir))
from src.encoding.encoders import *
from src.encoding.tools import create_encoding_pipeline
import pandas as pd
import src.features as ft
from src.datasets.base_tabular_dataset import BaseTabularDataset
import datetime as dt
import logging
import pathlib
# Lift pandas' column display limit so wide DataFrames are shown with all their columns.
pd.set_option('display.max_columns', None)

In [3]:
# Use the root logger so that every module logging through `logging` shows up in the notebook.
logger = logging.getLogger()
# Set the level explicitly: basicConfig() is a no-op when the root logger already has handlers.
logger.setLevel(logging.INFO)
# `encoding` is only honoured together with `filename`; combined with `handlers` it is ignored
# (and rejected as an unknown argument before Python 3.9), so it is not passed here.
logging.basicConfig(
    level=logging.INFO,
    format="%(name)s %(asctime)s: %(levelname)s: %(message)s",
    handlers=[logging.StreamHandler()],
)

In [4]:
# Project root = parent of the current working directory, as an absolute pathlib.Path.
# (Equivalent to joining os.path.dirname('__file__'), which is '', with '..'.)
root_dir = pathlib.Path(os.path.abspath(os.pardir))

In [5]:
# Display the resolved project root as a sanity check.
root_dir

PosixPath('/home/maxime/Documents/WORKSPACES/forecasting_models')

In [6]:
# Settings passed to BaseTabularDataset as `fetch_config`.
# Dates are day-month-year strings; `data_dir` is the `data` folder under the project root.
fetch_config = dict(
    data_start='01-01-2017',
    data_stop='31-12-2023',
    data_dir=root_dir / 'data',
    etablissement="CHU Dijon",
    departement="21",
    region='BOURGOGNE',
)

In [7]:
# Feature classes handed to BaseTabularDataset for this experiment.
# Candidates currently left out of the list:
#   ft.FireFightersFeatures(config=config, include_calls=False), ft.GoogleTrendFeatures,
#   ft.MeteorologicalFeatures, ft.SociologicalFeatures, ft.SportsCompetitionFeatures,
#   ft.TrafficFeatures
ars_features_class = [
    ft.AirQualityFeatures,
    ft.HopitalFeatures,
    ft.EpidemiologicalFeatures,
]

In [8]:
# Column(s) to predict; passed to the dataset as `targets_names`.
# Alternative target tried earlier: ['nb_vers_hospit']
target_colomns = ["Total_CHU Dijon"]

In [9]:
# Encoder specification consumed by create_encoding_pipeline.
# Outer keys name a kind of column ('number', 'category', 'datetime'); inner keys name a
# treatment ('as_number' / 'as_category'), each listing its 'imputers' and its 'encoders'.
# NOTE(review): `imputers`, `ne` and `de` presumably come from the wildcard import of
# src.encoding.encoders — confirm.
encoders_dict = {
    'number': {
        'as_number': {
            'imputers': [imputers.SimpleImputer(strategy='mean')],
            'encoders': [ne.StandardScaler()],
        },
    },
    'category': {
        'as_category': {
            'imputers': [imputers.SimpleImputer(strategy='most_frequent')],
            'encoders': [ne.MultiTargetEncoder(drop_invariant=True, return_df=True)],
        },
    },
    'datetime': {
        # Date parts handled as numbers and encoded cyclically.
        'as_number': {
            'imputers': [de.DateFeatureExtractor()],
            'encoders': [ne.CyclicalFeatures(drop_original=True)],
        },
        # Date parts extracted with dtype='category' and target-encoded.
        'as_category': {
            'imputers': [de.DateFeatureExtractor(dtype='category')],
            'encoders': [ne.MultiTargetEncoder(drop_invariant=True, return_df=True)],
        },
    },
}

In [10]:
# Build the encoding pipeline from the encoder specification; it is later passed to the
# dataset configuration as `encoding_pipeline`.
pipeline = create_encoding_pipeline(encoders_dict)

Creating encoding pipeline


In [11]:
# Split settings passed to the dataset: 20% test, 20% validation, no shuffling.
split_config = dict(test_size=0.2, val_size=0.2, shuffle=False)

In [12]:
# Settings passed to BaseTabularDataset as `getter_config` (and to get_dataset as keyword arguments).
# Options currently switched off:
#   shift=[1, 2, 3, 4, 5, 6, 7]
#   rolling_window=[7, 14]
dataset_config = dict(
    from_date='15-01-2019',
    to_date='31-12-2023',
    freq='1D',
    split_config=split_config,
    create_X_y=True,
    encoding_pipeline=pipeline,
    targets_names=target_colomns,
    # presumably the share of rows above which a column counts as constant and is dropped
    # — TODO confirm against BaseTabularDataset
    drop_constant_thr=0.65,
    data_dir=root_dir / 'data',
)

In [13]:
# Create the dataset from the selected feature classes and the two configurations.
# Per the log output of this cell, construction already fetches the data and builds the dataset.
baseTabularDataset = BaseTabularDataset(
    features_class=ars_features_class,
    logger=logger,
    fetch_config=fetch_config,
    getter_config=dataset_config,
)


root 2024-10-10 16:44:11,655: INFO: Initialisation de la classe BaseTabularDataset
root 2024-10-10 16:44:11,659: INFO: Initialisation de la classe airqualityfeatures
root 2024-10-10 16:44:11,660: INFO: Initialisation de la classe hopitalfeatures
root 2024-10-10 16:44:11,660: INFO: Initialisation de la classe epidemiologicalfeatures
root 2024-10-10 16:44:11,661: INFO: Fetching data for airqualityfeatures
root 2024-10-10 16:44:11,669: INFO: Fetching data for hopitalfeatures
root 2024-10-10 16:44:11,673: INFO: Fetching data for epidemiologicalfeatures
root 2024-10-10 16:44:11,676: INFO: Getting the dataset from 15-01-2019 to 31-12-2023...
root 2024-10-10 16:44:11,677: INFO: Getting data for airqualityfeatures from 2019-01-15 00:00:00 to 2023-12-31 00:00:00, at a 1D frequency
root 2024-10-10 16:44:11,682: INFO: Column 'PM10_FR26010' is constant at 15.0 for 68.79% of the rows.
root 2024-10-10 16:44:11,684: INFO: Column 'PM10_FR26094' is constant at 19.0 for 88.06% of the rows.
root 2024-10-

/home/maxime/Documents/WORKSPACES/forecasting_models/data
X shape: (1159, 13), y shape: (1159, 1)
[FeatureUnion]  (step 1 of 4) Processing columntransformer-1, total=   0.0s
[FeatureUnion]  (step 2 of 4) Processing columntransformer-2, total=   0.0s
[FeatureUnion]  (step 3 of 4) Processing columntransformer-3, total=   0.0s
[FeatureUnion]  (step 4 of 4) Processing columntransformer-4, total=   0.0s


In [14]:
# Inspect the un-encoded training features.
baseTabularDataset.X_train


Unnamed: 0_level_0,O3_FR26005,O3_FR26010,PM10_FR26005,PM10_FR26014,PM25_FR26005,NO2_FR26005,NO2_FR26014,HNFC_moving,nb_vers_hospit,inc_grippe,inc_diarrhee,inc_varicelle,inc_ira,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2019-01-15,26.0,38.0,21.0,15.0,11.0,34.0,39.0,after,51.0,1431,1047,48,0,2019-01-15
2019-01-16,15.0,27.0,27.0,24.0,17.0,34.0,30.0,after,65.0,1431,1047,48,0,2019-01-16
2019-01-17,53.0,59.0,16.0,13.0,8.7,17.0,20.0,after,59.0,1431,1047,48,0,2019-01-17
2019-01-18,39.0,52.0,18.0,12.0,7.2,27.0,29.0,after,59.0,1431,1047,48,0,2019-01-18
2019-01-19,19.0,27.0,23.0,22.0,16.0,27.0,28.0,after,45.0,1431,1047,48,0,2019-01-19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-03-14,47.0,52.0,14.0,10.0,7.2,20.0,31.0,after,47.0,2268,2010,219,3406,2022-03-14
2022-03-15,35.0,42.0,18.0,22.0,7.9,25.0,41.0,after,58.0,2268,2010,219,3406,2022-03-15
2022-03-16,33.0,44.0,30.0,29.0,12.0,29.0,36.0,after,54.0,2268,2010,219,3406,2022-03-16
2022-03-17,43.0,50.0,27.0,24.0,12.0,7.5,21.0,after,58.0,2268,2010,219,3406,2022-03-17


In [15]:
# Inspect the test split; the displayed frame holds the target column alongside the features.
baseTabularDataset.test_set

Unnamed: 0_level_0,O3_FR26005,O3_FR26010,PM10_FR26005,PM10_FR26014,PM25_FR26005,NO2_FR26005,NO2_FR26014,target_Total_CHU Dijon,HNFC_moving,nb_vers_hospit,inc_grippe,inc_diarrhee,inc_varicelle,inc_ira,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2023-01-03,24.0,26.0,12.0,10.0,6.0,16.0,22.0,185,after,49.0,2533,1239,136,4154,2023-01-03
2023-01-04,36.0,37.0,12.0,10.0,5.6,9.9,17.0,182,after,58.0,2533,1239,136,4154,2023-01-04
2023-01-05,30.0,36.0,10.0,6.0,3.6,13.0,24.0,178,after,45.0,2533,1239,136,4154,2023-01-05
2023-01-06,30.0,38.0,12.0,9.0,4.6,16.0,29.0,187,after,60.0,2533,1239,136,4154,2023-01-06
2023-01-07,45.0,46.0,11.0,10.0,5.0,6.3,16.0,202,after,45.0,2533,1239,136,4154,2023-01-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-27,22.0,29.0,21.0,19.0,17.0,20.0,27.0,165,after,45.0,3474,892,554,6291,2023-12-27
2023-12-28,41.0,42.0,14.0,14.0,9.5,17.0,26.0,192,after,63.0,3474,892,554,6291,2023-12-28
2023-12-29,48.0,49.0,12.0,12.0,8.0,10.0,19.0,157,after,56.0,3474,892,554,6291,2023-12-29
2023-12-30,36.0,31.0,15.0,14.0,8.8,13.0,21.0,212,after,45.0,3474,892,554,6291,2023-12-30


In [16]:
# Inspect the validation split; the displayed frame holds the target column alongside the features.
baseTabularDataset.val_set

Unnamed: 0_level_0,O3_FR26005,O3_FR26010,PM10_FR26005,PM10_FR26014,PM25_FR26005,NO2_FR26005,NO2_FR26014,target_Total_CHU Dijon,HNFC_moving,nb_vers_hospit,inc_grippe,inc_diarrhee,inc_varicelle,inc_ira,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2022-03-19,72.0,77.0,30.0,27.0,21.0,7.00,26.0,222,after,52.0,2268,2010,219,3406,2022-03-19
2022-03-20,68.0,72.0,24.0,21.0,13.0,8.60,17.0,211,after,50.0,2268,2010,219,3406,2022-03-20
2022-03-21,63.0,71.0,25.0,23.0,14.0,20.00,31.0,252,after,59.0,3961,1548,0,6123,2022-03-21
2022-03-22,57.0,70.0,32.0,28.0,17.0,26.00,35.0,235,after,64.0,3961,1548,0,6123,2022-03-22
2022-03-23,58.0,80.0,32.0,28.0,17.0,32.00,40.0,213,after,56.0,3961,1548,0,6123,2022-03-23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-29,48.0,54.0,12.0,9.0,5.3,9.50,18.0,187,after,62.0,3619,784,218,5706,2022-12-29
2022-12-30,42.0,39.0,12.0,11.0,6.9,9.10,17.0,187,after,58.0,3619,784,218,5706,2022-12-30
2022-12-31,41.0,36.0,13.0,11.0,6.4,7.60,16.0,204,after,54.0,3619,784,218,5706,2022-12-31
2023-01-01,62.0,61.0,21.0,17.0,7.7,0.86,8.5,169,after,46.0,3619,784,218,5706,2023-01-01


In [17]:
# Inspect the training features in encoded form (presumably the output of the encoding pipeline).
baseTabularDataset.enc_X_train

Unnamed: 0_level_0,O3_FR26005,O3_FR26010,PM10_FR26005,PM10_FR26014,PM25_FR26005,NO2_FR26005,NO2_FR26014,inc_grippe,inc_diarrhee,inc_varicelle,inc_ira,date##month_sin,date##month_cos,date##day_sin,date##day_cos,date##dayofweek_sin,date##dayofweek_cos,date##quarter_sin,date##quarter_cos,date##week_sin,date##week_cos,date##dayofYear_sin,date##dayofYear_cos,date##month##cat##target_Total_CHU Dijon,date##day##cat##target_Total_CHU Dijon,date##dayofweek##cat##target_Total_CHU Dijon,date##quarter##cat##target_Total_CHU Dijon,date##week##cat##target_Total_CHU Dijon,date##dayofYear##cat##target_Total_CHU Dijon,nb_vers_hospit
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
2019-01-15,-1.227567,-0.962214,0.660920,0.190396,0.626901,2.788459,2.549176,0.785847,0.261534,-0.414918,-0.707487,0.5,8.660254e-01,0.101168,-0.994869,8.660254e-01,0.5,1.0,6.123234e-17,0.348202,0.93742,0.254671,0.967028,184.289821,175.128433,166.253016,180.583832,184.165049,174.385506,51.0
2019-01-16,-1.687314,-1.428685,1.475891,1.370794,1.646170,2.788459,1.449447,0.785847,0.261534,-0.414918,-0.707487,0.5,8.660254e-01,-0.101168,-0.994869,8.660254e-01,-0.5,1.0,6.123234e-17,0.348202,0.93742,0.271234,0.962513,184.289821,174.370066,166.379522,180.583832,184.165049,178.543051,65.0
2019-01-17,-0.099097,-0.071678,-0.018224,-0.071915,0.236181,0.587161,0.227526,0.785847,0.261534,-0.414918,-0.707487,0.5,8.660254e-01,-0.299363,-0.954139,1.224647e-16,-1.0,1.0,6.123234e-17,0.348202,0.93742,0.287717,0.957716,184.289821,175.262262,171.180725,180.583832,184.165049,176.275300,59.0
2019-01-18,-0.684230,-0.368523,0.253434,-0.203070,-0.018636,1.882042,1.327255,0.785847,0.261534,-0.414918,-0.707487,0.5,8.660254e-01,-0.485302,-0.874347,-8.660254e-01,-0.5,1.0,6.123234e-17,0.348202,0.93742,0.304115,0.952635,184.289821,177.805022,173.771085,180.583832,184.165049,179.676927,59.0
2019-01-19,-1.520133,-1.428685,0.932577,1.108483,1.476291,1.882042,1.205063,0.785847,0.261534,-0.414918,-0.707487,0.5,8.660254e-01,-0.651372,-0.758758,-8.660254e-01,0.5,1.0,6.123234e-17,0.348202,0.93742,0.320423,0.947274,184.289821,174.908990,190.654538,180.583832,184.165049,178.417065,45.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-03-14,-0.349868,-0.368523,-0.289881,-0.465381,-0.018636,0.975625,1.571639,1.542837,1.575052,0.346413,3.367837,1.0,6.123234e-17,0.299363,-0.954139,0.000000e+00,1.0,1.0,6.123234e-17,0.964636,0.263587,0.955211,0.295927,168.856595,176.738202,180.412119,180.583832,178.321324,176.359290,47.0
2022-03-15,-0.851410,-0.792588,0.253434,1.108483,0.100279,1.623066,2.793561,1.542837,1.575052,0.346413,3.367837,1.0,6.123234e-17,0.101168,-0.994869,8.660254e-01,0.5,1.0,6.123234e-17,0.964636,0.263587,0.960150,0.279486,168.856595,175.128433,166.253016,180.583832,178.321324,175.309405,58.0
2022-03-16,-0.935001,-0.707775,1.883377,2.026570,0.796779,2.141018,2.182600,1.542837,1.575052,0.346413,3.367837,1.0,6.123234e-17,-0.101168,-0.994869,8.660254e-01,-0.5,1.0,6.123234e-17,0.964636,0.263587,0.964806,0.262962,168.856595,174.370066,166.379522,180.583832,178.321324,176.821240,54.0
2022-03-17,-0.517049,-0.453336,1.475891,1.370794,0.796779,-0.642976,0.349718,1.542837,1.575052,0.346413,3.367837,1.0,6.123234e-17,-0.299363,-0.954139,1.224647e-16,-1.0,1.0,6.123234e-17,0.964636,0.263587,0.969178,0.246361,168.856595,175.262262,171.180725,180.583832,178.321324,174.175529,58.0


In [18]:
# Inspect the test features in encoded form (presumably the output of the encoding pipeline).
baseTabularDataset.enc_X_test

Unnamed: 0_level_0,O3_FR26005,O3_FR26010,PM10_FR26005,PM10_FR26014,PM25_FR26005,NO2_FR26005,NO2_FR26014,inc_grippe,inc_diarrhee,inc_varicelle,inc_ira,date##month_sin,date##month_cos,date##day_sin,date##day_cos,date##dayofweek_sin,date##dayofweek_cos,date##quarter_sin,date##quarter_cos,date##week_sin,date##week_cos,date##dayofYear_sin,date##dayofYear_cos,date##month##cat##target_Total_CHU Dijon,date##day##cat##target_Total_CHU Dijon,date##dayofweek##cat##target_Total_CHU Dijon,date##quarter##cat##target_Total_CHU Dijon,date##week##cat##target_Total_CHU Dijon,date##dayofYear##cat##target_Total_CHU Dijon,nb_vers_hospit
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
2023-01-03,-1.311157,-1.471092,-0.561538,-0.465381,-0.222490,0.457673,0.471910,1.782505,0.523420,-0.023122,4.262829,5.000000e-01,0.866025,5.712682e-01,0.820763,8.660254e-01,0.5,1.000000e+00,6.123234e-17,0.118273,0.992981,5.147875e-02,0.998674,184.289821,176.805951,166.253016,180.583832,177.674503,178.540581,49.0
2023-01-04,-0.809615,-1.004620,-0.561538,-0.465381,-0.290441,-0.332205,-0.139050,1.782505,0.523420,-0.023122,4.262829,5.000000e-01,0.866025,7.247928e-01,0.688967,8.660254e-01,-0.5,1.000000e+00,6.123234e-17,0.118273,0.992981,6.861474e-02,0.997643,184.289821,175.947802,166.379522,180.583832,177.674503,178.231650,58.0
2023-01-05,-1.060386,-1.047027,-0.833196,-0.990002,-0.630197,0.069208,0.716295,1.782505,0.523420,-0.023122,4.262829,5.000000e-01,0.866025,8.486443e-01,0.528964,1.224647e-16,-1.0,1.000000e+00,6.123234e-17,0.118273,0.992981,8.573050e-02,0.996318,184.289821,177.777014,171.180725,180.583832,177.674503,175.657229,45.0
2023-01-06,-1.060386,-0.962214,-0.561538,-0.596536,-0.460319,0.457673,1.327255,1.782505,0.523420,-0.023122,4.262829,5.000000e-01,0.866025,9.377521e-01,0.347305,-8.660254e-01,-0.5,1.000000e+00,6.123234e-17,0.118273,0.992981,1.028210e-01,0.994700,184.289821,176.444625,173.771085,180.583832,177.674503,174.833414,60.0
2023-01-07,-0.433458,-0.622962,-0.697367,-0.465381,-0.392368,-0.798362,-0.261242,1.782505,0.523420,-0.023122,4.262829,5.000000e-01,0.866025,9.884683e-01,0.151428,-8.660254e-01,0.5,1.000000e+00,6.123234e-17,0.118273,0.992981,1.198812e-01,0.992788,184.289821,174.615413,190.654538,180.583832,177.674503,174.370018,45.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-27,-1.394748,-1.343872,0.660920,0.715017,1.646170,0.975625,1.082871,2.633554,0.050117,1.837910,6.819777,-2.449294e-16,1.000000,-7.247928e-01,0.688967,8.660254e-01,-0.5,-2.449294e-16,1.000000e+00,-0.118273,0.992981,-6.861474e-02,0.997643,177.622204,176.105882,166.379522,174.666667,177.524509,177.047416,45.0
2023-12-28,-0.600639,-0.792588,-0.289881,0.059241,0.372084,0.587161,0.960679,2.633554,0.050117,1.837910,6.819777,-2.449294e-16,1.000000,-5.712682e-01,0.820763,1.224647e-16,-1.0,-2.449294e-16,1.000000e+00,-0.118273,0.992981,-5.147875e-02,0.998674,177.622204,177.822180,171.180725,174.666667,177.524509,176.995928,63.0
2023-12-29,-0.308073,-0.495743,-0.561538,-0.203070,0.117266,-0.319256,0.105334,2.633554,0.050117,1.837910,6.819777,-2.449294e-16,1.000000,-3.943559e-01,0.918958,-8.660254e-01,-0.5,-2.449294e-16,1.000000e+00,-0.118273,0.992981,-3.432760e-02,0.999411,177.622204,173.310950,173.771085,174.666667,177.524509,174.936391,56.0
2023-12-30,-0.809615,-1.259059,-0.154052,0.059241,0.253169,0.069208,0.349718,2.633554,0.050117,1.837910,6.819777,-2.449294e-16,1.000000,-2.012985e-01,0.979530,-8.660254e-01,0.5,-2.449294e-16,1.000000e+00,-0.118273,0.992981,-1.716633e-02,0.999853,177.622204,172.545418,190.654538,174.666667,177.524509,174.730437,45.0


In [19]:
# Inspect the test targets.
baseTabularDataset.y_test

Unnamed: 0_level_0,target_Total_CHU Dijon
date,Unnamed: 1_level_1
2023-01-03,185
2023-01-04,182
2023-01-05,178
2023-01-06,187
2023-01-07,202
...,...
2023-12-27,165
2023-12-28,192
2023-12-29,157
2023-12-30,212


In [20]:
# Inspect the validation features in encoded form (presumably the output of the encoding pipeline).
baseTabularDataset.enc_X_val

Unnamed: 0_level_0,O3_FR26005,O3_FR26010,PM10_FR26005,PM10_FR26014,PM25_FR26005,NO2_FR26005,NO2_FR26014,inc_grippe,inc_diarrhee,inc_varicelle,inc_ira,date##month_sin,date##month_cos,date##day_sin,date##day_cos,date##dayofweek_sin,date##dayofweek_cos,date##quarter_sin,date##quarter_cos,date##week_sin,date##week_cos,date##dayofYear_sin,date##dayofYear_cos,date##month##cat##target_Total_CHU Dijon,date##day##cat##target_Total_CHU Dijon,date##dayofweek##cat##target_Total_CHU Dijon,date##quarter##cat##target_Total_CHU Dijon,date##week##cat##target_Total_CHU Dijon,date##dayofYear##cat##target_Total_CHU Dijon,nb_vers_hospit
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
2022-03-19,0.695012,0.691639,1.883377,1.764260,2.325682,-0.707720,0.960679,1.542837,1.575052,0.346413,3.367837,1.000000e+00,6.123234e-17,-6.513725e-01,-0.758758,-8.660254e-01,0.5,1.000000e+00,6.123234e-17,0.964636,0.263587,9.770639e-01,0.212947,168.856595,174.908990,190.654538,180.583832,178.321324,168.912246,52.0
2022-03-20,0.527831,0.479607,1.068405,0.977328,0.966657,-0.500539,-0.139050,1.542837,1.575052,0.346413,3.367837,1.000000e+00,6.123234e-17,-7.907757e-01,-0.612106,-2.449294e-16,1.0,1.000000e+00,6.123234e-17,0.964636,0.263587,9.805754e-01,0.196143,168.856595,175.925219,179.806058,180.583832,178.321324,172.053040,50.0
2022-03-21,0.318855,0.437200,1.204234,1.239638,1.136535,0.975625,1.571639,3.074001,0.944891,-0.628625,6.618763,1.000000e+00,6.123234e-17,-8.978045e-01,-0.440394,0.000000e+00,1.0,1.000000e+00,6.123234e-17,0.98904,0.147647,9.837980e-01,0.179281,168.856595,175.812305,180.412119,180.583832,165.174998,169.015223,59.0
2022-03-22,0.068084,0.394794,2.155035,1.895415,1.646170,1.752554,2.060408,3.074001,0.944891,-0.628625,6.618763,1.000000e+00,6.123234e-17,-9.680771e-01,-0.250653,8.660254e-01,0.5,1.000000e+00,6.123234e-17,0.98904,0.147647,9.867306e-01,0.162366,168.856595,177.573768,166.253016,180.583832,165.174998,170.662852,64.0
2022-03-23,0.109879,0.818859,2.155035,1.895415,1.646170,2.529483,2.671368,3.074001,0.944891,-0.628625,6.618763,1.000000e+00,6.123234e-17,-9.987165e-01,-0.050649,8.660254e-01,-0.5,1.000000e+00,6.123234e-17,0.98904,0.147647,9.893724e-01,0.145404,168.856595,172.853949,166.379522,180.583832,165.174998,171.332202,56.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-29,-0.308073,-0.283710,-0.561538,-0.596536,-0.341405,-0.384000,-0.016858,2.764693,-0.097194,0.341961,6.119817,-2.449294e-16,1.000000e+00,-3.943559e-01,0.918958,1.224647e-16,-1.0,-2.449294e-16,1.000000e+00,-0.118273,0.992981,-3.432760e-02,0.999411,177.622204,173.310950,171.180725,174.666667,177.524509,174.936391,62.0
2022-12-30,-0.558844,-0.919807,-0.561538,-0.334225,-0.069600,-0.435795,-0.139050,2.764693,-0.097194,0.341961,6.119817,-2.449294e-16,1.000000e+00,-2.012985e-01,0.979530,-8.660254e-01,-0.5,-2.449294e-16,1.000000e+00,-0.118273,0.992981,-1.716633e-02,0.999853,177.622204,172.545418,173.771085,174.666667,177.524509,174.730437,58.0
2022-12-31,-0.600639,-1.047027,-0.425710,-0.334225,-0.154539,-0.630028,-0.261242,2.764693,-0.097194,0.341961,6.119817,-2.449294e-16,1.000000e+00,-2.449294e-16,1.000000,-8.660254e-01,0.5,-2.449294e-16,1.000000e+00,-0.118273,0.992981,-2.449294e-16,1.000000,177.622204,174.987916,190.654538,174.666667,177.524509,172.104528,54.0
2023-01-01,0.277060,0.013135,0.660920,0.452707,0.066303,-1.502777,-1.177683,2.764693,-0.097194,0.341961,6.119817,5.000000e-01,8.660254e-01,2.012985e-01,0.979530,-2.449294e-16,1.0,1.000000e+00,6.123234e-17,-0.118273,0.992981,1.716633e-02,0.999853,184.289821,174.705744,179.806058,180.583832,177.524509,178.077185,46.0


In [21]:
# Restrict the dataset to three explicitly named feature columns.
dataset_config.update(features_names=['PM10_FR26014', 'inc_diarrhee', 'inc_varicelle'])

In [22]:
# Rebuild the dataset from the current configuration, which at this point also carries
# the `features_names` restriction.
baseTabularDataset.get_dataset(**dataset_config)

root 2024-10-10 16:44:12,038: INFO: Getting the dataset from 15-01-2019 to 31-12-2023...
root 2024-10-10 16:44:12,039: INFO: ['Total_CHU Dijon']
root 2024-10-10 16:44:12,040: INFO: ['PM10_FR26014', 'inc_diarrhee', 'inc_varicelle']
root 2024-10-10 16:44:12,042: INFO: Getting data for airqualityfeatures from 2019-01-15 00:00:00 to 2023-12-31 00:00:00, at a 1D frequency
root 2024-10-10 16:44:12,048: INFO: Column 'PM10_FR26010' is constant at 15.0 for 68.79% of the rows.
root 2024-10-10 16:44:12,049: INFO: Column 'PM10_FR26094' is constant at 19.0 for 88.06% of the rows.
root 2024-10-10 16:44:12,050: INFO: Column 'PM25_FR26094' is constant at 6.8 for 87.58% of the rows.
root 2024-10-10 16:44:12,052: INFO: Column 'NO2_FR26010' is constant at 6.9 for 68.62% of the rows.
root 2024-10-10 16:44:12,053: INFO: Column 'NO2_FR26094' is constant at 7.0 for 87.61% of the rows.
root 2024-10-10 16:44:12,056: INFO: Getting data for hopitalfeatures from 2019-01-15 00:00:00 to 2023-12-31 00:00:00, at a 1D

X shape: (1159, 3), y shape: (1159, 1)
[FeatureUnion]  (step 1 of 4) Processing columntransformer-1, total=   0.0s
[FeatureUnion]  (step 2 of 4) Processing columntransformer-2, total=   0.0s
[FeatureUnion]  (step 3 of 4) Processing columntransformer-3, total=   0.0s
[FeatureUnion]  (step 4 of 4) Processing columntransformer-4, total=   0.0s


In [29]:
# Look at the test targets again after the dataset was rebuilt with a restricted feature list.
# NOTE(review): this cell's execution count is out of sequence in the export; re-run the
# notebook top to bottom to confirm the output.
baseTabularDataset.y_test

Unnamed: 0_level_0,target_Total_CHU Dijon
date,Unnamed: 1_level_1
2023-01-03,185
2023-01-04,182
2023-01-05,178
2023-01-06,187
2023-01-07,202
...,...
2023-12-27,165
2023-12-28,192
2023-12-29,157
2023-12-30,212


# Scratch code: this section has no execution count or output in the export, so it
# appears never to have been run — treat it as untested.
# Fetch the data without saving, then build a dataset with lagged and rolling-window settings.
baseTabularDataset.fetch_data(save=False)
dataset = baseTabularDataset.get_dataset(
    from_date=dt.datetime.strptime('15-01-2019', '%d-%m-%Y'),
    to_date=dt.datetime.strptime('30-12-2023', '%d-%m-%Y'),
    shift=[1, 2, 3, 4, 5, 6, 7],
    rolling_window=[7, 14],
    create_X_y=True,
    split_config={'test_size': 0.2, 'val_size': 0.2, 'shuffle': False},
    encoding_pipeline=pipeline,
    freq='1D',
    targets_names=target_colomns,
)
# Column names of the encoded training features and of the training targets.
print(dataset.enc_X_train.columns.to_list())
print(dataset.y_train.columns.to_list())
dataset.save_dataset()

# Derive a second dataset limited to two named features and list the columns of each view.
dataset2 = dataset.get_dataset(
    features_names=['O3_FR26005%%J-1', 'confinement1%%J-5##Total_CHU_Dijon'],
)
print(dataset2.data.columns.to_list())
print(dataset2.train_set.columns.to_list())
print(dataset2.X_train.columns.to_list())
print(dataset2.y_train.columns.to_list())
print(dataset2.enc_X_train.columns.to_list())

# Scratch: plot the dataset (at most 16 subplots), print its data, then save it
# with the project root and "data" as arguments.
dataset.plot(max_subplots=16)
print(dataset.data)
dataset.save_data(root_dir, "data")

# Scratch: plot the first two weeks of January 2022 at daily frequency (at most 4 subplots).
baseTabularDataset.plot(from_date='01-01-2022', to_date='15-01-2022', freq='1D', max_subplots=4)
# Encode with the notebook's encoding pipeline. The argument must be `pipeline`;
# the misspelled name `piepline` is not defined anywhere and would raise NameError.
dataset.encode(pipeline=pipeline)

# Scratch: save the data again (same arguments as the earlier save_data call).
dataset.save_data(root_dir, "data")

# Scratch: summarise the underlying data.
# Assuming `.data` is a pandas DataFrame (TODO confirm), info() prints its summary itself and
# returns None, so wrapping it in print() would only add a stray "None" line.
baseTabularDataset.data.info()
# NOTE(review): `encoders` is not defined in the visible cells (the specification built
# above is named `encoders_dict`) — confirm the intended argument before running.
baseTabularDataset.encode(encoders=encoders)

# Scratch: print the `encoded_data` and `features` attributes of the dataset object.
print(baseTabularDataset.encoded_data)
print(baseTabularDataset.features)