### Imports

In [1]:
import os
import sys
# Put the parent of the working directory at the front of the import path so
# that the project's `src` package can be imported from this notebook.
# Note: '__file__' is a plain string here (not the module attribute), so
# `dirname` yields '' and the path is resolved against the working directory.
_parent_dir = os.path.join(os.path.dirname('__file__'), '..')
sys.path.insert(0, os.path.abspath(_parent_dir))

In [2]:
# %pip install -r ../requirements.txt

In [3]:
from src.encoding.encoders import *
from src.encoding.tools import create_encoding_pipeline
from src.models.sklearn_api_model import save_object, Model
from src.models.sklearn_api_models_config import get_model
from src.datasets.base_tabular_dataset import BaseTabularDataset
from src.experiments.base_experiment import BaseExperiment
import src.features as ft
import logging
import pandas as pd
import pathlib
import numpy as np

In [4]:
import logging
import os
import sys
import datetime as dt
from typing import List, Union, Optional
import pathlib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from src.datasets.base_tabular_dataset import BaseTabularDataset
from src.encoding.tools import create_encoding_pipeline
from src.experiments.features_selection import get_features, explore_features
from src.models.sklearn_api_model import Model, ModelTree
import src.features as ft
import mlflow.sklearn
import mlflow
import mlflow.data.pandas_dataset
from mlflow.models import infer_signature
import os
import matplotlib.pyplot as plt
%matplotlib widget
import cudf as cd
import numpy as np
import re


### Config

In [5]:
# Configure the root logger; it is passed to the project classes below so
# that all modules report through the same handler and message format.
logging.basicConfig(
    level=logging.INFO,
    encoding="utf-8",
    format="%(name)s %(asctime)s: %(levelname)s: %(message)s",
    handlers=[logging.StreamHandler()],
)
logger = logging.getLogger()
# Set the level explicitly as well: `basicConfig` leaves an already
# configured root logger untouched.
logger.setLevel(logging.INFO)

In [6]:
# Root directory of the project, as a `pathlib.Path`.
# '__file__' is a string literal, so this resolves to the parent of the
# current working directory (the notebook is expected to run from a
# sub-folder of the project).
root_dir = pathlib.Path(
    os.path.abspath(os.path.join(os.path.dirname('__file__'), '..'))
)

##### Encoding Pipeline

In [7]:
# Encoding scheme handed to `create_encoding_pipeline`.
# Layout: {column type: {representation: {'imputers': [...], 'encoders': [...]}}}
# NOTE(review): presumably the imputers run before the encoders for each
# entry -- confirm in `src.encoding.tools`.
encoders_dict = {}

# Numeric columns: SimpleImputer(strategy='mean') followed by StandardScaler.
encoders_dict['number'] = {
    'as_number': {
        'imputers': [imputers.SimpleImputer(strategy='mean')],
        'encoders': [ne.StandardScaler()],
    },
}

# Categorical columns: most-frequent imputation followed by MultiTargetEncoder.
encoders_dict['category'] = {
    'as_category': {
        'imputers': [imputers.SimpleImputer(strategy='most_frequent')],
        'encoders': [ne.MultiTargetEncoder(drop_invariant=True, return_df=True)],
    },
}

# Datetime columns get two representations. In both, the date feature
# extractor occupies the 'imputers' slot.
encoders_dict['datetime'] = {
    'as_number': {
        'imputers': [de.DateFeatureExtractor()],
        'encoders': [ne.CyclicalFeatures(drop_original=True)],
    },
    'as_category': {
        'imputers': [de.DateFeatureExtractor(dtype='category')],
        'encoders': [ne.MultiTargetEncoder(drop_invariant=True, return_df=True)],
    },
}

# Boolean columns: most-frequent imputation followed by BooleanEncoder.
encoders_dict['boolean'] = {
    'as_number': {
        'imputers': [imputers.SimpleImputer(strategy='most_frequent')],
        'encoders': [ne.BooleanEncoder()],
    },
}

In [8]:
# Build the encoding pipeline from the scheme declared in `encoders_dict`.
pipeline = create_encoding_pipeline(encoders_dict)

Creating encoding pipeline


In [9]:
# Bare expression: show the pipeline's representation as the cell output.
pipeline

##### Dataset

In [10]:
# Parameters used when fetching the raw data: date bounds (written
# day-month-year), the folder holding the data, and the hospitals to fetch.
fetch_config = {
    'data_start': '01-01-2019',
    'data_stop': '31-12-2023',
    'data_dir': root_dir / 'data',
    'locations': [
        'CHU Dijon',
        'CH Beaune',
        'CH Semur',
        'CH Chatillon Montbard',
        'CH privé Dijon',
        'CH Langres',
        'CH Chaumont',
        'HNFC',
        'CHU Besançon',
    ],
}

In [11]:
# Feature classes (from `src.features`) that make up the dataset.
# The list holds the classes themselves, not instances.
ars_features_class = [
    ft.HospitalFeatures,
    ft.AirQualityFeatures,
    ft.EpidemiologicalFeatures,
    # Disabled: ft.FireFightersFeatures(include_calls=False),
    ft.GoogleTrendFeatures,
    ft.MeteorologicalFeatures,
    ft.SociologicalFeatures,
    ft.SportsCompetitionFeatures,
    ft.TrafficFeatures,
]

In [12]:
# Name(s) of the column(s) to predict.
# The spelling 'nb_emmergencies' matches the column name found in the data,
# and the variable name `target_colomns` is referenced by later cells, so
# neither may be "corrected" here.
target_colomns = [
    'nb_emmergencies',
]
# Alternative targets (disabled):
# target_colomns = ['nb_vers_hospit']
# target_colomns = ['nb_hospit_np_adults%%J+1%%mean_7J']

In [13]:
# Splitting scheme used to create the sets: test and validation fractions of
# 0.2 each, with shuffling disabled.
split_config = {
    'test_size': 0.2,
    'val_size': 0.2,
    'shuffle': False,
}

In [14]:
# Instantiate the tabular dataset from the selected feature classes and the
# fetch configuration. The X/y sets and their encodings are not built here:
# they are filled in by the later call to `get_dataset()`.
arsTabularDataset = BaseTabularDataset(
    features_classes=ars_features_class,
    logger=logger,
    fetch_config=fetch_config,
)

root 2024-11-15 16:31:48,330: INFO: Initialisation de la classe BaseTabularDataset
root 2024-11-15 16:31:48,335: INFO: Initialisation des features
root 2024-11-15 16:31:48,336: INFO: Fetching dataset
root 2024-11-15 16:31:48,550: INFO: hospitalfeatures's data already fetched for CHU Dijon
root 2024-11-15 16:31:49,771: INFO: hospitalfeatures's data already fetched for CH Beaune
root 2024-11-15 16:31:50,283: INFO: hospitalfeatures's data already fetched for CH Semur
root 2024-11-15 16:31:50,535: INFO: hospitalfeatures's data already fetched for CH Chatillon Montbard
root 2024-11-15 16:31:50,785: INFO: hospitalfeatures's data already fetched for CH privé Dijon
root 2024-11-15 16:31:51,036: INFO: hospitalfeatures's data already fetched for CH Langres
root 2024-11-15 16:31:51,328: INFO: hospitalfeatures's data already fetched for CH Chaumont
root 2024-11-15 16:31:51,903: INFO: hospitalfeatures's data already fetched for HNFC
root 2024-11-15 16:31:52,139: INFO: hospitalfeatures's data alread

In [15]:
# Build, encode and export one dataset per location.

# Folder receiving the exported CSV files. It is created up front so that
# `to_csv` does not fail when the folder does not exist yet.
datasets_dir = fetch_config['data_dir'] / 'datasets'
datasets_dir.mkdir(parents=True, exist_ok=True)

for location in fetch_config['locations']:
    # Dataset configuration for the current location. The dict is rebuilt on
    # every iteration so that no mutable value (lists) is shared between runs.
    dataset_config = {
        'from_date': '01-01-2019',
        'to_date': '30-12-2023',
        'locations': [location],
        # Disabled option: 'axis': 'rows',
        'shift': range(1, 8, 1),
        # Disabled option: 'rolling_window': [7, 14, 31, 365],
        'freq': '1D',
        'split_config': split_config,
        'create_X_y': True,
        'encoding_pipeline': pipeline,
        'targets_names': target_colomns,
        'targets_shift': -3,
        'targets_rolling_window': 3,
        'targets_history_shifts': range(1, 8, 1),
        'targets_history_rolling_windows': [7, 14, 31, 365],
        'targets_locations': [location],
        'drop_constant_thr': 1.0,
        # Reuse the fetch configuration's data folder (same `root_dir / 'data'`
        # value) instead of recomputing it.
        'data_dir': fetch_config['data_dir'],
    }

    arsTabularDataset.get_dataset(**dataset_config)

    # Encoded data produced by `get_dataset()` for this location.
    df = arsTabularDataset.enc_data

    # Sanity check: list the columns that contain NaN. The mask is computed
    # once and reused for the per-column row lookup below.
    nan_mask = df.isna()
    cols_with_nan = df.columns[nan_mask.any()].tolist()
    print("Colonnes contenant des NaN:", cols_with_nan)

    # For each affected column, the index labels of the rows holding a NaN.
    nan_indices = {col: df.index[nan_mask[col]].tolist() for col in cols_with_nan}
    print(nan_indices)

    # Export the encoded dataset and, separately, the target column(s) taken
    # from the dataset's `data` attribute.
    df.to_csv(datasets_dir / f'full_dataset_{location}.csv')
    df_target = arsTabularDataset.data[arsTabularDataset.targets_names]
    df_target.to_csv(datasets_dir / f'full_dataset_{location}_targets.csv')

root 2024-11-15 16:32:09,876: INFO: Getting the dataset from 01-01-2019 to 30-12-2023 for CHU Dijon
root 2024-11-15 16:32:10,107: INFO: Getting data for hospitalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:10,114: INFO: Augmentation des features...
root 2024-11-15 16:32:10,121: INFO: Getting data for airqualityfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:10,130: INFO: Augmentation des features...
root 2024-11-15 16:32:10,158: INFO: Getting data for epidemiologicalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:10,163: INFO: Augmentation des features...
root 2024-11-15 16:32:10,174: INFO: Getting data for googletrendfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:10,202: INFO: Augmentation des features...
root 2024-11-15 16:32:10,353: INFO: Getting data for meteorologicalfeatures from

            nb_emmergencies  nb_emmergencies%%J-1  nb_emmergencies%%J-2  \
date                                                                      
2019-01-01              202                 202.0                 202.0   
2019-01-02              198                 202.0                 202.0   
2019-01-03              186                 198.0                 202.0   
2019-01-04              186                 186.0                 198.0   
2019-01-05              204                 186.0                 186.0   
...                     ...                   ...                   ...   
2023-12-26              177                 187.0                 177.0   
2023-12-27              165                 177.0                 187.0   
2023-12-28              192                 165.0                 177.0   
2023-12-29              157                 192.0                 165.0   
2023-12-30              212                 157.0                 192.0   

            nb_emmergenc

root 2024-11-15 16:32:10,892: INFO: 30 features not encoded (same unit as target)


[ColumnTransformer] .... (4 of 4) Processing pipeline-5, total=   0.0s
Colonnes contenant des NaN: []
{}


root 2024-11-15 16:32:12,016: INFO: Getting the dataset from 01-01-2019 to 30-12-2023 for CH Beaune
root 2024-11-15 16:32:12,255: INFO: Getting data for hospitalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:12,263: INFO: Augmentation des features...
root 2024-11-15 16:32:12,272: INFO: Getting data for airqualityfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:12,282: INFO: Augmentation des features...
root 2024-11-15 16:32:12,310: INFO: Getting data for epidemiologicalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:12,315: INFO: Augmentation des features...
root 2024-11-15 16:32:12,326: INFO: Getting data for googletrendfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:12,357: INFO: Augmentation des features...
root 2024-11-15 16:32:12,502: INFO: Getting data for meteorologicalfeatures from

            nb_emmergencies  nb_emmergencies%%J-1  nb_emmergencies%%J-2  \
date                                                                      
2019-01-01               54                  54.0                  54.0   
2019-01-02               64                  54.0                  54.0   
2019-01-03               54                  64.0                  54.0   
2019-01-04               61                  54.0                  64.0   
2019-01-05               60                  61.0                  54.0   
...                     ...                   ...                   ...   
2023-12-26               65                  48.0                  42.0   
2023-12-27               55                  65.0                  48.0   
2023-12-28               57                  55.0                  65.0   
2023-12-29               68                  57.0                  55.0   
2023-12-30               54                  68.0                  57.0   

            nb_emmergenc

root 2024-11-15 16:32:12,933: INFO: 30 features not encoded (same unit as target)


[ColumnTransformer] .... (1 of 4) Processing pipeline-1, total=   0.0s
[ColumnTransformer] .... (2 of 4) Processing pipeline-3, total=   0.0s
[ColumnTransformer] .... (3 of 4) Processing pipeline-4, total=   0.0s
[ColumnTransformer] .... (4 of 4) Processing pipeline-5, total=   0.0s
Colonnes contenant des NaN: []
{}


root 2024-11-15 16:32:13,988: INFO: Getting the dataset from 01-01-2019 to 30-12-2023 for CH Semur
root 2024-11-15 16:32:14,242: INFO: Getting data for hospitalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:14,248: INFO: Augmentation des features...
root 2024-11-15 16:32:14,257: INFO: Getting data for airqualityfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:14,302: INFO: Augmentation des features...
root 2024-11-15 16:32:14,342: INFO: Getting data for epidemiologicalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:14,346: INFO: Augmentation des features...
root 2024-11-15 16:32:14,357: INFO: Getting data for googletrendfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:14,386: INFO: Augmentation des features...
root 2024-11-15 16:32:14,526: INFO: Getting data for meteorologicalfeatures from 

Dropped columns with zero variance: ['meteo_tavg', 'meteo_tmin', 'meteo_tmax', 'meteo_prcp', 'meteo_snow', 'meteo_wdir', 'meteo_wspd', 'meteo_pres']
            nb_emmergencies  nb_emmergencies%%J-1  nb_emmergencies%%J-2  \
date                                                                      
2019-01-01               39                  39.0                  39.0   
2019-01-02               69                  39.0                  39.0   
2019-01-03               66                  69.0                  39.0   
2019-01-04               62                  66.0                  69.0   
2019-01-05               47                  62.0                  66.0   
...                     ...                   ...                   ...   
2023-12-26               92                  57.0                  86.0   
2023-12-27               88                  92.0                  57.0   
2023-12-28              101                  88.0                  92.0   
2023-12-29               9

root 2024-11-15 16:32:14,914: INFO: 30 features not encoded (same unit as target)


Dropped 48 constant columns from both sets: {'PM10_FR26010', 'PM25_FR26094', 'PM10_FR26094', 'trend_épilepsie', 'NO2_FR26094', 'NO2_FR26010'}
X shape: (1162, 681), y shape: (1162, 1)
[ColumnTransformer] .... (1 of 4) Processing pipeline-1, total=   0.0s
[ColumnTransformer] .... (2 of 4) Processing pipeline-3, total=   0.0s
[ColumnTransformer] .... (3 of 4) Processing pipeline-4, total=   0.0s
[ColumnTransformer] .... (4 of 4) Processing pipeline-5, total=   0.0s
Colonnes contenant des NaN: []
{}


root 2024-11-15 16:32:15,907: INFO: Getting the dataset from 01-01-2019 to 30-12-2023 for CH Chatillon Montbard
root 2024-11-15 16:32:16,141: INFO: Getting data for hospitalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:16,147: INFO: Augmentation des features...
root 2024-11-15 16:32:16,156: INFO: Getting data for airqualityfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:16,188: INFO: Augmentation des features...
root 2024-11-15 16:32:16,277: INFO: Getting data for epidemiologicalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:16,282: INFO: Augmentation des features...
root 2024-11-15 16:32:16,292: INFO: Getting data for googletrendfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:16,324: INFO: Augmentation des features...
root 2024-11-15 16:32:16,477: INFO: Getting data for meteorologicalf

Dropped columns with zero variance: ['meteo_snow']
            nb_emmergencies  nb_emmergencies%%J-1  nb_emmergencies%%J-2  \
date                                                                      
2019-01-01               25                  25.0                  25.0   
2019-01-02               32                  25.0                  25.0   
2019-01-03               39                  32.0                  25.0   
2019-01-04               35                  39.0                  32.0   
2019-01-05               30                  35.0                  39.0   
...                     ...                   ...                   ...   
2023-12-26               39                  26.0                  34.0   
2023-12-27               41                  39.0                  26.0   
2023-12-28               44                  41.0                  39.0   
2023-12-29               38                  44.0                  41.0   
2023-12-30               40                  38.0

root 2024-11-15 16:32:16,990: INFO: 30 features not encoded (same unit as target)


[ColumnTransformer] .... (4 of 4) Processing pipeline-5, total=   0.0s
Colonnes contenant des NaN: []
{}


root 2024-11-15 16:32:18,056: INFO: Getting the dataset from 01-01-2019 to 30-12-2023 for CH privé Dijon
root 2024-11-15 16:32:18,306: INFO: Getting data for hospitalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:18,312: INFO: Augmentation des features...
root 2024-11-15 16:32:18,320: INFO: Getting data for airqualityfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:18,329: INFO: Augmentation des features...
root 2024-11-15 16:32:18,356: INFO: Getting data for epidemiologicalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:18,360: INFO: Augmentation des features...
root 2024-11-15 16:32:18,371: INFO: Getting data for googletrendfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:18,399: INFO: Augmentation des features...
root 2024-11-15 16:32:18,539: INFO: Getting data for meteorologicalfeatures

            nb_emmergencies  nb_emmergencies%%J-1  nb_emmergencies%%J-2  \
date                                                                      
2019-01-01               72                  72.0                  72.0   
2019-01-02               76                  72.0                  72.0   
2019-01-03               83                  76.0                  72.0   
2019-01-04               69                  83.0                  76.0   
2019-01-05               78                  69.0                  83.0   
...                     ...                   ...                   ...   
2023-12-26              122                  64.0                  63.0   
2023-12-27              107                 122.0                  64.0   
2023-12-28              102                 107.0                 122.0   
2023-12-29              105                 102.0                 107.0   
2023-12-30               97                 105.0                 102.0   

            nb_emmergenc

root 2024-11-15 16:32:18,966: INFO: 30 features not encoded (same unit as target)


[ColumnTransformer] .... (1 of 4) Processing pipeline-1, total=   0.0s
[ColumnTransformer] .... (2 of 4) Processing pipeline-3, total=   0.0s
[ColumnTransformer] .... (3 of 4) Processing pipeline-4, total=   0.0s
[ColumnTransformer] .... (4 of 4) Processing pipeline-5, total=   0.0s
Colonnes contenant des NaN: []
{}


root 2024-11-15 16:32:20,053: INFO: Getting the dataset from 01-01-2019 to 30-12-2023 for CH Langres
root 2024-11-15 16:32:20,286: INFO: Getting data for hospitalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:20,293: INFO: Augmentation des features...
root 2024-11-15 16:32:20,302: INFO: Getting data for airqualityfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:20,315: INFO: Augmentation des features...
root 2024-11-15 16:32:20,341: INFO: Getting data for epidemiologicalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:20,346: INFO: Augmentation des features...
root 2024-11-15 16:32:20,356: INFO: Getting data for googletrendfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:20,385: INFO: Augmentation des features...
root 2024-11-15 16:32:20,573: INFO: Getting data for meteorologicalfeatures fro

Dropped columns with zero variance: ['meteo_snow']
            nb_emmergencies  nb_emmergencies%%J-1  nb_emmergencies%%J-2  \
date                                                                      
2019-01-01               43                  43.0                  43.0   
2019-01-02               36                  43.0                  43.0   
2019-01-03               37                  36.0                  43.0   
2019-01-04               37                  37.0                  36.0   
2019-01-05               31                  37.0                  37.0   
...                     ...                   ...                   ...   
2023-12-26               54                  37.0                  38.0   
2023-12-27               44                  54.0                  37.0   
2023-12-28               42                  44.0                  54.0   
2023-12-29               52                  42.0                  44.0   
2023-12-30               37                  52.0

root 2024-11-15 16:32:20,996: INFO: 30 features not encoded (same unit as target)


Dropped 40 constant columns from both sets: {'PM10_FR26010', 'PM25_FR26094', 'PM10_FR26094', 'NO2_FR26094', 'NO2_FR26010'}
X shape: (1162, 745), y shape: (1162, 1)
[ColumnTransformer] .... (1 of 4) Processing pipeline-1, total=   0.0s
[ColumnTransformer] .... (2 of 4) Processing pipeline-3, total=   0.0s
[ColumnTransformer] .... (3 of 4) Processing pipeline-4, total=   0.0s
[ColumnTransformer] .... (4 of 4) Processing pipeline-5, total=   0.0s
Colonnes contenant des NaN: []
{}


root 2024-11-15 16:32:22,101: INFO: Getting the dataset from 01-01-2019 to 30-12-2023 for CH Chaumont
root 2024-11-15 16:32:22,363: INFO: Getting data for hospitalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:22,371: INFO: Augmentation des features...
root 2024-11-15 16:32:22,382: INFO: Getting data for airqualityfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:22,395: INFO: Augmentation des features...
root 2024-11-15 16:32:22,424: INFO: Getting data for epidemiologicalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:22,429: INFO: Augmentation des features...
root 2024-11-15 16:32:22,447: INFO: Getting data for googletrendfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:22,481: INFO: Augmentation des features...
root 2024-11-15 16:32:22,640: INFO: Getting data for meteorologicalfeatures fr

Dropped columns with zero variance: ['meteo_snow']
            nb_emmergencies  nb_emmergencies%%J-1  nb_emmergencies%%J-2  \
date                                                                      
2019-01-01               50                  50.0                  50.0   
2019-01-02               73                  50.0                  50.0   
2019-01-03               61                  73.0                  50.0   
2019-01-04               67                  61.0                  73.0   
2019-01-05               52                  67.0                  61.0   
...                     ...                   ...                   ...   
2023-12-26               64                  53.0                  49.0   
2023-12-27               64                  64.0                  53.0   
2023-12-28               59                  64.0                  64.0   
2023-12-29               60                  59.0                  64.0   
2023-12-30               64                  60.0

root 2024-11-15 16:32:23,155: INFO: 30 features not encoded (same unit as target)


[ColumnTransformer] .... (3 of 4) Processing pipeline-4, total=   0.0s
[ColumnTransformer] .... (4 of 4) Processing pipeline-5, total=   0.0s
Colonnes contenant des NaN: []
{}


root 2024-11-15 16:32:24,280: INFO: Getting the dataset from 01-01-2019 to 30-12-2023 for HNFC
root 2024-11-15 16:32:24,507: INFO: Getting data for hospitalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:24,514: INFO: Augmentation des features...
root 2024-11-15 16:32:24,521: INFO: Getting data for airqualityfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:24,531: INFO: Augmentation des features...
root 2024-11-15 16:32:24,558: INFO: Getting data for epidemiologicalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:24,562: INFO: Augmentation des features...
root 2024-11-15 16:32:24,573: INFO: Getting data for googletrendfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:24,600: INFO: Augmentation des features...
root 2024-11-15 16:32:24,760: INFO: Getting data for meteorologicalfeatures from 2019

            nb_emmergencies  nb_emmergencies%%J-1  nb_emmergencies%%J-2  \
date                                                                      
2019-01-01              203                 203.0                 203.0   
2019-01-02              213                 203.0                 203.0   
2019-01-03              228                 213.0                 203.0   
2019-01-04              233                 228.0                 213.0   
2019-01-05              215                 233.0                 228.0   
...                     ...                   ...                   ...   
2023-12-26              314                 254.0                 241.0   
2023-12-27              330                 314.0                 254.0   
2023-12-28              280                 330.0                 314.0   
2023-12-29              282                 280.0                 330.0   
2023-12-30              286                 282.0                 280.0   

            nb_emmergenc

root 2024-11-15 16:32:25,170: INFO: 30 features not encoded (same unit as target)


[ColumnTransformer] .... (1 of 4) Processing pipeline-1, total=   0.0s
[ColumnTransformer] .... (2 of 4) Processing pipeline-3, total=   0.0s
[ColumnTransformer] .... (3 of 4) Processing pipeline-4, total=   0.0s
[ColumnTransformer] .... (4 of 4) Processing pipeline-5, total=   0.0s
Colonnes contenant des NaN: []
{}


root 2024-11-15 16:32:26,240: INFO: Getting the dataset from 01-01-2019 to 30-12-2023 for CHU Besançon
root 2024-11-15 16:32:26,477: INFO: Getting data for hospitalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:26,484: INFO: Augmentation des features...
root 2024-11-15 16:32:26,492: INFO: Getting data for airqualityfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:26,525: INFO: Augmentation des features...
root 2024-11-15 16:32:26,576: INFO: Getting data for epidemiologicalfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:26,582: INFO: Augmentation des features...
root 2024-11-15 16:32:26,593: INFO: Getting data for googletrendfeatures from 2019-01-01 00:00:00 to 2023-12-30 00:00:00, at a 1D frequency
root 2024-11-15 16:32:26,621: INFO: Augmentation des features...
root 2024-11-15 16:32:26,781: INFO: Getting data for meteorologicalfeatures f

Dropped columns with zero variance: ['meteo_snow']
            nb_emmergencies  nb_emmergencies%%J-1  nb_emmergencies%%J-2  \
date                                                                      
2019-01-01              186                 186.0                 186.0   
2019-01-02              228                 186.0                 186.0   
2019-01-03              183                 228.0                 186.0   
2019-01-04              212                 183.0                 228.0   
2019-01-05              188                 212.0                 183.0   
...                     ...                   ...                   ...   
2023-12-26              224                 152.0                 174.0   
2023-12-27              181                 224.0                 152.0   
2023-12-28              201                 181.0                 224.0   
2023-12-29              218                 201.0                 181.0   
2023-12-30              210                 218.0

root 2024-11-15 16:32:27,196: INFO: 30 features not encoded (same unit as target)


Dropped 40 constant columns from both sets: {'PM10_FR26010', 'PM25_FR26094', 'PM10_FR26094', 'NO2_FR26094', 'NO2_FR26010'}
X shape: (1162, 745), y shape: (1162, 1)
[ColumnTransformer] .... (1 of 4) Processing pipeline-1, total=   0.0s
[ColumnTransformer] .... (2 of 4) Processing pipeline-3, total=   0.0s
[ColumnTransformer] .... (3 of 4) Processing pipeline-4, total=   0.0s
[ColumnTransformer] .... (4 of 4) Processing pipeline-5, total=   0.0s
Colonnes contenant des NaN: []
{}
