# 01 - Création du dataset des données brutes de metrics fractionnées

Ce notebook génère :

- 1 fichier csv "raw_merge_metrics_dataset.csv" qui fusionne les colonnes fractionnées avec le dataset d'origine
- 1 fichier json "metrics_events_dict.json" pour lister les codes d'identification des évènements

**Etapes de création :**

- Pour chaque colonne contenant des valeurs de type list ou dict

    - Fractionnement des colonnes en dataframe

    - Fusion des dataframes issus de la ou des fraction(s)

- Fusion des colonnes fractionnées avec les colonnes non fractionnées du dataset de départ

## Imports

In [1]:
# Environment diagnostic: show the module search path of the running kernel.
import sys
print(sys.path)


['c:\\Users\\Allan\\Documents\\MGI\\predict-ia-notebook\\JetVarnish3DEvo\\2024_data\\Machine_14\\notebooks', 'C:\\Users\\Allan\\AppData\\Local\\Programs\\Python\\Python310\\python310.zip', 'C:\\Users\\Allan\\AppData\\Local\\Programs\\Python\\Python310\\DLLs', 'C:\\Users\\Allan\\AppData\\Local\\Programs\\Python\\Python310\\lib', 'C:\\Users\\Allan\\AppData\\Local\\Programs\\Python\\Python310', 'c:\\Users\\Allan\\Documents\\MGI\\predict-ia-notebook\\venv', '', 'c:\\Users\\Allan\\Documents\\MGI\\predict-ia-notebook\\venv\\lib\\site-packages', 'c:\\Users\\Allan\\Documents\\MGI\\predict-ia-notebook\\venv\\lib\\site-packages\\win32', 'c:\\Users\\Allan\\Documents\\MGI\\predict-ia-notebook\\venv\\lib\\site-packages\\win32\\lib', 'c:\\Users\\Allan\\Documents\\MGI\\predict-ia-notebook\\venv\\lib\\site-packages\\Pythonwin']


In [2]:
# Environment diagnostic: show which Python interpreter runs this kernel.
print(sys.executable)

c:\Users\Allan\Documents\MGI\predict-ia-notebook\venv\Scripts\python.exe


In [3]:
import os, json, ast
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from pathlib import Path

## 1. Création dataset metrics

In [4]:
# Input: raw metrics export (directory and file name are kept as two
# separate variables in addition to the joined path).
path = '../data/raw/'
filename = 'metrics.csv'
source_csv = os.path.join(path, filename)

# Outputs: merged raw metrics dataset and events dictionary.
save_csv = '../data/metrics/raw_merge_metrics_dataset.csv'
save_json = '../data/metrics/metrics_events_dict.json'

### a) Import des données brutes

In [5]:
# # Download a 'csv' file from the Azure blob into the 'data' directory
# from azure_blob import download_blob_file
# download_blob_file(file_name=filename, local_path=path)

In [6]:
# Load the raw metrics CSV into a dataframe, order the rows by `created_at`
# and discard the original index in favour of a fresh one.
metrics_df = (
    pd.read_csv(Path(source_csv))
    .sort_values('created_at')
    .reset_index(drop=True)
)
metrics_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59372 entries, 0 to 59371
Data columns (total 21 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   id                          59372 non-null  int64  
 1   status                      59372 non-null  object 
 2   created_at                  59372 non-null  object 
 3   cyan_capacity               0 non-null      float64
 4   cyan_remaining              0 non-null      float64
 5   magenta_capacity            0 non-null      float64
 6   magenta_remaining           0 non-null      float64
 7   yellow_capacity             0 non-null      float64
 8   yellow_remaining            0 non-null      float64
 9   black_capacity              0 non-null      float64
 10  black_remaining             0 non-null      float64
 11  machineId                   59372 non-null  int64  
 12  connected_operators         59372 non-null  object 
 13  varnishLevelsTargetvolume   593

In [7]:
# Drop the columns that hold no value at all, then the `machineId` column.
# `how='all'` is what "no value" means: the default (`how='any'`) also drops
# every column that has a single missing value.
# `errors='ignore'` keeps the cell re-runnable: `metrics_df` is reassigned
# here, so on a second run `machineId` is already gone.
metrics_df = (
    metrics_df
    .dropna(axis=1, how='all')
    .drop(columns='machineId', errors='ignore')
)
# Preview the first 3 rows.
metrics_df.head(3)

Unnamed: 0,id,status,created_at,connected_operators,varnishLevelsTargetvolume,varnishLevelsTotalvolume,modules,events
0,26377486,WARNING,2024-01-02 13:39:57.321000,"[{""name"": ""JAN"", ""level"": ""Operator""}]",40490.237879,100000,"[{""sn"": """", ""name"": ""Print Engine 1"", ""type"": ...",[]
1,26377496,WARNING,2024-01-02 13:40:07.223000,"[{""name"": ""JAN"", ""level"": ""Operator""}]",40490.237879,100000,"[{""sn"": """", ""name"": ""Print Engine 1"", ""type"": ...","[{""source"": ""PLC"", ""message"": "" JV-Ti non prêt..."
2,26377507,WARNING,2024-01-02 13:40:17.284000,"[{""name"": ""JAN"", ""level"": ""Operator""}]",40490.237879,100000,"[{""sn"": """", ""name"": ""Print Engine 1"", ""type"": ...",[]


### b) Fractionnement des colonnes contenant des listes

In [8]:
# Check the Python type of the first value of each object column that
# holds a list.
for column_name in ('modules', 'events', 'connected_operators'):
    print(column_name + ' :', type(metrics_df[column_name].loc[0]))

modules : <class 'str'>
events : <class 'str'>
connected_operators : <class 'str'>


In [9]:
def convert_col_to_df(col, df, data=None):
    """Split a column of lists of dicts into a flat dataframe.

    Each cell of ``df[col]`` is expected to hold a list of dicts, either as a
    Python list or as its JSON text. Every dict becomes one output row and
    each of its keys ``k`` becomes a column named ``k + '_' + col``. Cells
    that are empty or that are not lists produce no output row.

    Parameters
    ----------
    col : str
        Name of the column to split.
    df : pandas.DataFrame
        Source dataframe. It is only read: JSON text is decoded into a local
        list, so the caller's column keeps its original values.
    data : dict, optional
        Its keys name the columns of ``df`` to repeat on every output row
        (e.g. ``{'id': []}``). Only the keys are used and the dict is not
        modified. ``None`` means no column is carried over.

    Returns
    -------
    pandas.DataFrame
        The carried-over columns first (in ``data`` order), then the split
        columns in order of first appearance. A key missing from one of the
        dicts is filled with ``None``. The frame is empty when no cell holds
        a non-empty list.
    """
    carried_cols = list(data.keys()) if data is not None else []

    def _as_list(cell):
        # JSON text is decoded; an already decoded value is passed through.
        return json.loads(cell) if isinstance(cell, str) else cell

    # Work by position so that any index (not only a 0-based RangeIndex) works.
    cells = [_as_list(cell) for cell in df[col]]
    carried_values = [df[name].tolist() for name in carried_cols]

    items = []      # one (row position, dict) pair per element of the lists
    item_keys = {}  # dict used as an insertion-ordered set of the dict keys
    for position, cell in enumerate(cells):
        if not isinstance(cell, list):
            continue
        for item in cell:
            items.append((position, item))
            for key in item.keys():
                item_keys.setdefault(key, None)

    # Same trace as before: show the first element found, when there is one.
    if items:
        first = items[0][1]
        print('first : ', type(first), first)

    result = {
        name: [values[position] for position, _ in items]
        for name, values in zip(carried_cols, carried_values)
    }
    for key in item_keys:
        # .get() pads the dicts that lack this key, keeping all columns aligned.
        result[key + '_' + col] = [item.get(key) for _, item in items]

    return pd.DataFrame(result)

In [10]:
# Control id used to verify the split and the merge of the columns.
check_id = 26_377_588

In [11]:
# # Initialize an empty list to collect all the 'counters' values
# counters_values = []

# # Define a function to extract 'counters' values from the 'modules' column
# def extract_counters_value(row):
#     try:
#         # Convert the JSON-like string to a Python object (list of dictionaries in this case)
#         modules_list = json.loads(row.replace("'", "\""))
        
#         # Loop through the list of dictionaries to find the 'counters' key-value pair
#         for module in modules_list:
#             if 'counters' in module:
#                 for counter in module['counters']:
#                     counters_values.append(counter['name'])
#     except:
#         # Handle any exceptions that may occur during JSON conversion or key access
#         pass

# # Apply the function to each row in the 'modules' column
# metrics_df['modules'].apply(extract_counters_value)

# # Find unique 'counters' values
# unique_counters_values = set(counters_values)

# # Display the unique 'counters' values and their count
# unique_counters_values, len(unique_counters_values)

In [12]:
# Row(s) of the metrics dataframe whose id is the control id.
check_line = metrics_df.loc[metrics_df['id'] == check_id]

In [13]:
# Range of the 'id' column: largest and smallest value, printed in that order.
max_id, min_id = metrics_df['id'].max(), metrics_df['id'].min()
print("Valeur maximale de l'id:", max_id)
print("Valeur minimale de l'id:", min_id)


Valeur maximale de l'id: 27252818
Valeur minimale de l'id: 26377486


In [14]:
# Details of the control row: its raw values, then the counters of every
# module and every event it holds.
check_line = metrics_df[metrics_df.id == check_id]
print('######## id %d ######## ' %check_id)
print(check_line.values)

# The cells are decoded once, and only when they are still JSON text, so the
# cell also works when the column already holds parsed lists.
check_modules = check_line.modules.values[0]
if isinstance(check_modules, str):
    check_modules = json.loads(check_modules)
check_events = check_line.events.values[0]
if isinstance(check_events, str):
    check_events = json.loads(check_events)

# Loop over every element instead of indexing [0] and [1], which raised an
# IndexError for a row with fewer than two modules or events.
print('######## id %d modules details ######## ' %check_id)
for module in check_modules:
    print(module.get('counters'))
print('######## id %d events details ######## ' %check_id)
for event in check_events:
    print(event)

######## id 26377588 ######## 
[[26377588 'IDLE' '2024-01-02 13:41:37.441000'
  '[{"name": "JAN", "level": "Operator"}]' 40490.23787903 100000
  '[{"sn": "", "name": "Print Engine 1", "type": "Varnish Printer", "counters": [{"name": "3D Varnish Counter", "value": 36042}], "generation": ""}, {"sn": "", "name": "iFoil L", "type": "iFoil", "counters": [{"name": "Total Pages Counter", "value": 52108}, {"name": "Foiled Pages Counter", "value": 132061}], "generation": "Gen. 2"}]'
  '[{"source": "PLC", "message": " Portes margeur ouvertes E-0417", "timestamp": "2024-01-02T13:41:28.848Z", "criticality": "ERROR", "identification": "417"}, {"source": "PLC", "message": " Plateau de têtes en mouvement", "timestamp": "2024-01-02T13:41:31.674Z", "criticality": "INFO", "identification": "333"}]']]
######## id 26377588 modules details ######## 
[{'name': '3D Varnish Counter', 'value': 36042}]
[{'name': 'Total Pages Counter', 'value': 52108}, {'name': 'Foiled Pages Counter', 'value': 132061}]
######## 

#### 1) Colonne 'connected_operators'

In [15]:
# Build the connected_operators dataframe, keeping the message id on every
# row (~42s in the recorded run).
connected_operators_df = convert_col_to_df('connected_operators', metrics_df, {'id': []})
# info() prints its summary itself and returns None: it is called bare,
# because wrapping it in print() adds a stray "None" line to the output.
connected_operators_df.info()
connected_operators_df.head(2)

first :  <class 'dict'> {'name': 'JAN', 'level': 'Operator'}
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59372 entries, 0 to 59371
Data columns (total 3 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   id                         59372 non-null  int64 
 1   name_connected_operators   59372 non-null  object
 2   level_connected_operators  59372 non-null  object
dtypes: int64(1), object(2)
memory usage: 1.4+ MB
None


Unnamed: 0,id,name_connected_operators,level_connected_operators
0,26377486,JAN,Operator
1,26377496,JAN,Operator


#### 2) Colonne 'events'

In [16]:
# Build the events dataframe, keeping the message id on every row
# (~20s in the recorded run).
events_df = convert_col_to_df('events', metrics_df, {'id': []})
# info() prints its summary itself and returns None: it is called bare,
# because wrapping it in print() adds a stray "None" line to the output.
events_df.info()
events_df.head(2)

first :  <class 'dict'> {'source': 'PLC', 'message': ' JV-Ti non prêt : impression impossible', 'timestamp': '2024-01-02T13:39:58.986Z', 'criticality': 'INFO', 'identification': '391'}
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3479 entries, 0 to 3478
Data columns (total 6 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   id                     3479 non-null   int64 
 1   source_events          3479 non-null   object
 2   message_events         3479 non-null   object
 3   timestamp_events       3479 non-null   object
 4   criticality_events     3479 non-null   object
 5   identification_events  3479 non-null   object
dtypes: int64(1), object(5)
memory usage: 163.2+ KB
None


Unnamed: 0,id,source_events,message_events,timestamp_events,criticality_events,identification_events
0,26377496,PLC,JV-Ti non prêt : impression impossible,2024-01-02T13:39:58.986Z,INFO,391
1,26377496,iFoil,JV-Ti non prêt : impression impossible,2024-01-02T13:39:59.075Z,INFO,391


##### Identification

In [17]:
# Distinct identification codes of the events (order of first appearance),
# displayed sorted.
identification_codes_list = events_df.identification_events.drop_duplicates().to_numpy()
np.sort(identification_codes_list)

array(['0', '311', '313', '315', '321', '323', '324', '330', '331', '332',
       '333', '334', '343', '344', '345', '352', '354', '355', '356',
       '357', '358', '371', '372', '373', '374', '376', '377', '386',
       '387', '391', '405', '406', '407', '408', '417', '440', '445',
       '453', '454', '460', '470', '472', '475', '480', 'Kernel_Error',
       'RCB communication error'], dtype=object)

In [18]:
# Unique events: map each identification code to the message of its first
# occurrence in the events dataframe.
identification_dict = {}
for code, message in zip(events_df['identification_events'], events_df['message_events']):
    # setdefault only stores the message the first time a code is seen; the
    # dict lookup replaces the former list scan and no longer shadows `id`.
    identification_dict.setdefault(code, message)
# Codes in order of first appearance (kept for any later use of `id_list`).
id_list = list(identification_dict)
identification_dict

{'391': ' JV-Ti non prêt : impression impossible',
 '333': ' Plateau de têtes en mouvement',
 '330': ' En attente',
 '334': ' Préchauffage',
 '417': ' Portes margeur ouvertes E-0417',
 '387': ' Erreur positionnement plateau E-0387',
 '311': ' Attente initialisation',
 '354': ' Bourrage : module impression E-0354',
 '315': ' Support lingette non installé',
 '406': ' Purge en cours',
 '440': ' Défaut remplissage réservoirs E-0440',
 '407': ' Essuyage en cours',
 '332': ' Disponible',
 '331': ' Impression en cours',
 '377': ' Chargeur: mode auto non activé',
 '344': ' Manque papier',
 '321': " Arrêt d'urgence impression E-0321",
 '480': " Arrêt d'urgence stacker E-0480",
 '386': ' Erreur four UV E-0386',
 '372': ' Attente démarrage UV',
 '371': ' Démarrrage UV en cours',
 '0': '',
 '374': ' Défaut LED UV E-0374',
 'Kernel_Error': 'AIS, There might be a communication problem with the following cameras : 1 2 3 4\n',
 '376': ' Réception: mode auto non activé',
 '470': ' Défaut communication 

##### Source

In [19]:
# Distinct event sources (order of first appearance), displayed sorted.
source_list = events_df.source_events.drop_duplicates().to_numpy()
np.sort(source_list)

array(['Kernel', 'PLC', 'RCB n°1', 'RCB n°2', 'RCB n°3', 'iFoil'],
      dtype=object)

##### Criticality

In [20]:
# Distinct event criticality levels (order of first appearance), displayed
# sorted.
criticality_list = events_df.criticality_events.drop_duplicates().to_numpy()
np.sort(criticality_list)



##### Events Json dict

In [21]:
# Save the event lookup tables (identification codes, criticality levels,
# sources) into the JSON dictionary file, keeping any other key it holds.
json_path = Path(save_json)
json_path.parent.mkdir(parents=True, exist_ok=True)
try:
    # A missing or empty file just means there is nothing to keep yet
    # (mode "r+" used to fail when the file did not exist).
    raw_json = json_path.read_text(encoding='utf-8') if json_path.exists() else ''
    data = json.loads(raw_json) if raw_json.strip() else {}
except ValueError as e:
    # Unreadable JSON: report it and leave the file untouched.
    print(e)
else:
    data['identification'] = identification_dict
    # No trailing comma here: it turned the value into a 1-tuple, which was
    # written as a list nested in a list.
    data['criticality'] = np.sort(criticality_list).tolist()
    data['source'] = np.sort(source_list).tolist()
    # Mode "w" truncates the file, so a payload shorter than the previous
    # one cannot leave stale bytes behind; `with` closes the file.
    with open(file=json_path, mode="w", encoding='utf-8') as jsonFile:
        json.dump(data, jsonFile, indent=4, ensure_ascii=False)

#### 3) Colonne 'modules'

In [22]:
# Build the modules dataframe, keeping the message id on every row
# (~1m15s in the recorded run).
modules_df = convert_col_to_df('modules', metrics_df, {'id': []})
# Inspect the result. info() prints its summary itself and returns None: it
# is called bare, because print() around it adds a stray "None" line.
modules_df.info()
modules_df.head(2)

first :  <class 'dict'> {'sn': '', 'name': 'Print Engine 1', 'type': 'Varnish Printer', 'counters': [{'name': '3D Varnish Counter', 'value': 36042}], 'generation': ''}
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 118744 entries, 0 to 118743
Data columns (total 6 columns):
 #   Column              Non-Null Count   Dtype 
---  ------              --------------   ----- 
 0   id                  118744 non-null  int64 
 1   sn_modules          118744 non-null  object
 2   name_modules        118744 non-null  object
 3   type_modules        118744 non-null  object
 4   counters_modules    118744 non-null  object
 5   generation_modules  118744 non-null  object
dtypes: int64(1), object(5)
memory usage: 5.4+ MB
None


Unnamed: 0,id,sn_modules,name_modules,type_modules,counters_modules,generation_modules
0,26377486,,Print Engine 1,Varnish Printer,"[{'name': '3D Varnish Counter', 'value': 36042}]",
1,26377486,,iFoil L,iFoil,"[{'name': 'Total Pages Counter', 'value': 5210...",Gen. 2


##### Colonne counters

In [23]:
# Build the counters dataframe from the modules dataframe, keeping the
# module type and the message id on every row (~2m7s in the recorded run).
counters_df = convert_col_to_df('counters_modules', modules_df, {'type_modules': [], 'id': []})
# Inspect the result. info() prints its summary itself and returns None: it
# is called bare, because print() around it adds a stray "None" line.
counters_df.info()
counters_df.head(2)

first :  <class 'dict'> {'name': '3D Varnish Counter', 'value': 36042}
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178116 entries, 0 to 178115
Data columns (total 4 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   type_modules            178116 non-null  object
 1   id                      178116 non-null  int64 
 2   name_counters_modules   178116 non-null  object
 3   value_counters_modules  178116 non-null  int64 
dtypes: int64(2), object(2)
memory usage: 5.4+ MB
None


Unnamed: 0,type_modules,id,name_counters_modules,value_counters_modules
0,Varnish Printer,26377486,3D Varnish Counter,36042
1,iFoil,26377486,Total Pages Counter,52108


In [24]:
# Data integrity check: counters rows of the control id.
counters_check_line = counters_df.loc[counters_df['id'] == check_id]
counters_check_line

Unnamed: 0,type_modules,id,name_counters_modules,value_counters_modules
30,Varnish Printer,26377588,3D Varnish Counter,36042
31,iFoil,26377588,Total Pages Counter,52108
32,iFoil,26377588,Foiled Pages Counter,132061


### c) Fusion des dataframes des colonnes fractionnées

#### 1) Merge modules et counters

In [25]:
# Join the modules and counters dataframes on (id, type_modules), then drop
# the column that has been split.
merge_modules_df = (
    modules_df
    .merge(counters_df, on=['id', 'type_modules'])
    .drop(columns=['counters_modules'])
)
# Data integrity check on the control id.
module_check_line = merge_modules_df.loc[merge_modules_df['id'] == check_id]
module_check_line

Unnamed: 0,id,sn_modules,name_modules,type_modules,generation_modules,name_counters_modules,value_counters_modules
30,26377588,,Print Engine 1,Varnish Printer,,3D Varnish Counter,36042
31,26377588,,iFoil L,iFoil,Gen. 2,Total Pages Counter,52108
32,26377588,,iFoil L,iFoil,Gen. 2,Foiled Pages Counter,132061


In [26]:
# Summary of the merged modules/counters dataframe.
merge_modules_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 178116 entries, 0 to 178115
Data columns (total 7 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   id                      178116 non-null  int64 
 1   sn_modules              178116 non-null  object
 2   name_modules            178116 non-null  object
 3   type_modules            178116 non-null  object
 4   generation_modules      178116 non-null  object
 5   name_counters_modules   178116 non-null  object
 6   value_counters_modules  178116 non-null  int64 
dtypes: int64(2), object(5)
memory usage: 10.9+ MB


#### 2) Merge operators

In [27]:
# Join the operators dataframe on the message id; colliding column names
# coming from the operators side receive the '_op' suffix.
merge_operators_df = merge_modules_df.merge(
    connected_operators_df,
    on='id',
    suffixes=('', '_op'),
)
# Data integrity check on the control id.
op_check_line = merge_operators_df.loc[merge_operators_df['id'] == check_id]
op_check_line

Unnamed: 0,id,sn_modules,name_modules,type_modules,generation_modules,name_counters_modules,value_counters_modules,name_connected_operators,level_connected_operators
30,26377588,,Print Engine 1,Varnish Printer,,3D Varnish Counter,36042,JAN,Operator
31,26377588,,iFoil L,iFoil,Gen. 2,Total Pages Counter,52108,JAN,Operator
32,26377588,,iFoil L,iFoil,Gen. 2,Foiled Pages Counter,132061,JAN,Operator


#### 3) Merge events

In [28]:
# Events whose source is iFoil.
events_ifoil = events_df.loc[events_df['source_events'] == 'iFoil']
# Merged module rows whose module type is iFoil.
module_ifoil = merge_operators_df.loc[merge_operators_df['type_modules'] == 'iFoil']
# Outer join of both iFoil subsets on the message id.
merge_ifoil_df = events_ifoil.merge(
    module_ifoil,
    how='outer',
    on='id',
    suffixes=('_event', '_module'),
)
# Data integrity check on the control id.
events_check_line = merge_ifoil_df.loc[merge_ifoil_df['id'] == check_id]
events_check_line

Unnamed: 0,id,source_events,message_events,timestamp_events,criticality_events,identification_events,sn_modules,name_modules,type_modules,generation_modules,name_counters_modules,value_counters_modules,name_connected_operators,level_connected_operators
810,26377588,,,,,,,iFoil L,iFoil,Gen. 2,Total Pages Counter,52108,JAN,Operator
811,26377588,,,,,,,iFoil L,iFoil,Gen. 2,Foiled Pages Counter,132061,JAN,Operator


Comme il y avait 2 counters ('Total Pages Counter' et 'Foiled Pages Counter') pour le module de type 'iFoil', nous avons bien 2 lignes.

In [29]:
# Events whose source is anything but iFoil.
events_no_ifoil = events_df.loc[events_df['source_events'] != 'iFoil']
# Merged module rows whose module type is anything but iFoil.
module_no_ifoil = merge_operators_df.loc[merge_operators_df['type_modules'] != 'iFoil']
# Outer join of both non-iFoil subsets on the message id.
merge_no_ifoil_df = events_no_ifoil.merge(
    module_no_ifoil,
    how='outer',
    on='id',
    suffixes=('_event', '_module'),
)
# Data integrity check on the control id.
events_check_line = merge_no_ifoil_df.loc[merge_no_ifoil_df['id'] == check_id]
events_check_line

Unnamed: 0,id,source_events,message_events,timestamp_events,criticality_events,identification_events,sn_modules,name_modules,type_modules,generation_modules,name_counters_modules,value_counters_modules,name_connected_operators,level_connected_operators
5,26377588,PLC,Portes margeur ouvertes E-0417,2024-01-02T13:41:28.848Z,ERROR,417,,Print Engine 1,Varnish Printer,,3D Varnish Counter,36042,JAN,Operator
6,26377588,PLC,Plateau de têtes en mouvement,2024-01-02T13:41:31.674Z,INFO,333,,Print Engine 1,Varnish Printer,,3D Varnish Counter,36042,JAN,Operator


##### Concaténation

In [30]:
# Stack the iFoil and non-iFoil merges so that no row is lost; each part
# keeps its own index labels.
concat_events_df = pd.concat((merge_ifoil_df, merge_no_ifoil_df), axis=0)
# Data integrity check on the control id.
events_check_line = concat_events_df.loc[concat_events_df['id'] == check_id]
events_check_line

Unnamed: 0,id,source_events,message_events,timestamp_events,criticality_events,identification_events,sn_modules,name_modules,type_modules,generation_modules,name_counters_modules,value_counters_modules,name_connected_operators,level_connected_operators
810,26377588,,,,,,,iFoil L,iFoil,Gen. 2,Total Pages Counter,52108,JAN,Operator
811,26377588,,,,,,,iFoil L,iFoil,Gen. 2,Foiled Pages Counter,132061,JAN,Operator
5,26377588,PLC,Portes margeur ouvertes E-0417,2024-01-02T13:41:28.848Z,ERROR,417.0,,Print Engine 1,Varnish Printer,,3D Varnish Counter,36042,JAN,Operator
6,26377588,PLC,Plateau de têtes en mouvement,2024-01-02T13:41:31.674Z,INFO,333.0,,Print Engine 1,Varnish Printer,,3D Varnish Counter,36042,JAN,Operator


In [31]:
# Summary of the concatenated events dataframe.
concat_events_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 178761 entries, 0 to 59860
Data columns (total 14 columns):
 #   Column                     Non-Null Count   Dtype 
---  ------                     --------------   ----- 
 0   id                         178761 non-null  int64 
 1   source_events              3876 non-null    object
 2   message_events             3876 non-null    object
 3   timestamp_events           3876 non-null    object
 4   criticality_events         3876 non-null    object
 5   identification_events      3876 non-null    object
 6   sn_modules                 178761 non-null  object
 7   name_modules               178761 non-null  object
 8   type_modules               178761 non-null  object
 9   generation_modules         178761 non-null  object
 10  name_counters_modules      178761 non-null  object
 11  value_counters_modules     178761 non-null  int64 
 12  name_connected_operators   178761 non-null  object
 13  level_connected_operators  178761 non-null  o

#### 4) Merge metrics

In [32]:
# Final merge: bring back the remaining columns of the metrics dataframe
# (outer join on the message id), then drop the columns that were split.
merge_metrics_df = (
    concat_events_df
    .merge(metrics_df, how='outer', on='id', suffixes=('', '_metrics'))
    .drop(columns=['connected_operators', 'modules', 'events'])
)
# Data integrity check on the control id.
metrics_check_line = merge_metrics_df.loc[merge_metrics_df['id'] == check_id]
metrics_check_line

Unnamed: 0,id,source_events,message_events,timestamp_events,criticality_events,identification_events,sn_modules,name_modules,type_modules,generation_modules,name_counters_modules,value_counters_modules,name_connected_operators,level_connected_operators,status,created_at,varnishLevelsTargetvolume,varnishLevelsTotalvolume
1156,26377588,,,,,,,iFoil L,iFoil,Gen. 2,Total Pages Counter,52108,JAN,Operator,IDLE,2024-01-02 13:41:37.441000,40490.237879,100000
1157,26377588,,,,,,,iFoil L,iFoil,Gen. 2,Foiled Pages Counter,132061,JAN,Operator,IDLE,2024-01-02 13:41:37.441000,40490.237879,100000
1158,26377588,PLC,Portes margeur ouvertes E-0417,2024-01-02T13:41:28.848Z,ERROR,417.0,,Print Engine 1,Varnish Printer,,3D Varnish Counter,36042,JAN,Operator,IDLE,2024-01-02 13:41:37.441000,40490.237879,100000
1159,26377588,PLC,Plateau de têtes en mouvement,2024-01-02T13:41:31.674Z,INFO,333.0,,Print Engine 1,Varnish Printer,,3D Varnish Counter,36042,JAN,Operator,IDLE,2024-01-02 13:41:37.441000,40490.237879,100000


In [33]:
# Summary of the final merged dataframe.
merge_metrics_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 178761 entries, 0 to 178760
Data columns (total 18 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   id                         178761 non-null  int64  
 1   source_events              3876 non-null    object 
 2   message_events             3876 non-null    object 
 3   timestamp_events           3876 non-null    object 
 4   criticality_events         3876 non-null    object 
 5   identification_events      3876 non-null    object 
 6   sn_modules                 178761 non-null  object 
 7   name_modules               178761 non-null  object 
 8   type_modules               178761 non-null  object 
 9   generation_modules         178761 non-null  object 
 10  name_counters_modules      178761 non-null  object 
 11  value_counters_modules     178761 non-null  int64  
 12  name_connected_operators   178761 non-null  object 
 13  level_connected_operators  17

## 2. Output csv

In [34]:
# Write the merged dataset to the target csv file (the index is written too).
merge_metrics_df.to_csv(Path(save_csv))