# 01 - Création du dataset des données brutes de metrics fractionnées

Ce notebook génère :

- 1 fichier csv "raw_merge_metrics_dataset.csv" qui fusionne les colonnes fractionnées avec le dataset d'origine
- 1 fichier json "metrics_events_dict.json" pour lister les codes d'identification des évènements

**Etapes de création :**

- Pour chaque colonne contenant des valeurs de type list ou dict

    - Fractionnement des colonnes en dataframe

    - Fusion des dataframes issus de la ou des fraction(s)

- Fusion des colonnes fractionnées avec les colonnes non fractionnées du dataset de départ

## Imports

In [1]:
import os, json, ast
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from pathlib import Path

## 1. Création dataset metrics

In [2]:
# --- Input: raw metrics export ------------------------------------------
# Directory and file name are kept as separate variables and joined below.
path = '../data/raw/'
filename = 'metrics.csv'
source_csv = os.path.join(path, filename)

# --- Outputs ------------------------------------------------------------
# JSON dictionary describing the events
save_json = '../data/metrics/metrics_events_dict.json'
# CSV holding the merged raw metrics dataset
save_csv = '../data/metrics/raw_merge_metrics_dataset.csv'

### a) Import des données brutes

In [3]:
# # téléchargement dans le répertoire 'data' d'un fichier 'csv' depuis le blob Azure
# from azure_blob import download_blob_file
# download_blob_file(file_name=filename, local_path=path)

In [4]:
# Load the raw metrics CSV, order the rows by 'created_at' and renumber
# them from 0 (the previous index is discarded).
metrics_df = (
    pd.read_csv(Path(source_csv))
    .sort_values(by='created_at')
    .reset_index(drop=True)
)
metrics_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 251840 entries, 0 to 251839
Data columns (total 21 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   id                          251840 non-null  int64  
 1   status                      251840 non-null  object 
 2   created_at                  251840 non-null  object 
 3   cyan_capacity               0 non-null       float64
 4   cyan_remaining              0 non-null       float64
 5   magenta_capacity            0 non-null       float64
 6   magenta_remaining           0 non-null       float64
 7   yellow_capacity             0 non-null       float64
 8   yellow_remaining            0 non-null       float64
 9   black_capacity              0 non-null       float64
 10  black_remaining             0 non-null       float64
 11  machineId                   251840 non-null  int64  
 12  connected_operators         251840 non-null  object 
 13  varnishLevelsT

In [5]:
# Drop every column that contains at least one missing value (pandas' default
# `how='any'`); this removes, among others, the columns holding no value at all.
# NOTE(review): if only fully-empty columns should be removed, `how='all'` is
# the matching call - confirm before changing, it may keep extra columns.
# `errors='ignore'` keeps the cell re-runnable: without it a second execution
# raises KeyError because 'machineId' has already been removed.
metrics_df = (
    metrics_df
    .dropna(axis=1)
    .drop(columns='machineId', errors='ignore')
)
# preview of the first 3 rows
metrics_df.head(3)

Unnamed: 0,id,status,created_at,connected_operators,varnishLevelsTargetvolume,varnishLevelsTotalvolume,modules,events
0,9698765,WARNING,2022-11-03 08:43:13.960000,"[{""name"": ""User"", ""level"": ""Operator""}]",1386.302305,18000,"[{""sn"": """", ""name"": ""Print Engine 1"", ""type"": ...",[]
1,9698772,WARNING,2022-11-03 08:43:23.785000,"[{""name"": ""User"", ""level"": ""Operator""}]",1386.302305,18000,"[{""sn"": """", ""name"": ""Print Engine 1"", ""type"": ...","[{""source"": ""PLC"", ""message"": "" Essuyage en co..."
2,9698779,WARNING,2022-11-03 08:43:33.802000,"[{""name"": ""User"", ""level"": ""Operator""}]",1386.302305,18000,"[{""sn"": """", ""name"": ""Print Engine 1"", ""type"": ...",[]


### b) Fractionnement des colonnes contenant des listes

In [6]:
# Show the Python type stored in the first row of each object column that
# holds list-like content.
for list_col in ('modules', 'events', 'connected_operators'):
    print(list_col + ' :', type(metrics_df[list_col].loc[0]))

modules : <class 'str'>
events : <class 'str'>
connected_operators : <class 'str'>


In [7]:
def convert_col_to_df(col, df, data=None):
    """Split a column of lists of dicts into one dataframe row per list item.

    Parameters
    ----------
    col : str
        Name of the column to split. Each cell is either a JSON string
        encoding a list of dicts or an already decoded list of dicts.
    df : pandas.DataFrame
        Source dataframe. It is left untouched (no in-place decoding).
    data : dict, optional
        Dict whose keys name the columns of ``df`` to repeat on every
        generated row (e.g. ``{'id': []}``). Only the keys are used and the
        dict itself is not modified. ``None`` means "keep no column".

    Returns
    -------
    pandas.DataFrame
        One row per item found in ``df[col]``. Columns are the kept columns
        followed by ``'<key>_<col>'`` for every key met in the items, in order
        of first appearance. Cells that are empty lists (or not lists)
        produce no row; an item lacking a key gets a missing value for it.
    """
    # names of the source columns to carry over on each generated row
    keep_cols = [] if data is None else list(data.keys())

    # decode JSON strings cell by cell; already decoded cells pass through
    decoded = [json.loads(cell) if isinstance(cell, str) else cell
               for cell in df[col]]

    # kept values are read positionally, so any index type is supported
    kept_values = [df[name].tolist() for name in keep_cols]

    records = []
    item_cols = []     # '<key>_<col>' column names, in order of first appearance
    seen_cols = set()
    for pos, items in enumerate(decoded):
        if not isinstance(items, list) or len(items) == 0:
            continue
        if not records:
            # trace of the first non-empty occurrence (kept from the original output)
            first = items[0]
            print('first : ', type(first), first)
        for item in items:
            record = {name: values[pos]
                      for name, values in zip(keep_cols, kept_values)}
            for key, value in item.items():
                out_col = key + '_' + col
                if out_col not in seen_cols:
                    seen_cols.add(out_col)
                    item_cols.append(out_col)
                record[out_col] = value
            records.append(record)

    # explicit column list fixes the order and yields an empty but well-formed
    # dataframe when no item was found
    return pd.DataFrame(records, columns=keep_cols + item_cols)

In [8]:
# control message id used to verify the splitting and the merging of the columns
check_id = 9698772

In [9]:
# # Initialize an empty list to collect all the 'counters' values
# counters_values = []

# # Define a function to extract 'counters' values from the 'modules' column
# def extract_counters_value(row):
#     try:
#         # Convert the JSON-like string to a Python object (list of dictionaries in this case)
#         modules_list = json.loads(row.replace("'", "\""))
        
#         # Loop through the list of dictionaries to find the 'counters' key-value pair
#         for module in modules_list:
#             if 'counters' in module:
#                 for counter in module['counters']:
#                     counters_values.append(counter['name'])
#     except:
#         # Handle any exceptions that may occur during JSON conversion or key access
#         pass

# # Apply the function to each row in the 'modules' column
# metrics_df['modules'].apply(extract_counters_value)

# # Find unique 'counters' values
# unique_counters_values = set(counters_values)

# # Display the unique 'counters' values and their count
# unique_counters_values, len(unique_counters_values)

In [10]:
# row(s) of the raw dataset whose id equals the control id
check_line = metrics_df.loc[metrics_df['id'] == check_id]

In [11]:
# Details of the control row: its raw values, then the counters of its first
# two modules and its first two events. The 'modules' and 'events' cells are
# decoded with json.loads, so they must still be JSON strings here.
check_line = metrics_df.loc[metrics_df['id'] == check_id]
print('######## id %d ######## ' % check_id)
print(check_line.values)

print('######## id %d modules details ######## ' % check_id)
for module_pos in (0, 1):
    print(json.loads(check_line['modules'].values[0])[module_pos].get('counters'))

print('######## id %d events details ######## ' % check_id)
for event_pos in (0, 1):
    print(json.loads(check_line['events'].values[0])[event_pos])

######## id 9698772 ######## 
  '[{"name": "User", "level": "Operator"}]' 1386.3023053696015 18000
  '[{"sn": "", "name": "Print Engine 1", "type": "Varnish Printer", "counters": [{"name": "3D Varnish Counter", "value": 308536}], "generation": ""}, {"sn": "", "name": "iFoil L", "type": "iFoil", "counters": [{"name": "Total Pages Counter", "value": 31185}, {"name": "Foiled Pages Counter", "value": 79566}], "generation": "Gen. 2"}]'
######## id 9698772 modules details ######## 
[{'name': '3D Varnish Counter', 'value': 308536}]
[{'name': 'Total Pages Counter', 'value': 31185}, {'name': 'Foiled Pages Counter', 'value': 79566}]
######## id 9698772 events details ######## 
{'source': 'iFoil', 'message': ' Essuyage en cours', 'timestamp': '2022-11-03T08:43:16.808Z', 'criticality': 'INFO', 'identification': '407'}


#### 1) Colonne 'connected_operators'

In [12]:
# build the connected_operators dataframe (~42s), repeating the message id on every row
connected_operators_df = convert_col_to_df('connected_operators', metrics_df, {'id': []})
# info() prints its summary itself and returns None: wrapping it in print()
# only added a stray "None" line to the output
connected_operators_df.info()
connected_operators_df.head(2)

first :  <class 'dict'> {'name': 'User', 'level': 'Operator'}
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 251840 entries, 0 to 251839
Data columns (total 3 columns):
 #   Column                     Non-Null Count   Dtype 
---  ------                     --------------   ----- 
 0   id                         251840 non-null  int64 
 1   name_connected_operators   251840 non-null  object
 2   level_connected_operators  251840 non-null  object
dtypes: int64(1), object(2)
memory usage: 5.8+ MB
None


Unnamed: 0,id,name_connected_operators,level_connected_operators
0,9698765,User,Operator
1,9698772,User,Operator


#### 2) Colonne 'events'

In [13]:
# build the events dataframe (~20s), repeating the message id on every row
events_df = convert_col_to_df('events', metrics_df, {'id': []})
# info() prints its summary itself and returns None: wrapping it in print()
# only added a stray "None" line to the output
events_df.info()
events_df.head(2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25728 entries, 0 to 25727
Data columns (total 6 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   id                     25728 non-null  int64 
 1   source_events          25728 non-null  object
 2   message_events         25728 non-null  object
 3   timestamp_events       25728 non-null  object
 4   criticality_events     25728 non-null  object
 5   identification_events  25728 non-null  object
dtypes: int64(1), object(5)
memory usage: 1.2+ MB
None


Unnamed: 0,id,source_events,message_events,timestamp_events,criticality_events,identification_events
0,9698772,PLC,Essuyage en cours,2022-11-03T08:43:16.746Z,WARNING,407
1,9698772,iFoil,Essuyage en cours,2022-11-03T08:43:16.808Z,INFO,407


##### Identification

In [14]:
# distinct identification codes found in the events, displayed in sorted order
identification_codes_list = events_df.identification_events.unique()
np.sort(identification_codes_list)

array(['0', '311', '313', '314', '320', '321', '325', '328', '330', '331',
       '332', '333', '334', '343', '344', '345', '346', '350', '352',
       '354', '355', '356', '357', '358', '371', '372', '376', '377',
       '380', '381', '382', '383', '384', '385', '386', '387', '388',
       '389', '391', '405', '406', '407', '408', '416', '417', '418',
       '430', '440', '445', '446', '447', '451', '453', '454', '460',
       '466', '480', 'Kernel_Error', 'RCB communication error',
       'iFoil communication error'], dtype=object)

In [15]:
# Map every identification code to the message of its first occurrence.
# Iterating over the two columns directly avoids rebinding the built-in `id`
# at notebook scope and does not require a default integer index.
identification_dict = {}
for event_code, event_message in zip(events_df['identification_events'],
                                     events_df['message_events']):
    # setdefault only stores the message the first time a code is met
    identification_dict.setdefault(event_code, event_message)
# unique codes in order of first appearance
id_list = list(identification_dict)
identification_dict

{'407': ' Essuyage en cours',
 '358': ' Démarrage machine',
 '391': ' JV-Ti non prêt : impression impossible',
 '330': ' En attente',
 '332': ' Disponible',
 '376': ' Réception: mode auto non activé',
 '333': ' Plateau de têtes en mouvement',
 '334': ' Préchauffage',
 '331': ' Impression en cours',
 'Kernel_Error': 'Cannot find enough search zones on the sheet (0.00%), please check your margins\n',
 '352': ' Double feuille détectée E-0352',
 '377': ' Chargeur: mode auto non activé',
 '454': ' Bourrage Corona - E0454',
 '313': ' Ecart nb. feuilles demandées /nb. feuilles sorties',
 '383': ' Capot UV ouvert E-0383',
 '354': ' Bourrage : module impression E-0354',
 '320': ' Porte avant ouverte E-0320',
 '381': ' Capot après têtes ouvert E-0381',
 '480': " Arrêt d'urgence stacker E-0480",
 '430': ' Erreur système de dégazage E-0430',
 '406': ' Purge en cours',
 '445': " Erreur plaque d'empreintes E-0445",
 '446': ' Erreur moteur tapis impression E-0446',
 '384': ' Capot sortie sécheur ouve

##### Source

In [16]:
# distinct event sources, displayed in sorted order
source_list = events_df.source_events.unique()
np.sort(source_list)

array(['Kernel', 'PLC', 'RCB n°1', 'RCB n°2', 'iFoil'], dtype=object)

##### Criticality

In [17]:
# distinct event criticality levels, displayed in sorted order
criticality_list = events_df.criticality_events.unique()
np.sort(criticality_list)



##### Events Json dict

In [18]:
# Save the event dictionaries (identification codes, criticality levels,
# sources) into the JSON file, keeping any other key already stored in it.
json_path = Path(save_json)
try:
    if json_path.exists():
        # start from the current content of the file
        with open(json_path, mode='r', encoding='utf-8') as json_file:
            data = json.load(json_file)
    else:
        # first run: no file yet, start from an empty dictionary
        data = {}
except ValueError as e:
    # existing file is not valid JSON: report it and leave the file untouched
    print(e)
else:
    data['identification'] = identification_dict
    # NOTE: a trailing comma previously turned this value into a one-element
    # tuple, which was serialised as a nested list ([[...]]) in the JSON file.
    data['criticality'] = np.sort(criticality_list).tolist()
    data['source'] = np.sort(source_list).tolist()
    # Rewrite the whole file in 'w' mode: writing over an 'r+' handle without
    # truncating left stale trailing bytes whenever the new content was shorter.
    json_path.parent.mkdir(parents=True, exist_ok=True)
    with open(json_path, mode='w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4, ensure_ascii=False)

#### 3) Colonne 'modules'

In [19]:
# build the modules dataframe (~1m15s), repeating the message id on every row
modules_df = convert_col_to_df('modules', metrics_df, {'id': []})
# data overview: info() prints its summary itself and returns None, so
# wrapping it in print() only added a stray "None" line to the output
modules_df.info()
modules_df.head(2)

first :  <class 'dict'> {'sn': '', 'name': 'Print Engine 1', 'type': 'Varnish Printer', 'counters': [{'name': '3D Varnish Counter', 'value': 308536}], 'generation': ''}
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 503680 entries, 0 to 503679
Data columns (total 6 columns):
 #   Column              Non-Null Count   Dtype 
---  ------              --------------   ----- 
 0   id                  503680 non-null  int64 
 1   sn_modules          503680 non-null  object
 2   name_modules        503680 non-null  object
 3   type_modules        503680 non-null  object
 4   counters_modules    503680 non-null  object
 5   generation_modules  503680 non-null  object
dtypes: int64(1), object(5)
memory usage: 23.1+ MB
None


Unnamed: 0,id,sn_modules,name_modules,type_modules,counters_modules,generation_modules
0,9698765,,Print Engine 1,Varnish Printer,"[{'name': '3D Varnish Counter', 'value': 308536}]",
1,9698765,,iFoil L,iFoil,"[{'name': 'Total Pages Counter', 'value': 3118...",Gen. 2


##### Colonne counters

In [20]:
# build the counters dataframe (~2m7s) from the 'counters_modules' column,
# repeating the module type and the message id on every row
counters_df = convert_col_to_df('counters_modules', modules_df, {'type_modules': [], 'id': []})
# data overview: info() prints its summary itself and returns None, so
# wrapping it in print() only added a stray "None" line to the output
counters_df.info()
counters_df.head(2)

first :  <class 'dict'> {'name': '3D Varnish Counter', 'value': 308536}
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 755520 entries, 0 to 755519
Data columns (total 4 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   type_modules            755520 non-null  object
 1   id                      755520 non-null  int64 
 2   name_counters_modules   755520 non-null  object
 3   value_counters_modules  755520 non-null  int64 
dtypes: int64(2), object(2)
memory usage: 23.1+ MB
None


Unnamed: 0,type_modules,id,name_counters_modules,value_counters_modules
0,Varnish Printer,9698765,3D Varnish Counter,308536
1,iFoil,9698765,Total Pages Counter,31185


In [21]:
# data integrity check: counters extracted for the control id
counters_check_line = counters_df.loc[counters_df['id'] == check_id]
counters_check_line

Unnamed: 0,type_modules,id,name_counters_modules,value_counters_modules
3,Varnish Printer,9698772,3D Varnish Counter,308536
4,iFoil,9698772,Total Pages Counter,31185
5,iFoil,9698772,Foiled Pages Counter,79566


### c) Fusion des dataframes des colonnes fractionnées

#### 1) Merge modules et counters

In [22]:
# Join each module with its counters (same message id and same module type),
# then remove the split 'counters_modules' column.
merge_modules_df = (
    modules_df
    .merge(counters_df, on=['id', 'type_modules'])
    .drop(columns=['counters_modules'])
)
# data integrity check on the control id
module_check_line = merge_modules_df.loc[merge_modules_df['id'] == check_id]
module_check_line

Unnamed: 0,id,sn_modules,name_modules,type_modules,generation_modules,name_counters_modules,value_counters_modules
3,9698772,,Print Engine 1,Varnish Printer,,3D Varnish Counter,308536
4,9698772,,iFoil L,iFoil,Gen. 2,Total Pages Counter,31185
5,9698772,,iFoil L,iFoil,Gen. 2,Foiled Pages Counter,79566


In [23]:
# column summary of the merged modules/counters dataframe
merge_modules_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 755520 entries, 0 to 755519
Data columns (total 7 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   id                      755520 non-null  int64 
 1   sn_modules              755520 non-null  object
 2   name_modules            755520 non-null  object
 3   type_modules            755520 non-null  object
 4   generation_modules      755520 non-null  object
 5   name_counters_modules   755520 non-null  object
 6   value_counters_modules  755520 non-null  int64 
dtypes: int64(2), object(5)
memory usage: 46.1+ MB


#### 2) Merge operators

In [24]:
# attach the connected operators to every module/counter row, joined on the message id
merge_operators_df = merge_modules_df.merge(
    connected_operators_df,
    on='id',
    suffixes=['', '_op'],
)
# data integrity check on the control id
op_check_line = merge_operators_df.loc[merge_operators_df['id'] == check_id]
op_check_line

Unnamed: 0,id,sn_modules,name_modules,type_modules,generation_modules,name_counters_modules,value_counters_modules,name_connected_operators,level_connected_operators
3,9698772,,Print Engine 1,Varnish Printer,,3D Varnish Counter,308536,User,Operator
4,9698772,,iFoil L,iFoil,Gen. 2,Total Pages Counter,31185,User,Operator
5,9698772,,iFoil L,iFoil,Gen. 2,Foiled Pages Counter,79566,User,Operator


#### 3) Merge events

In [25]:
# events whose source is 'iFoil'
events_ifoil = events_df.loc[events_df['source_events'] == 'iFoil']
# module/counter/operator rows whose module type is 'iFoil'
module_ifoil = merge_operators_df.loc[merge_operators_df['type_modules'] == 'iFoil']
# outer join of both iFoil subsets on the message id
merge_ifoil_df = events_ifoil.merge(
    module_ifoil,
    how='outer',
    on='id',
    suffixes=['_event', '_module'],
)
# data integrity check on the control id
events_check_line = merge_ifoil_df.loc[merge_ifoil_df['id'] == check_id]
events_check_line

Unnamed: 0,id,source_events,message_events,timestamp_events,criticality_events,identification_events,sn_modules,name_modules,type_modules,generation_modules,name_counters_modules,value_counters_modules,name_connected_operators,level_connected_operators
0,9698772,iFoil,Essuyage en cours,2022-11-03T08:43:16.808Z,INFO,407,,iFoil L,iFoil,Gen. 2,Total Pages Counter,31185,User,Operator
1,9698772,iFoil,Essuyage en cours,2022-11-03T08:43:16.808Z,INFO,407,,iFoil L,iFoil,Gen. 2,Foiled Pages Counter,79566,User,Operator


Comme il y avait 2 counters ('Total Pages Counter' et 'Foiled Pages Counter') pour le module de type 'Ifoil', nous avons bien 2 lignes.

In [26]:
# events whose source is anything but 'iFoil'
events_no_ifoil = events_df.loc[events_df['source_events'] != 'iFoil']
# module/counter/operator rows whose module type is anything but 'iFoil'
module_no_ifoil = merge_operators_df.loc[merge_operators_df['type_modules'] != 'iFoil']
# outer join of both non-iFoil subsets on the message id
merge_no_ifoil_df = events_no_ifoil.merge(
    module_no_ifoil,
    how='outer',
    on='id',
    suffixes=['_event', '_module'],
)
# data integrity check on the control id
events_check_line = merge_no_ifoil_df.loc[merge_no_ifoil_df['id'] == check_id]
events_check_line

Unnamed: 0,id,source_events,message_events,timestamp_events,criticality_events,identification_events,sn_modules,name_modules,type_modules,generation_modules,name_counters_modules,value_counters_modules,name_connected_operators,level_connected_operators
0,9698772,PLC,Essuyage en cours,2022-11-03T08:43:16.746Z,WARNING,407,,Print Engine 1,Varnish Printer,,3D Varnish Counter,308536,User,Operator


##### Concaténation

In [27]:
# stack the iFoil and non-iFoil merges so that no row is lost
# (each part keeps its own index labels, hence repeated labels in the result)
concat_events_df = pd.concat(objs=[merge_ifoil_df, merge_no_ifoil_df])
# data integrity check on the control id
events_check_line = concat_events_df.loc[concat_events_df['id'] == check_id]
events_check_line

Unnamed: 0,id,source_events,message_events,timestamp_events,criticality_events,identification_events,sn_modules,name_modules,type_modules,generation_modules,name_counters_modules,value_counters_modules,name_connected_operators,level_connected_operators
0,9698772,iFoil,Essuyage en cours,2022-11-03T08:43:16.808Z,INFO,407,,iFoil L,iFoil,Gen. 2,Total Pages Counter,31185,User,Operator
1,9698772,iFoil,Essuyage en cours,2022-11-03T08:43:16.808Z,INFO,407,,iFoil L,iFoil,Gen. 2,Foiled Pages Counter,79566,User,Operator
0,9698772,PLC,Essuyage en cours,2022-11-03T08:43:16.746Z,WARNING,407,,Print Engine 1,Varnish Printer,,3D Varnish Counter,308536,User,Operator


In [28]:
# column summary of the concatenated events/modules dataframe
concat_events_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 761371 entries, 0 to 255726
Data columns (total 14 columns):
 #   Column                     Non-Null Count   Dtype 
---  ------                     --------------   ----- 
 0   id                         761371 non-null  int64 
 1   source_events              30316 non-null   object
 2   message_events             30316 non-null   object
 3   timestamp_events           30316 non-null   object
 4   criticality_events         30316 non-null   object
 5   identification_events      30316 non-null   object
 6   sn_modules                 761371 non-null  object
 7   name_modules               761371 non-null  object
 8   type_modules               761371 non-null  object
 9   generation_modules         761371 non-null  object
 10  name_counters_modules      761371 non-null  object
 11  value_counters_modules     761371 non-null  int64 
 12  name_connected_operators   761371 non-null  object
 13  level_connected_operators  761371 non-null  

#### 4) Merge metrics

In [29]:
# Final merge: bring back the remaining columns of the metrics dataset
# (outer join on the message id), then remove the three columns that were split.
merge_metrics_df = (
    concat_events_df
    .merge(metrics_df, how='outer', on='id', suffixes=['', '_metrics'])
    .drop(columns=['connected_operators', 'modules', 'events'])
)
# data integrity check on the control id
metrics_check_line = merge_metrics_df.loc[merge_metrics_df['id'] == check_id]
metrics_check_line

Unnamed: 0,id,source_events,message_events,timestamp_events,criticality_events,identification_events,sn_modules,name_modules,type_modules,generation_modules,name_counters_modules,value_counters_modules,name_connected_operators,level_connected_operators,status,created_at,varnishLevelsTargetvolume,varnishLevelsTotalvolume
0,9698772,iFoil,Essuyage en cours,2022-11-03T08:43:16.808Z,INFO,407,,iFoil L,iFoil,Gen. 2,Total Pages Counter,31185,User,Operator,WARNING,2022-11-03 08:43:23.785000,1386.302305,18000
1,9698772,iFoil,Essuyage en cours,2022-11-03T08:43:16.808Z,INFO,407,,iFoil L,iFoil,Gen. 2,Foiled Pages Counter,79566,User,Operator,WARNING,2022-11-03 08:43:23.785000,1386.302305,18000
2,9698772,PLC,Essuyage en cours,2022-11-03T08:43:16.746Z,WARNING,407,,Print Engine 1,Varnish Printer,,3D Varnish Counter,308536,User,Operator,WARNING,2022-11-03 08:43:23.785000,1386.302305,18000


In [30]:
# column summary of the final merged dataset
merge_metrics_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 761371 entries, 0 to 761370
Data columns (total 18 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   id                         761371 non-null  int64  
 1   source_events              30316 non-null   object 
 2   message_events             30316 non-null   object 
 3   timestamp_events           30316 non-null   object 
 4   criticality_events         30316 non-null   object 
 5   identification_events      30316 non-null   object 
 6   sn_modules                 761371 non-null  object 
 7   name_modules               761371 non-null  object 
 8   type_modules               761371 non-null  object 
 9   generation_modules         761371 non-null  object 
 10  name_counters_modules      761371 non-null  object 
 11  value_counters_modules     761371 non-null  int64  
 12  name_connected_operators   761371 non-null  object 
 13  level_connected_operators  76

## 2. Output csv

In [31]:
# write the merged dataset to the target CSV; with pandas' default index=True
# the dataframe index is written as the first (unnamed) column
merge_metrics_df.to_csv(path_or_buf=Path(save_csv))