In [1]:
import zipfile
import pandas as pd

## Unzip Alarms Datasets

In [3]:
# Destination folders the archives are unpacked into
extract_to_folder_mob = "alarms datasets/mob"
extract_to_folder_tx = "alarms datasets/tx"

# Paths of the ZIP archives to extract (one mob archive, four tx archives)
zip_mob_file_path = "alarms datasets/mob/20230101-20240101_inpas_mob_preprocess__an__last_event__last_event__ext1.zip"
zip_tx_adsl_file_path = "alarms datasets/tx/20230101-20240101_inpas_tx_preprocess__adsl__last_event__last_event__ext1.zip"
zip_tx_pdh_file_path = "alarms datasets/tx/20230101-20240101_inpas_tx_preprocess__pdh__last_event__last_event__ext1.zip"
zip_tx_ptn_file_path = "alarms datasets/tx/20230101-20240101_inpas_tx_preprocess__ptn__last_event__last_event__ext1.zip"
zip_tx_sdh_file_path = "alarms datasets/tx/20230101-20240101_inpas_tx_preprocess__sdh__last_event__last_event__ext1.zip"

# Mapping {archive path -> destination folder}; every tx archive shares the tx folder
zip_files_paths = {
    zip_mob_file_path: extract_to_folder_mob,
    **dict.fromkeys(
        (
            zip_tx_adsl_file_path,
            zip_tx_pdh_file_path,
            zip_tx_ptn_file_path,
            zip_tx_sdh_file_path,
        ),
        extract_to_folder_tx,
    ),
}

# Open each archive in turn and unpack its whole content into the mapped folder
for archive_path, target_dir in zip_files_paths.items():
    with zipfile.ZipFile(archive_path, mode='r') as archive:
        archive.extractall(path=target_dir)

## Load Datasets into DataFrames

In [2]:
# Load the extracted mob alarms parquet file into a DataFrame.
mob_alarms_df = pd.read_parquet(
    path='alarms datasets/mob/20230101-20240101_inpas_mob_preprocess__an__last_event__last_event__ext1.parquet'
)

In [2]:
# Load the four extracted tx alarms parquet files (adsl, pdh, ptn, sdh),
# one DataFrame per file.
tx_alarms_adsl_df = pd.read_parquet(
    path='alarms datasets/tx/20230101-20240101_inpas_tx_preprocess__adsl__last_event__last_event__ext1.parquet'
)
tx_alarms_pdh_df = pd.read_parquet(
    path='alarms datasets/tx/20230101-20240101_inpas_tx_preprocess__pdh__last_event__last_event__ext1.parquet'
)
tx_alarms_ptn_df = pd.read_parquet(
    path='alarms datasets/tx/20230101-20240101_inpas_tx_preprocess__ptn__last_event__last_event__ext1.parquet'
)
tx_alarms_sdh_df = pd.read_parquet(
    path='alarms datasets/tx/20230101-20240101_inpas_tx_preprocess__sdh__last_event__last_event__ext1.parquet'
)

In [3]:
# Display the column labels of the mob alarms frame to inspect its schema.
mob_alarms_df.columns

Index(['alarm_id', 'data', 'first_occurrence', 'first_severity',
       'last_occurrence', 'last_update', 'loc_descr', 'make_prediction',
       'ne_id', 'network', 'severity', 'std_probable_cause_no',
       'correlation_role', 'corr_intra', 'data_presentazione',
       'delay_expire_time', 'is_correlated', 'is_root_cause', 'mob_cat_sito',
       'mob_key_type', 'num_figli', 'outage', 'park', 'tt_correlation_state',
       'to_operation_view', 'tt_request_status', 'wr_status', 'alarm_count',
       'category', 'cx_auton_batterie', 'em', 'loc_group', 'manager',
       'manufacturer', 'mob_slogan', 'ne_type', 'origine_guasto',
       'strategicita', 'subnetwork', 'alarm_duration',
       'first_occurrence_time_of_week', 'first_occurrence_day_of_week',
       'first_occurrence_hour', 'province', 'loc_descr_cat', 'province_cat',
       'is_note_present', 'is_ack_or_notes', 'is_tt', 'is_abam_tt', 'loc_name',
       'tt_id', 'link_id', 'loc_name_a', 'loc_name_z', 'lp_id', 'lp_type'],
      dtype='object')

In [6]:
# Display the distinct values found in the link_type column of the pdh alarms frame.
tx_alarms_pdh_df["link_type"].unique()

['/0-', 'itl', NaN, 'nq0', '30', ..., '.nq', 'dz1', 'lla', 'xsh', 'al']
Length: 437
Categories (436, object): ['-', '-an', '-bo', '-bz', ..., 'x1c', 'xf', 'xsh', 'xti']

## Clean Alarms Datasets by removing the old "lavori programmati" (planned works) columns

In [5]:
# Build a copy of the mob alarms frame without the lp_type / lp_id columns;
# the source frame itself is left unchanged.
filtered_mob_alarms_df = mob_alarms_df.drop(
    ['lp_type', 'lp_id'],
    axis='columns',
)

In [4]:
# Build a copy of the adsl alarms frame without the lp_type / lp_id columns;
# the source frame itself is left unchanged.
filtered_tx_alarms_adsl_df = tx_alarms_adsl_df.drop(
    ['lp_type', 'lp_id'],
    axis='columns',
)

In [5]:
# Build a copy of the pdh alarms frame without the lp_type / lp_id columns;
# the source frame itself is left unchanged.
filtered_tx_alarms_pdh_df = tx_alarms_pdh_df.drop(
    ['lp_type', 'lp_id'],
    axis='columns',
)

In [6]:
# Build a copy of the ptn alarms frame without the lp_type / lp_id columns;
# the source frame itself is left unchanged.
filtered_tx_alarms_ptn_df = tx_alarms_ptn_df.drop(
    ['lp_type', 'lp_id'],
    axis='columns',
)

In [7]:
# Build a copy of the sdh alarms frame without the lp_type / lp_id columns;
# the source frame itself is left unchanged.
filtered_tx_alarms_sdh_df = tx_alarms_sdh_df.drop(
    ['lp_type', 'lp_id'],
    axis='columns',
)

## Associate Alarms with Lavori Programmati (planned works)

In [9]:
# Read the lavori programmati (planned works) CSV and keep every column except
# the first one, selected by position.
# NOTE(review): the first column is presumably a saved index — confirm.
lavori_programmati_short_df = (
    pd.read_csv("lavori programmati/20230101_20240101_lp.csv")
    .iloc[:, 1:]
)

In [7]:
# Left-join the lavori programmati table onto the cleaned mob alarms by alarm_id
# (every alarm row is kept), then save the result as a new parquet file.
new_mob_alarms_df = filtered_mob_alarms_df.merge(
    lavori_programmati_short_df,
    how='left',
    on='alarm_id',
)
new_mob_alarms_df.to_parquet(
    'alarms datasets/mob/[new]20230101-20240101_inpas_mob_preprocess__an__last_event__last_event__ext1.parquet'
)

In [10]:
# Left-join the lavori programmati table onto the cleaned adsl alarms by alarm_id
# (every alarm row is kept), then save the result as a new parquet file.
new_tx_alarms_adsl_df = filtered_tx_alarms_adsl_df.merge(
    lavori_programmati_short_df,
    how='left',
    on='alarm_id',
)
new_tx_alarms_adsl_df.to_parquet(
    'alarms datasets/tx/[new]20230101-20240101_inpas_tx_preprocess__adsl__last_event__last_event__ext1.parquet'
)

In [11]:
# Left-join the lavori programmati table onto the cleaned pdh alarms by alarm_id
# (every alarm row is kept), then save the result as a new parquet file.
new_tx_alarms_pdh_df = filtered_tx_alarms_pdh_df.merge(
    lavori_programmati_short_df,
    how='left',
    on='alarm_id',
)
new_tx_alarms_pdh_df.to_parquet(
    'alarms datasets/tx/[new]20230101-20240101_inpas_tx_preprocess__pdh__last_event__last_event__ext1.parquet'
)

In [12]:
# Left-join the lavori programmati table onto the cleaned ptn alarms by alarm_id
# (every alarm row is kept), then save the result as a new parquet file.
new_tx_alarms_ptn_df = filtered_tx_alarms_ptn_df.merge(
    lavori_programmati_short_df,
    how='left',
    on='alarm_id',
)
new_tx_alarms_ptn_df.to_parquet(
    'alarms datasets/tx/[new]20230101-20240101_inpas_tx_preprocess__ptn__last_event__last_event__ext1.parquet'
)

In [13]:
# Left-join the lavori programmati table onto the cleaned sdh alarms by alarm_id
# (every alarm row is kept), then save the result as a new parquet file.
new_tx_alarms_sdh_df = filtered_tx_alarms_sdh_df.merge(
    lavori_programmati_short_df,
    how='left',
    on='alarm_id',
)
new_tx_alarms_sdh_df.to_parquet(
    'alarms datasets/tx/[new]20230101-20240101_inpas_tx_preprocess__sdh__last_event__last_event__ext1.parquet'
)