# Pre-requisite Configuration

The following prerequisites are completed before data is extracted from the relevant systems.

1. **Set Configuration:**
    * Set a `CONFIG_ID` for execution.
    * Code picks & loads configuration file (yaml) from `config` folder based on the `config_id` maintained in the file(s). 
2. **Create directories:**
    * Pick the extraction directory maintained in the configuration for Alerts.
    * Extraction data such as CSV / parquet files are staged in this directory.
    * This data is local.
    * `data`, `errors` and `udr` sub-directories are also created within it; the error reports are stored in `errors`.
3. **Create database tables:**
    * The script creates necessary tables in the database specified in the configuration.
    * Tables are only created if they do not already exist in the database.

In [None]:
# ------------------------------------------------------------------------------ #
# For testing
# ------------------------------------------------------------------------------ #
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import os
import concurrent.futures
from modules.util.helpers import Logger
from modules.util.config import get_config_by_id
from modules.pai.alerts import AlertsAPIWrapper 
from modules.acf.alerttypes import AlertTypeAPIWrapper 
from modules.util.database import SQLAlchemyClient

# ------------------------------------------------------------------------------ #
# Configurations
# ------------------------------------------------------------------------------ #

# Identifier of the configuration to load for this run.
CONFIG_ID = 'dca-test'

# Root staging directory for the alerts extraction, taken from the
# configuration; the three sub-directories below are derived from it.
EXTRACTION_DIR = get_config_by_id(CONFIG_ID)["extract"]["alerts"]["directory"]
DATA_DIR = f"{EXTRACTION_DIR}/data"      # extracted alerts (parquet)
ERROR_DIR = f"{EXTRACTION_DIR}/errors"   # error reports (CSV)
UDR_DIR = f"{EXTRACTION_DIR}/udr"        # UDR files that users fill in (CSV)

# ------------------------------------------------------------------------------ #
# Directories
# ------------------------------------------------------------------------------ #

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of os.path.exists() followed by os.makedirs(). Unlike the exists()
# check, it raises FileExistsError if a path is occupied by a regular file,
# which surfaces a broken configuration immediately.
for _directory in (DATA_DIR, ERROR_DIR, UDR_DIR):
    os.makedirs(_directory, exist_ok=True)

# Logger for this run, followed by a summary of the resolved settings.
log = Logger.get_logger(CONFIG_ID)
log.info("** EXTRACT - ALERTS **")
Logger.blank_line(log)
log.info(f"Configuration ID: {CONFIG_ID}")
log.info(f"Extraction directory: {EXTRACTION_DIR}")
log.info(f"Data directory: {DATA_DIR}")
log.info(f"Error directory: {ERROR_DIR}")
# The UDR directory is created alongside the other two, so log it as well.
log.info(f"UDR directory: {UDR_DIR}")


# Files written by the later cells of this notebook.
file_pai_alerts = rf'{DATA_DIR}/Alerts.parquet'      # raw PAI alerts
file_alerttypes = rf'{ERROR_DIR}/Alerttypes.csv'     # alert-type error report
file_alerts = rf'{ERROR_DIR}/Alerts.csv'             # alert error report
UDR_Alerttypes = rf'{UDR_DIR}/UDR_Alerttypes.csv'    # alert-type UDR
UDR_Alerts = rf'{UDR_DIR}/UDR_Alerts.csv'            # alert UDR

# ------------------------------------------------------------------------------ #
# Database
# ------------------------------------------------------------------------------ #

# Open the database client for the selected configuration.
log.info("Connecting to database")
db = SQLAlchemyClient(config_id=CONFIG_ID)
log.info(f"Connected to {db.get_database_url()}")

# Ask the client to create all tables.
log.info("Creating all tables")
db.table_create_all()
log.info("Tables created")

# 1 - Extract PAI Alerts

* Alerts for both Technical Objects (Equipments and Functional Locations) will be extracted.
* Storing in a dataframe df_alerts for filtering/transformation 

In [None]:
 # Importing AlertAPIWrapper from alerts.py
# Client for the PAI alerts API.
alert_api = AlertsAPIWrapper(config_id=CONFIG_ID)
alert_count = alert_api.getCount()
log.info(f"{alert_count} alerts found")

# Number of alerts requested per API call.
PAGE_SIZE = 100


def fetch_alerts_chunk(skip, top=PAGE_SIZE):
    """Fetch one page of alerts.

    Args:
        skip: Number of alerts to skip before the page starts.
        top: Maximum number of alerts requested for the page.

    Returns:
        The value returned by ``alert_api.getAlerts`` for that page; the
        caller extends a list with it.
    """
    return alert_api.getAlerts(skip=skip, top=top)


# Fetch all pages concurrently; pages are collected in completion order.
alerts = []
failed_offsets = []
if alert_count > 0:
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # range() stops before alert_count, so no request is issued for a
        # page that lies entirely past the last alert.
        future_to_skip = {
            executor.submit(fetch_alerts_chunk, skip): skip
            for skip in range(0, alert_count, PAGE_SIZE)
        }
        for future in concurrent.futures.as_completed(future_to_skip):
            try:
                alerts.extend(future.result())
            except Exception as e:
                # Keep going so one bad page does not discard the others,
                # but remember which page is missing.
                failed_offsets.append(future_to_skip[future])
                log.error(f"Failed to fetch alerts chunk: {e}")

if failed_offsets:
    log.error(
        f"{len(failed_offsets)} alert chunk(s) could not be fetched "
        f"(skip offsets: {sorted(failed_offsets)}); the extracted data is incomplete"
    )

# Convert alerts to DataFrame and save to parquet
if alerts:
    df_alerts = pd.DataFrame(alerts)
    df_alerts.to_parquet(file_pai_alerts, index=False)
    log.info(f"Alerts saved to {file_pai_alerts}")
else:
    log.error("No alerts to save")





In [None]:
# Columns removed from the extracted alerts before filtering and staging.
columns_to_remove = [
    "__metadata",
]

# errors="ignore" keeps the cell re-runnable: on a second run the column is
# already gone and a plain drop() would raise KeyError.
df_alerts = df_alerts.drop(columns=columns_to_remove, errors="ignore")
df_alerts.head()

### Staging the PAI system Alerts

* The data above is staged in the corresponding database table: T_PAI_ALERTS


In [None]:
# Step 2 : Stage Alerts in DB
from modules.util.database import SQLAlchemyClient, PaiAlerts

log.info("Connecting to database")
db = SQLAlchemyClient(config_id=CONFIG_ID)
log.info(f"Connected to {db.get_database_url()}")

log.info(f"Creating {PaiAlerts.__tablename__} table")
db.table_create_all()

# Record count before loading; nothing has been updated at this point.
log.info(f"{PaiAlerts.__tablename__} holds {db.count(PaiAlerts)} records before update")
Logger.blank_line(log)

log.info(f"Updating {PaiAlerts.__tablename__}")
log.info(f"Drop and Reload configuration is set to {db.drop_reload}")
if db.drop_reload:
    # Logged as a warning, like the other staging cells, because existing data is removed.
    log.warning(f"Clearing data from {PaiAlerts.__tablename__}")
    db.truncate(PaiAlerts)  # truncate all existing data (remove if needed)
pai_alerts = SQLAlchemyClient.dataframe_to_object(df_alerts, PaiAlerts)
db.insert_batches(data=pai_alerts)  # insert the new PAI alerts

log.info(f"Updated {PaiAlerts.__tablename__} with {db.count(PaiAlerts)} records")



# 2 - Filter PAI Alerts for Valid Technical Objects

* Filter the equipments and functional locations, in the PAI Alerts, that are present in the system.
* Using view V_Transform_Indicators that contains the valid TO's for filtering.

In [None]:
from modules.util.database import V_Transform_Indicators, SQLAlchemyClient

# Read the distinct rows of the V_Transform_Indicators view into a DataFrame.
log.info("Connecting to database")
db = SQLAlchemyClient(config_id=CONFIG_ID)
log.info(f"Connected to {db.get_database_url()}")

log.info(f"Reading {V_Transform_Indicators.__tablename__}")
transform_indicators = db.select(model=V_Transform_Indicators, distinct=True)
log.info(f"{len(transform_indicators)} transform indicators fetched.")
df_transform_indicators = pd.DataFrame(data=transform_indicators)
df_transform_indicators.head()

### All possible equipment ids/ floc ids that are in final transformed list of ids

In [None]:
# Count the alert rows whose EquipmentID appears in df_transform_indicators['id'].
equipment_ids_in_transform = df_alerts['EquipmentID'].isin(
    df_transform_indicators['id']
)
num_equipment_ids_in_transform = equipment_ids_in_transform.sum()
log.info(f"Number of EquipmentID in df_alerts present in df_transform_indicators: {num_equipment_ids_in_transform}")

# Same count for the alert rows' FunctionalLocationID.
functional_location_ids_in_transform = df_alerts['FunctionalLocationID'].isin(
    df_transform_indicators['id']
)
num_functional_location_ids_in_transform = functional_location_ids_in_transform.sum()
log.info(f"Number of FunctionalLocationID in df_alerts present in df_transform_indicators: {num_functional_location_ids_in_transform}")

In [None]:
# Technical objects referenced by at least one alert, either as an equipment
# or as a functional location.
_to_ids = df_transform_indicators['id']
_referenced_as_equipment = _to_ids.isin(df_alerts['EquipmentID'])
_referenced_as_floc = _to_ids.isin(df_alerts['FunctionalLocationID'])
filtered_df = df_transform_indicators[_referenced_as_equipment | _referenced_as_floc]

# Alerts whose equipment or functional location is one of those technical
# objects, re-indexed from 0.
_equipment_is_valid = df_alerts['EquipmentID'].isin(filtered_df['id'])
_floc_is_valid = df_alerts['FunctionalLocationID'].isin(filtered_df['id'])
valid_alert_TO = df_alerts[_equipment_is_valid | _floc_is_valid].reset_index(drop=True)

valid_alert_TO.head()


### Fetch the available alerttypes to migrate the alerttypes corresponding to the valid TO's

In [None]:
from modules.acf.alerttypes import AlertTypeAPIWrapper

# Fetch the alert types through AlertTypeAPIWrapper and load them into a DataFrame.
# NOTE(review): this rebinds the notebook-level name `alert_api`, which
# `fetch_alerts_chunk` reads at call time — re-run the alert extraction cell
# (which binds `alert_api` again) before fetching alerts a second time.
alert_api = AlertTypeAPIWrapper(config_id=CONFIG_ID)
alerttypes = alert_api.getAlerttypes()
log.info(f"{len(alerttypes)} alert types fetched.")
df_alerttypes = pd.DataFrame(alerttypes)
df_alerttypes.head()


In [None]:
from modules.util.database import SQLAlchemyClient,PaiAlertTypes

# Stage the fetched alert types in the database.

log.info("Connecting to database")
db = SQLAlchemyClient(config_id=CONFIG_ID)
log.info(f"Connected to {db.get_database_url()}")

# Record count before loading; nothing has been updated at this point.
log.info(f"{PaiAlertTypes.__tablename__} holds {db.count(PaiAlertTypes)} records before update")
Logger.blank_line(log)

log.info(f"Updating {PaiAlertTypes.__tablename__}")
log.info(f"Drop and Reload configuration is set to {db.drop_reload}")
if db.drop_reload:
    log.warning(f"Clearing data from {PaiAlertTypes.__tablename__}")
    db.truncate(PaiAlertTypes)  # truncate all existing data (remove if needed)
pai_alerttypes = SQLAlchemyClient.dataframe_to_object(df_alerttypes, PaiAlertTypes)
db.insert_batches(data=pai_alerttypes)  # insert the fetched alert types

log.info(f"Updated {PaiAlertTypes.__tablename__} with {db.count(PaiAlertTypes)} records")



##### Keep only the alert types used by alerts whose TO IDs are in the validated IDs

In [None]:
# Alert types referenced by the alerts of valid technical objects.
required_at = valid_alert_TO['AlertType'].unique()

# Keep only those alert types and re-index from 0.
_is_required = df_alerttypes['alertTypeID'].isin(required_at)
df_alerttypes = df_alerttypes[_is_required].reset_index(drop=True)
df_alerttypes.head()


# 3a - Staging the Alerttypes to APM

In [None]:
# Select the alert-type columns to post and rename them to the target names.
df_post_alerttypes = df_alerttypes[
    ['alertTypeID', 'category', 'alertDescription', 'severity', 'severityDescription']
].rename(columns={
    'alertTypeID': 'Name',
    'category': 'Category',
    'alertDescription': 'Description',
    'severity': 'DefaultSeverity',
    'severityDescription': 'DefaultSeverityDescription',
})
df_post_alerttypes['Source'] = 'API'

# Accepted source severities and the value each one is posted with:
# 2 becomes 5 and 3 becomes 10; 1, 5 and 10 are kept as they are.
SEVERITY_MAP = {'1': 1, '2': 5, '3': 10, '5': 5, '10': 10}

# Validation and mapping both work on the string form, so a severity is
# treated the same whether it arrived as text ('2') or as a number (2).
severity_as_text = df_post_alerttypes['DefaultSeverity'].astype(str)
has_valid_severity = severity_as_text.isin(list(SEVERITY_MAP))
incorrect_severity_values = df_post_alerttypes[~has_valid_severity]

if not incorrect_severity_values.empty:
    incorrect_severity_file = os.path.join(ERROR_DIR, 'Incorrect_Alert_Types.csv')
    incorrect_severity_values.to_csv(incorrect_severity_file, index=False)
    log.error(f"Incorrect severity values found. Details saved to {incorrect_severity_file}")
    log.error(
        f"{len(incorrect_severity_values)} alert type(s) with an unsupported severity "
        f"are excluded from the UDR"
    )

# Map the valid rows even when some rows are invalid; the invalid rows are
# reported above and left out, so the int32 conversion cannot fail on them.
df_post_alerttypes = df_post_alerttypes[has_valid_severity].copy()
df_post_alerttypes['DefaultSeverity'] = (
    severity_as_text[has_valid_severity].map(SEVERITY_MAP).astype('int32')
)

# Empty deduplication columns, to be filled in by the user in the UDR.
df_post_alerttypes['DeduplicationPeriod'] = ''
df_post_alerttypes['DeduplicationIsEnabled'] = ''

# Save to CSV
df_post_alerttypes.to_csv(UDR_Alerttypes, index=False)
log.info(f"Alert types saved as UDR in {UDR_Alerttypes}")

df_post_alerttypes.head()




#### Read the UDR after user has provided inputs

In [None]:
# Load the alert-type UDR back from disk.
# NOTE(review): read_csv infers column types, so blank cells come back as NaN
# and numeric-looking names or categories come back as numbers — confirm this
# is acceptable for the alert types in scope.
df_post_alerttypes = pd.read_csv(UDR_Alerttypes)

# Column order used for posting: 'Source' sits right after 'Category'.
_udr_column_order = [
    'Name',
    'Category',
    'Source',
    'Description',
    'DefaultSeverity',
    'DefaultSeverityDescription',
    'DeduplicationPeriod',
    'DeduplicationIsEnabled',
]
df_post_alerttypes = df_post_alerttypes[_udr_column_order]
df_post_alerttypes.head()

In [None]:
from modules.util.database import SQLAlchemyClient, ApmAlertTypes

log.info("Connecting to database")
db = SQLAlchemyClient(config_id=CONFIG_ID)
log.info(f"Connected to {db.get_database_url()}")

# Record count before loading; nothing has been updated at this point.
log.info(f"{ApmAlertTypes.__tablename__} holds {db.count(ApmAlertTypes)} records before update")
Logger.blank_line(log)  # same separator as the other staging cells

log.info(f"Updating {ApmAlertTypes.__tablename__}")
log.info(f"Drop and Reload configuration is set to {db.drop_reload}")
if db.drop_reload:
    log.warning(f"Clearing data from {ApmAlertTypes.__tablename__}")
    db.truncate(ApmAlertTypes)  # truncate all existing data (remove if needed)
# The name `pai_alerttypes` is kept from the original cell; it holds the
# ApmAlertTypes objects built from the UDR.
pai_alerttypes = SQLAlchemyClient.dataframe_to_object(df_post_alerttypes, ApmAlertTypes)
db.insert_batches(data=pai_alerttypes)  # insert the alert types read from the UDR

log.info(f"Updated {ApmAlertTypes.__tablename__} with {db.count(ApmAlertTypes)} records")


#### Post the alerttypes to APM

* To keep track of the alert types migrated, stage the alert types that are migrated  in another table "Post AlertTypes"
* If successful, the error message and error code will be None, else capture them as well.

In [None]:
from modules.apm.manage_alerts import APMAlertAPIWrapper
from modules.util.database import SQLAlchemyClient, PostAlertTypes
import json
import requests

def _is_blank(value):
    """Return True when a UDR cell is empty (None, NaN or whitespace-only text)."""
    if value is None:
        return True
    if isinstance(value, str):
        return not value.strip()
    return bool(pd.isna(value))


def _as_bool(value):
    """Interpret a UDR cell as a flag; blank cells and 'false'-like text are False."""
    if _is_blank(value):
        return False
    if isinstance(value, str):
        return value.strip().lower() in ('true', '1', 'yes', 'y')
    return bool(value)


alert_api = APMAlertAPIWrapper(config_id=CONFIG_ID)
if db.drop_reload:
    log.warning(f"Clearing data from {PostAlertTypes.__tablename__}")
    db.truncate(PostAlertTypes)  # truncate all existing data (remove if needed)

for _, row in df_post_alerttypes.iterrows():
    alert_type_data = {
        "name": row['Name'],
        "category": str(row['Category']),
        "source": row['Source'],
        "description": row['Description'],
        "default_severity": int(row['DefaultSeverity']),
        "default_severity_description": row['DefaultSeverityDescription'],
    }

    # Blank UDR cells are read back as NaN, which is truthy, and bool("False")
    # is True; test the cells explicitly so that deduplication is only sent
    # when a period is given and the flag is really enabled.
    # NOTE(review): the period is forwarded exactly as read from the CSV
    # (a numeric column with blanks is read as float) — confirm the format
    # postAlerttype expects.
    if not _is_blank(row['DeduplicationPeriod']) and _as_bool(row['DeduplicationIsEnabled']):
        alert_type_data['deduplication_period'] = row['DeduplicationPeriod']
        alert_type_data['deduplication_is_enabled'] = True

    response = None
    error_message = None
    error_code = None
    posted = False
    try:
        response = alert_api.postAlerttype(**alert_type_data)
        posted = True
    except requests.exceptions.RequestException as e:
        # RequestException also covers HTTPError; failures without an HTTP
        # response (e.g. connection errors) have e.response set to None.
        log.warning(f"ERROR:\tFailed to post alert type: {alert_type_data}")
        log.warning(f"ERROR:\t{e}")
        if e.response is not None:
            error_message = e.response.text
            error_code = e.response.status_code
        else:
            error_message = str(e)

    # Stage the outcome of this row: both error fields stay None on success.
    current_row_df = row.to_frame().T
    current_row_df['ErrorMessage'] = error_message
    current_row_df['ErrorCode'] = error_code
    posting_alerttype = SQLAlchemyClient.dataframe_to_object(current_row_df, PostAlertTypes)
    db.insert_batches(data=posting_alerttype)
    if posted:
        log.info(f"Response: {response}")
      
        


#### Create an error report for alerttypes from collected data

In [None]:
from modules.util.database import SQLAlchemyClient, V_APM_AlertType
# Distinct rows of V_APM_AlertType whose `valid` flag is not 'X'.
valid_alerttype = db.select(
    model=V_APM_AlertType,
    distinct=True,
    where=[V_APM_AlertType.valid != 'X'],
)
df_valid_alerttype = pd.DataFrame(valid_alerttype)

if len(df_valid_alerttype) == 0:
    log.info("No invalid alert types found")
else:
    # These columns are left out of the report.
    df_valid_alerttype = df_valid_alerttype.drop(columns=['idx', 'tenantid', 'valid'])

# The report file is written in both cases.
df_valid_alerttype.to_csv(file_alerttypes)
print(f"INFO:\tAlerttypes saved to {file_alerttypes}")

# 3b - Staging the Alerts to APM

In [None]:
# Step 1: Keep the alerts whose alert type is one of the retained alert types.
_has_known_alert_type = df_alerts['AlertType'].isin(df_alerttypes['alertTypeID'])
df_alerts_filtered = df_alerts[_has_known_alert_type].reset_index(drop=True)

# Step 2: Keep the technical objects referenced by those alerts, either as
# equipment or as functional location.
_transform_ids = df_transform_indicators['id']
filtered_TO_df = df_transform_indicators[
    _transform_ids.isin(df_alerts_filtered['EquipmentID'])
    | _transform_ids.isin(df_alerts_filtered['FunctionalLocationID'])
]

# Step 3: Join the technical objects to the alerts through EquipmentID.
merged_df_equipment = pd.merge(
    filtered_TO_df,
    df_alerts_filtered,
    how='inner',
    left_on='id',
    right_on='EquipmentID',
)

# Step 4: Join the technical objects to the alerts through FunctionalLocationID.
merged_df_functional = pd.merge(
    filtered_TO_df,
    df_alerts_filtered,
    how='inner',
    left_on='id',
    right_on='FunctionalLocationID',
)

# Stack both join results into one frame with a fresh index.
merged_df = pd.concat(
    [merged_df_equipment, merged_df_functional],
    ignore_index=True,
)

# Step 5: Convert the date columns to datetime

#Function to convert the date string to the required format
def convert_date(date_str):
    """Convert a ``/Date(<epoch-milliseconds>)/`` string to an ISO-style timestamp.

    Args:
        date_str: Text such as ``/Date(1700000000000)/``. An optional
            ``+hhmm`` / ``-hhmm`` suffix after the number is accepted and
            ignored. ``None``, NaN and blank strings are treated as missing.

    Returns:
        The timestamp formatted as ``YYYY-MM-DDTHH:MM:SS.ffZ``, or ``None``
        when the input is missing.

    Raises:
        ValueError: If the text does not contain a parsable integer.
    """
    import re  # local import: the cell's top-level imports are not changed

    # Missing values would otherwise fail on the slicing below.
    if date_str is None or (not isinstance(date_str, str) and pd.isna(date_str)):
        return None
    if isinstance(date_str, str) and not date_str.strip():
        return None

    matched = re.fullmatch(r"/Date\((-?\d+)(?:[+-]\d{4})?\)/", date_str.strip())
    if matched:
        timestamp_ms = int(matched.group(1))
    else:
        # Fall back to the positional extraction for any other wrapper of
        # the same shape (6 leading and 2 trailing characters).
        timestamp_ms = int(date_str[6:-2])

    # Convert to datetime, then format as required
    datetime_obj = pd.to_datetime(timestamp_ms, unit='ms')
    # NOTE(review): [:-4] keeps two fractional digits (centiseconds), so
    # millisecond precision is truncated; kept unchanged — confirm whether
    # three digits ([:-3]) were intended.
    return datetime_obj.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-4] + 'Z'

# Reformat both date columns with convert_date.
for _date_column in ('TriggeredOn', 'LastOccuredOn'):
    merged_df[_date_column] = merged_df[_date_column].apply(convert_date)

# Step 6: Combine EquipmentID and FunctionalLocationID into a single column

# 'TechnicalObject' takes the EquipmentID when it is present and non-empty,
# otherwise the FunctionalLocationID.
_equipment_id = merged_df['EquipmentID']
_has_equipment_id = _equipment_id.notna() & (_equipment_id != '')
merged_df['TechnicalObject'] = np.where(
    _has_equipment_id,
    _equipment_id,
    merged_df['FunctionalLocationID'],
)

# The two source columns are no longer needed.
merged_df = merged_df.drop(columns=['EquipmentID', 'FunctionalLocationID'])

# Step 7: Keep only the columns used by the following cells.
columns_to_keep = [
    'internalId',
    'type',
    'externalId',
    'AlertType',
    'TechnicalObject',
    'TriggeredOn',
]
merged_df = merged_df[columns_to_keep]

merged_df.head()



### Transform the data as per the structure required to post 

In [None]:
import json

def _technical_object(row):
    """Build the technical-object dictionary for one exploded alert row."""
    return {
        'Number': row['externalId'],
        'SSID': 'QM7_910',
        'Type': 'EQUI' if row['type'] == 'EQU' else 'FLOC'
    }


def _distinct_by_number(technical_objects):
    """Return the dictionaries as a list, keeping the last one seen per 'Number'."""
    return list({entry['Number']: entry for entry in technical_objects}.values())


# Step 1: Collect the externalIds per (TriggeredOn, AlertType, type) and
# explode them again so that each externalId sits on its own row.
grouped_alerts = (
    merged_df
    .groupby(['TriggeredOn', 'AlertType', 'type'])
    .agg({'externalId': list})
    .reset_index()
)
expanded_alerts = grouped_alerts.explode('externalId').reset_index(drop=True)

# Step 2: Attach the technical-object dictionary to every row.
expanded_alerts['TechnicalObject'] = expanded_alerts.apply(_technical_object, axis=1)

# Step 3: One row per (TriggeredOn, AlertType) holding the list of its
# distinct technical objects.
grouped_df = (
    expanded_alerts
    .groupby(['TriggeredOn', 'AlertType'])
    .agg({'TechnicalObject': _distinct_by_number})
    .reset_index()
)

# Step 4: Keep the columns needed downstream, as an independent copy.
final_df = grouped_df[['TriggeredOn', 'AlertType', 'TechnicalObject']].copy()

# Step 5: Serialise the technical-object lists to JSON strings.
final_df['TechnicalObject'] = final_df['TechnicalObject'].apply(json.dumps)

# Display the resulting DataFrame
final_df.head()


### Stage the data for reference

In [None]:
from modules.util.database import SQLAlchemyClient, ApmAlerts

# Step 2 : Stage Alerts in DB

log.info("Connecting to database")
db = SQLAlchemyClient(config_id=CONFIG_ID)
log.info(f"Connected to {db.get_database_url()}")

log.info(f"Creating {ApmAlerts.__tablename__} table")
db.table_create_all()

# Record count before loading; nothing has been updated at this point.
log.info(f"{ApmAlerts.__tablename__} holds {db.count(ApmAlerts)} records before update")
Logger.blank_line(log)

log.info(f"Updating {ApmAlerts.__tablename__}")
log.info(f"Drop and Reload configuration is set to {db.drop_reload}")
if db.drop_reload:
    log.warning(f"Clearing data from {ApmAlerts.__tablename__}")
    db.truncate(ApmAlerts)  # truncate all existing data (remove if needed)
Apmalerts = SQLAlchemyClient.dataframe_to_object(final_df, ApmAlerts)
db.insert_batches(data=Apmalerts)  # insert the transformed alerts

log.info(f"Updated {ApmAlerts.__tablename__} with {db.count(ApmAlerts)} records")



#### Post the alerts to APM

* To keep track of the alerts migrated, stage the alerts that are migrated  in another table "Post Alerts"
* If successful, the error message and error code will be None, else capture them as well.

##### Generate a UDR for Alerts

* A UDR will be published for the users to fill in the Deduplication count. This information can be added to the api call to keep track of the alerts of that corresponding Alerttype.

In [None]:
# Export the staged alerts as a UDR with an empty DeduplicationCount column
# for the users to fill in.
to_post_alerts = db.select(ApmAlerts)
to_post_alerts_df = pd.DataFrame(to_post_alerts)
to_post_alerts_df['DeduplicationCount'] = None
to_post_alerts_df.to_csv(UDR_Alerts, index=False)

# Nothing has been posted at this point, so the message must not say "Posted".
log.info(f"Alerts to be posted saved as UDR in {UDR_Alerts}")

* After the UDR is filled with required information, we post the data to APM

In [None]:
from modules.util.database import SQLAlchemyClient, PostAlerts
import json
import requests

post_alerts_api = APMAlertAPIWrapper(config_id=CONFIG_ID)
if db.drop_reload:
    # The replacement field is kept on one line: a line break inside an
    # f-string expression is a SyntaxError before Python 3.12.
    log.warning(f"Clearing data from {PostAlerts.__tablename__}")
    db.truncate(PostAlerts)

# NOTE(review): the loop posts the rows of final_df; the alerts UDR and its
# DeduplicationCount column are not read back here — confirm whether the
# filled-in UDR is meant to be used.
for _, row in final_df.iterrows():
    alert_data = {
        "alert_type": row['AlertType'],
        "triggered_on": row['TriggeredOn'],
        "technical_objects": json.loads(row['TechnicalObject']),
    }

    response = None
    error_message = None
    error_code = None
    posted = False
    try:
        response = post_alerts_api.postAlert(**alert_data)
        posted = True
    except requests.exceptions.RequestException as e:
        # RequestException also covers HTTPError; failures without an HTTP
        # response (e.g. connection errors) have e.response set to None.
        log.error(f"ERROR:\tFailed to post alert: {alert_data}")
        log.error(f"ERROR:\t{e}")
        if e.response is not None:
            error_message = e.response.text
            error_code = e.response.status_code
        else:
            error_message = str(e)

    # Stage the outcome of this row: both error fields stay None on success.
    current_row_df = row.to_frame().T
    current_row_df['ErrorMessage'] = error_message
    current_row_df['ErrorCode'] = error_code
    posting_alert = SQLAlchemyClient.dataframe_to_object(current_row_df, PostAlerts)
    db.insert_batches(data=posting_alert)
    if posted:
        log.info(f"Response: {response}")


#### Create an error report with collected data

In [None]:
from modules.util.database import SQLAlchemyClient, V_APM_Alerts

# Rows of V_APM_Alerts whose `valid` flag is not 'X' make up the error report.
error_alerts = db.select(
    model=V_APM_Alerts,
    where=[V_APM_Alerts.valid != 'X'],
)

# Load the selected rows into a DataFrame and write the report to disk.
df_error_alerts = pd.DataFrame(data=error_alerts)
df_error_alerts.to_csv(path_or_buf=file_alerts)
log.info(f"Alerts saved to {file_alerts}")

df_error_alerts.head()

# WIP : Extracting Alerts from APM

In [None]:
# from modules.apm.manage_alerts import APM_AlertAPIWrapper  # Importing AlertAPIWrapper from alerts.py
# import pandas as pd

# file_2 = r'../migration-data/alerts-download/2_apm_alerts.csv'
# file_3 = r'../migration-data/alerts-download/3_alert_types.csv'

# CONFIG_ID = 'aspm-test'
# alert_api = APM_AlertAPIWrapper(config_id=CONFIG_ID)
# print(f"INFO:\t{alert_api.get_apm_alert_count()} alerts found")

# alerts = alert_api.get_apm_alerts()

# # Normalize the alerts data
# df = pd.json_normalize(alerts, sep='_')

# # Further normalize the 'value' column
# value_normalized = pd.json_normalize(df['value'].explode().tolist(), sep='_')

# # Concatenate the normalized 'value' data with the original dataframe
# df = pd.concat([df.drop(columns=['value']).loc[df.index.repeat(df['value'].str.len())].reset_index(drop=True), value_normalized], axis=1)

# # Further normalize 'Technical Objects' into 3 columns with placeholders
# technical_objects_normalized = pd.json_normalize(df['TechnicalObject'].explode().dropna().tolist(), sep='_')
# technical_objects_normalized.columns = ['TechnicalObject_' + col for col in technical_objects_normalized.columns]

# # Concatenate the normalized 'Technical Objects' data with the original dataframe
# df = pd.concat([df.drop(columns=['TechnicalObject']).loc[df.index.repeat(df['TechnicalObject'].str.len())].reset_index(drop=True), technical_objects_normalized], axis=1)

# print(f"INFO:\t{len(df)} alerts extracted")

# df.to_csv(file_2, index=False)
# print(f'INFO:\t{file_2} generated')

# # Drop off the columns that are not needed
# df = df.drop(columns=['@context', '@metadataEtag'])


In [None]:
# from modules.util.sqlalchemy import SQLAlchemyClient, Alerts

# # Step 2 : Stage Alerts in DB

# print(f"INFO:\tConnecting to database")
# db = SQLAlchemyClient(config_id=CONFIG_ID)
# print(f"INFO:\tConnected to {db.get_database_url()}")
# print(f"INFO:\tUpdating {Alerts.__tablename__}")

# print(f"WARN:\tClearing data from {Alerts.__tablename__}")
# db.table_create_all()
# db.truncate(Alerts)  # truncate all existing data (remove if needed)

# alerts_data = []
# for index, row in df.iterrows():
#     row_data = row.to_dict()
#     alert = Alerts(**row_data)
#     alerts_data.append(alert)

# db.insert_batches(data=alerts_data)  # insert the new alerts
# print(f"INFO:\tUpdated {Alerts.__tablename__} with {db.count(Alerts)} records")

In [None]:
# # Fetch alert types
# alert_types = alert_api.get_apm_alerttypes()

# # Normalize the alert types data
# df_alert_types = pd.json_normalize(alert_types, sep='_')

# # Ensure the entire file is extracted and stored
# df_alert_types.to_csv(file_3, index=False)
# print(f'INFO:\t{file_3} generated')

# # Remove @context and @metadataEtag columns
# df_alert_types = df_alert_types.drop(columns=['@context', '@metadataEtag'])

# # Split the 'value' column into separate columns
# value_normalized = pd.json_normalize(df_alert_types['value'].explode().tolist(), sep='_')

# # Concatenate the normalized 'value' data with the original dataframe
# df_alerttype = pd.concat([df_alert_types.drop(columns=['value']).loc[df_alert_types.index.repeat(df_alert_types['value'].str.len())].reset_index(drop=True), value_normalized], axis=1)

# # Display the alert types dataframe
# print(f"INFO:\t{len(df_alerttype)} alert types extracted")

In [None]:
# from modules.util.sqlalchemy import  AlertTypes

# # Step 3: Stage Alert Types in DB

# print(f"INFO:\tUpdating {AlertTypes.__tablename__} with alert types")

# print(f"WARN:\tClearing data from {Alerts.__tablename__}")
# db.table_create_all()
# db.truncate(AlertTypes)  # truncate all existing data (remove if needed)

# alert_types_data = []
# for index, row in df_alerttype.iterrows():
#     row_data = row.to_dict()
#     alert_type = AlertTypes(**row_data)
#     alert_types_data.append(alert_type)

# db.insert_batches(data=alert_types_data)  # insert the new alert types
# print(f"INFO:\tUpdated {AlertTypes.__tablename__} with {db.count(AlertTypes)} records")

In [None]:
# # Group the dataframe by 'Id' and aggregate the technical objects
# grouped_df = df.groupby('Id').agg({
#     'AlertType': 'first',
#     'TriggeredOn': 'first',
#     'TechnicalObject_Number': list,
#     'TechnicalObject_Type': list
# }).reset_index()

# # grouped_df[grouped_df['Id'] == '2d11181c-114a-4657-81b3-229f88425f6f']

# ssid = "QM7_910"



In [None]:
# # Iterate through the alert types dataframe and post each alert type
# for _, row in df_alerttype.iterrows():
#     alert_type_data = {
#         "Name": row['Name'],
#         "Category": row['Category'],
#         "CategoryDescription": row['CategoryDescription'],
#         "Source": row['Source'],
#         "Description": row['Description'],
#         "DefaultSeverity": row['DefaultSeverity'],
#         "DefaultSeverityDescription": row['DefaultSeverityDescription'],
#         "DeduplicationPeriod": row['DeduplicationPeriod'],
#         "DeduplicationIsEnabled": row['DeduplicationIsEnabled']
#     }
    
#     # Call the post_alerttype method
#     response = alert_api.post_alerttype(
#         name=row['Name'],
#         category=row['Category'],
#         category_description=row['CategoryDescription'],
#         source=row['Source'],
#         description=row['Description'],
#         default_severity=row['DefaultSeverity'],
#         default_severity_description=row['DefaultSeverityDescription'],
#         deduplication_period=row['DeduplicationPeriod'],
#         deduplication_is_enabled=row['DeduplicationIsEnabled']
#     )
    
#     # Print the response for debugging
#     print(response)

# print(f"INFO:\t{len(df_alerttype)} alert types posted in the system")

In [None]:
# # Group the dataframe by 'Id' and aggregate the technical objects
# grouped_df = df.groupby('Id').agg({
#     'AlertType': 'first',
#     'TriggeredOn': 'first',
#     'TechnicalObject_Number': list,
#     'TechnicalObject_Type': list
# }).reset_index()

# # grouped_df[grouped_df['Id'] == '2d11181c-114a-4657-81b3-229f88425f6f']

# ssid = "QM7_910"

# # Iterate through the grouped dataframe and prepare the technical objects list
# for _, row in grouped_df.iterrows():
#     technical_objects = [
#         {
#             "Number": number,
#             "SSID": ssid,
#             "Type": type_
#         }
#         for number, type_ in zip(row['TechnicalObject_Number'], row['TechnicalObject_Type'])
#     ]
#     # Call the post_alert method
#     response = alert_api.post_alert(
#         alert_type=row['AlertType'],
#         triggered_on=row['TriggeredOn'],
#         technical_objects=technical_objects
#     )
    
#     # Print the response for debugging
#     print(response)