In [28]:
import sys, os, warnings
import eland as ed
import pandas as pd
import re
import html
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from elasticsearch import Elasticsearch
from datetime import datetime, timedelta

In [40]:
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', None)

In [30]:
# Construct the path to the .env file which is one directory up
dotenv_path = "/home/d3r/Documents/Github/vector_search_poc/.env"

# Load the environment variables from the specified path
load_dotenv(dotenv_path=dotenv_path)

# Access the environment variables using os.environ
es_host = os.environ.get("ELASTICSEARCH_HOST")
es_user = os.environ.get("ELASTICSEARCH_USER")
es_pass = os.environ.get("ELASTICSEARCH_PASS")

# Connect to Elasticsearch
es = Elasticsearch(
    hosts=[es_host],
    basic_auth=(es_user, es_pass)
)
source_index = 'odoo.helpdesk.ticket'
mapping = es.indices.get_mapping(index=source_index)

In [5]:
date_field = 'create_date'

# Calculate the start date of the past month
end_date = datetime.now()
start_date = end_date - timedelta(days=30)

# Format dates in a way Elasticsearch expects
start_date_str = start_date.strftime('%Y-%m-%d %H:%M:%S')
end_date_str = end_date.strftime('%Y-%m-%d %H:%M:%S')

# Elasticsearch query to filter data from the past month
query = {
    "query": {
        "range": {
            date_field: {  # Make sure this field name matches your date field in Elasticsearch
                "gte": start_date_str,
                "lt": end_date_str
            }
        }
    }
}

# Initialize scroll
scroll = '2m'  # Keep the scroll context alive for 2 minutes
data = []  # To hold all the documents

# Perform the initial search
response = es.search(index=source_index, body=query, scroll=scroll, size=1000)
scroll_id = response['_scroll_id']

# Fetch subsequent batches of results
while True:
    # Get the next batch of documents
    response = es.scroll(scroll_id=scroll_id, scroll=scroll)
    
    # Break out of the loop when no more documents are returned
    if not response['hits']['hits']:
        break
    
    # Add the documents from this batch to our list
    data.extend([hit["_source"] for hit in response['hits']['hits']])
    
    # Update the scroll ID for the next scroll request
    scroll_id = response['_scroll_id']

# Close the scroll context
es.clear_scroll(scroll_id=scroll_id)

# Convert to Pandas DataFrame
df = pd.DataFrame(data)


  response = es.search(index=source_index, body=query, scroll=scroll, size=1000)


KeyboardInterrupt: 

In [None]:
df

In [None]:
from ydata_profiling import ProfileReport

# Assuming 'df' is your DataFrame
profile = ProfileReport(df, title="Pandas Profiling Report", explorative=True)

# To save the report to a file
profile.to_file("your_report.html")

In [None]:
df.columns

In [None]:
df.info()

In [31]:
date_field = 'create_date'

# Calculate the start date of the past month
end_date = datetime.now()
start_date = end_date - timedelta(days=30)

# Format dates in a way Elasticsearch expects
start_date_str = start_date.strftime('%Y-%m-%d %H:%M:%S')
end_date_str = end_date.strftime('%Y-%m-%d %H:%M:%S')

# Elasticsearch query to filter data from the past month
query = {
    "query": {
        "range": {
            date_field: {  # Make sure this field name matches your date field in Elasticsearch
                "gte": start_date_str,
                "lt": end_date_str
            }
        }
    },
    "_source": [
    "description",
    "solution",
    "description_plain",
    "product_id",
    "ticket_type_id",
    "team_id",
    "team_level",
    "team",
    "is_alert",
    "stage_id",
    "stage_id_name",
    "handle_type",
    "current_duration",
    "message_total_count",
    "total_hours_spent",
    "cicore_id_name",
    "cicorfe_id",
    "cicore_id_name"]
}

# Initialize scroll
scroll = '2m'  # Keep the scroll context alive for 2 minutes
data = []  # To hold all the documents

# Perform the initial search
response = es.search(index=source_index, body=query, scroll=scroll, size=1000)
scroll_id = response['_scroll_id']

# Fetch subsequent batches of results
while True:
    # Get the next batch of documents
    response = es.scroll(scroll_id=scroll_id, scroll=scroll)
    
    # Break out of the loop when no more documents are returned
    if not response['hits']['hits']:
        break
    
    # Add the documents from this batch to our list
    data.extend([hit["_source"] for hit in response['hits']['hits']])
    
    # Update the scroll ID for the next scroll request
    scroll_id = response['_scroll_id']

# Close the scroll context
es.clear_scroll(scroll_id=scroll_id)

# Convert to Pandas DataFrame
df = pd.DataFrame(data)


  response = es.search(index=source_index, body=query, scroll=scroll, size=1000)


In [32]:
df.head(3)

Unnamed: 0,current_duration,message_total_count,team_id,solution,product_id,description_plain,stage_id,ticket_type_id,description,cicore_id_name,is_alert,team,stage_id_name,handle_type,team_level,total_hours_spent
0,0.0,6,1216.0,<p><br></p>,23061,"\n""As of 4/19/23, the Edelman Phishing triage ...",3.0,2.0,"<p>""As of 4/19/23, the Edelman Phishing triage...",SOC Incident Management Edelman,Alert,"{'team_category': 'Provider', 'id': 1216, 'tea...",Solved,Monitoring Ticket,Level 2,0.2
1,0.0,6,1216.0,<p><br></p>,23069,\n\n [EXTERNAL SENDER]\nThis message was se...,3.0,2.0,"<p style=""margin-bottom: 0px;""><br> [EXTERN...",SOC Incident Management Northside Hospital,Alert,"{'team_category': 'Provider', 'id': 1216, 'tea...",Solved,Monitoring Ticket,Level 2,0.17
2,0.0,8,2.0,<p><br></p>,0,\n [EXTERNAL SENDER]\n\n\nReview this incid...,3.0,5.0,"<p style=""border:1px; border-style:solid; bord...",,Alert,"{'team_category': 'Provider', 'id': 2, 'team_l...",Solved,Monitoring Ticket,Level 2,1.15


In [33]:
def clean_text(text):

    if not isinstance(text, str):
        text = str(text)

    from bs4 import BeautifulSoup
    soup = BeautifulSoup(text, "html.parser")
    text = soup.get_text(separator=" ")
    text = html.unescape(text)
    text = text.replace(u'\xa0', ' ')
    text = text.lower()
    text = re.sub(r'\s+', ' ', text)  # Replace multiple spaces with a single space
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation and other non-word characters
    text = text.strip()  # Remove leading and trailing spaces
    return text


In [61]:
def safe_convert_to_string(x):
    try:
        # Attempt to convert to string
        result = str(x)
        # Check if the conversion results in an empty or undesirable string
        if result in ['', 'nan', '{}', '[]']:
            return 'Unknown'
        return result
    except:
        # In case of any error during conversion, return 'Unknown'
        return 'Unknown'


In [62]:
def process_column(data):
    data = clean_text(data)
    data = safe_convert_to_string(data)
    return data

In [10]:
def print_null_percentage(df):
    """
    This function takes a pandas DataFrame as input and prints the percentage of null
    values in each column, distinguishing between numeric and categorical data types.
    
    Args:
    df (pd.DataFrame): The DataFrame to analyze for null values.
    
    Returns:
    None: Outputs the percentage of null values to the console.
    """
    # Check total number of entries in the DataFrame
    total_rows = len(df)
    
    # Initialize lists to store results
    numeric_nulls = []
    categorical_nulls = []
    
    # Loop through each column in the DataFrame
    for column in df.columns:
        # Calculate percentage of null values
        null_count = df[column].isnull().sum()
        null_percentage = (null_count / total_rows) * 100
        
        # Check data type of the column
        if pd.api.types.is_numeric_dtype(df[column]):
            numeric_nulls.append(f"{column} (Numeric): {null_percentage:.2f}% Null Values")
        else:
            categorical_nulls.append(f"{column} (Categorical): {null_percentage:.2f}% Null Values")
    
    # Print the results
    print("Null Value Percentages by Column:")
    for info in numeric_nulls + categorical_nulls:
        print(info)

In [11]:
print_null_percentage(df)

Null Value Percentages by Column:
current_duration (Numeric): 0.00% Null Values
message_total_count (Numeric): 0.00% Null Values
team_id (Numeric): 1.51% Null Values
stage_id (Numeric): 0.12% Null Values
ticket_type_id (Numeric): 1.38% Null Values
total_hours_spent (Numeric): 0.00% Null Values
solution (Categorical): 1.24% Null Values
product_id (Categorical): 1.58% Null Values
description_plain (Categorical): 1.97% Null Values
description (Categorical): 1.18% Null Values
cicore_id_name (Categorical): 1.52% Null Values
is_alert (Categorical): 0.00% Null Values
team (Categorical): 1.51% Null Values
stage_id_name (Categorical): 0.12% Null Values
handle_type (Categorical): 0.00% Null Values
team_level (Categorical): 0.00% Null Values


In [36]:
columns_to_impute = ['team_id', 'stage_id', 'ticket_type_id','product_id', 'cicore_id_name', 'team','stage_id_name','handle_type','stage_id_name']

for column in columns_to_impute:
    df[column] = df[column].fillna(9999)

In [64]:
df = df.dropna(subset=['description', 'description_plain','solution'])
columns_to_process = ['description', 'description_plain', 'solution']
for col in columns_to_process:
    df[col] = df[col].apply(process_column)


In [65]:
df

Unnamed: 0,current_duration,message_total_count,team_id,solution,product_id,description_plain,stage_id,ticket_type_id,description,cicore_id_name,is_alert,stage_id_name,handle_type,team_level,total_hours_spent,team_category,id,team_level.1,owner_id_name
0,0.000000,6,1216.0,Unknown,23061,As of 41923 the Edelman Phishing triage process has been updated Reference the OneNote page Edelman Access Runbooks Phishing Tickets Guide for process instructions This ticket fires everyday at 8AM analyst who claims ticket is responsible for working Edelman phishing tickets for 30 minutes per ticket Do not worry about completing as many as possible or trying to empty out the queue,3.0,2.0,As of 41923 the Edelman Phishing triage process has been updated Reference the OneNote page Edelman Access Runbooks Phishing Tickets Guide for process instructions This ticket fires everyday at 8AM analyst who claims ticket is responsible for working Edelman phishing tickets for 30 minutes per ticket Do not worry about completing as many as possible or trying to empty out the queue,SOC Incident Management Edelman,Alert,Solved,Monitoring Ticket,Level 2,0.20,Provider,1216.0,Level 2,"Swisher, Ryan | ExactlyIT Inc"
1,0.000000,6,1216.0,Unknown,23069,EXTERNAL SENDER This message was sent to you automatically by Exabeam on the following high risk event by an asset High risk session by asset mish1aysnfwcb with a risk score of 160 Asset Name mish1aysnfwcb Asset IP 45594010 Top user for this asset Ryan Dockery nsf_train1 Sydney Pobee Radiology iis Session Start Time June 10 2024 1200AM GMT Session End Time June 11 2024 1200AM GMT Top Risk Reason A security alert is associated with the asset This is the first occurrence of this security alert name on this asset This is a SOC Alert for Northside Hospital Exabeam Advanced Analytics SIEM n EXTERNAL SENDER This message was sent to you automatically by Exabeam on the following high risk event by an asset High risk session by asset bokzt8kps with a risk score of 150 Asset Name bokzt8kps Asset IP 192168147140 Top user for this asset Tamara Onley Catalina Stephen Roberto Cimini Session Start Time June 10 2024 1200AM GMT Session End Time June 11 2024 1200AM GMT Top Risk Reason A security alert is associated with the asset This is a SOC Alert for Northside Hospital Exabeam Advanced Analytics SIEM,3.0,2.0,EXTERNAL SENDER This message was sent to you automatically by Exabeam on the following high risk event by an asset High risk session by asset mish1aysnfwcb with a risk score of 160 Asset Name mish1aysnfwcb Asset IP 45594010 Top user for this asset Ryan Dockery nsf_train1 Sydney Pobee Radiology iis Session Start Time June 10 2024 1200AM GMT Session End Time June 11 2024 1200AM GMT Top Risk Reason A security alert is associated with the asset This is the first occurrence of this security alert name on this asset This is a SOC Alert for Northside Hospital Exabeam Advanced Analytics SIEM n EXTERNAL SENDER This message was sent to you automatically by Exabeam on the following high risk event by an asset High risk session by asset bokzt8kps with a risk score of 150 Asset Name bokzt8kps Asset IP 192168147140 Top user for this asset Tamara Onley Catalina Stephen Roberto Cimini Session Start Time June 10 2024 1200AM GMT Session End Time June 11 2024 1200AM GMT Top Risk Reason A security alert is associated with the asset This is a SOC Alert for Northside Hospital Exabeam Advanced Analytics SIEM,SOC Incident Management Northside Hospital,Alert,Solved,Monitoring Ticket,Level 2,0.17,Provider,1216.0,Level 2,"Swisher, Ryan | ExactlyIT Inc"
2,0.000000,8,2.0,Unknown,0,EXTERNAL SENDER Review this incident Microsoft 365 Defender has detected a security threat in your environment View incident details ID 8610 Incident name User requested to release a quarantined message Severity Informational Categories InitialAccess Time June 10 2024 742 UTC Incident page httpssecuritymicrosoftcomincidentsbyalertalertIdfa7b05fb3fa749cfab480008dc89208864sourceincidentemailtidca84dc0c3fcb438685adfbb73fbfded4 Account information Organization name Gilson Inc Privacy Statement Microsoft Corporation One Microsoft Way Redmond WA 98052,3.0,5.0,EXTERNAL SENDER Review this incident Microsoft 365 Defender has detected a security threat in your environment View incident details ID 8610 Incident name User requested to release a quarantined message Severity Informational Categories InitialAccess Time June 10 2024 742 UTC Incident page httpssecuritymicrosoftcomincidentsbyalertalertIdfa7b05fb3fa749cfab480008dc89208864sourceincidentemailtidca84dc0c3fcb438685adfbb73fbfded4 Account information Organization name Gilson Inc Privacy Statement Microsoft Corporation One Microsoft Way Redmond WA 98052,,Alert,Solved,Monitoring Ticket,Level 2,1.15,Provider,2.0,Level 2,"Lovelace, Jimmy_EIT | ExactlyIT Inc"
3,0.000000,6,1216.0,Unknown,23061,THE PROCEDURE FOR THIS TICKET WAS UPDATED ON 8102023 REFERENCE THE GUIDE LOCATED IN THE SOC ONENOTE EDELMAN SPYWARE CALLBACK CHECK,3.0,2.0,THE PROCEDURE FOR THIS TICKET WAS UPDATED ON 8102023 REFERENCE THE GUIDE LOCATED IN THE SOC ONENOTE EDELMAN SPYWARE CALLBACK CHECK,SOC Incident Management Edelman,Alert,Solved,Monitoring Ticket,Level 2,0.08,Provider,1216.0,Level 2,"Swisher, Ryan | ExactlyIT Inc"
4,0.000000,8,1134.0,Unknown,0,Alert Streaming Latency Breached Threshold Streaming latency of 25155 milliseconds between device JPA2327P0MA and CloudVision is greater than the threshold of 10000 milliseconds Q3YJHDS2NPGQZI Severity warning Integration name IP4GCloudvision,3.0,5.0,Alert Streaming Latency Breached Threshold Streaming latency of 25155 milliseconds between device JPA2327P0MA and CloudVision is greater than the threshold of 10000 milliseconds Q3YJHDS2NPGQZI Severity warning Integration name IP4GCloudvision,,Alert,Solved,Monitoring Ticket,Level 2,0.00,Provider,1134.0,Level 2,"Redish, Graham_EIT | ExactlyIT Inc"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23135,8.683333,6,1155.0,Based on previous records no actions required ticket will be closed as informative,22061,This message is a warning that an email you are trying to send has not yet been delivered You do not have to do anything yet as the email is still queued for delivery An email you addressed to email address dpratapuladominoinvacarecontractorcom has not yet been delivered The problem appears to be Recipient server unavailable or busy Additional information follows Connection timed out Connection timed out This condition occurred after 6 attempts to deliver over a period of 1 hours You will receive a rejection notice after a maximum of 4 days if the email cannot be delivered If you sent the email to multiple recipients you will receive one of these messages for each one which is still undelivered otherwise they have been sent,562.0,6.0,This message is a warning that an email you are trying to send has not yet been delivered You do not have to do anything yet as the email is still queued for delivery An email you addressed to email address dpratapuladominoinvacarecontractorcom has not yet been delivered The problem appears to be Recipient server unavailable or busy Additional information follows Connection timed out Connection timed out This condition occurred after 6 attempts to deliver over a period of 1 hours You will receive a rejection notice after a maximum of 4 days if the email cannot be delivered If you sent the email to multiple recipients you will receive one of these messages for each one which is still undelivered otherwise they have been sent,IVC - Other,Others,Solved,Service Desk handled,Level 1,0.00,Provider,1155.0,Level 1,"Jacome, Roberto_EIT | ExactlyIT Inc"
23136,2.600000,30,1155.0,Unknown,22061,Other Information ___________ Application Type IVC ESKER on Demand Request Type IVCInstallation Leah will need Esker application to approve invoices while I am out of office next week,558.0,9.0,Other Information ___________ Application Type IVC ESKER on Demand Request Type IVCInstallation Leah will need Esker application to approve invoices while I am out of office next week,IVC - ESKER on Demand,Others,With Vendor,Service Desk handled,Level 1,0.00,Provider,1155.0,Level 1,"Jacome, Roberto_EIT | ExactlyIT Inc"
23137,2.833333,72,373.0,Unknown,12552,Original title Failed Connectivity Description Alert Nable Tom Barrow TOM General TBCORLANDO2 Failed Connectivity Severity Failed Customer Tom Barrow TOM General Device Name TBCORLANDO2 AlertLookupConnectivityFailed Device IP 192168416 Status URL Integration Name Alert Code IOC000 Connectivity Notes No device note for device TBCORLANDO2 To remotely access this device click the following link httpsncod504nablecom443deepLinkActiondomethoddeviceRCcustomerID2060deviceID203683526languageen_US Device Property Device Description Network device discovered using Asset Discovery 203683526 Device Property Enviroment Device Description Enviroment Network device discovered using Asset Discovery 203683526 Issue At 20240621 195827 the Connectivity service transitioned from a Warning state to a Failed state Here are the details of the Connectivity service Packet Loss 1000 Time To Live 12400 Hops Average Round Trip Time 43000 msec DNS Resolution True,3.0,3.0,Original title Failed Connectivity Description Alert Nable Tom Barrow TOM General TBCORLANDO2 Failed Connectivity Severity Failed Customer Tom Barrow TOM General Device Name TBCORLANDO2 AlertLookupConnectivityFailed Device IP 192168416 Status URL Integration Name Alert Code IOC000 Connectivity Notes No device note for device TBCORLANDO2 To remotely access this device click the following link httpsncod504nablecom443deepLinkActiondomethoddeviceRCcustomerID2060deviceID203683526languageen_US Device Property Device Description Network device discovered using Asset Discovery 203683526 Device Property Enviroment Device Description Enviroment Network device discovered using Asset Discovery 203683526 Issue At 20240621 195827 the Connectivity service transitioned from a Warning state to a Failed state Here are the details of the Connectivity service Packet Loss 1000 Time To Live 12400 Hops Average Round Trip Time 43000 msec DNS Resolution True,,Alert,Solved,Monitoring Ticket,Level 2,0.67,Provider,373.0,Level 2,"Gaona, Esaul_EIT | ExactlyIT Inc"
23138,2.400000,171,373.0,Unknown,12552,Original title Failed Connectivity Description Alert Nable Tom Barrow TOM General TBCSAV1 Failed Connectivity Severity Failed Customer Tom Barrow TOM General Device Name TBCSAV1 AlertLookupConnectivityFailed Device IP 192168918 Status URL Integration Name Alert Code IOC000 Connectivity Notes No device note for device TBCSAV1 To remotely access this device click the following link httpsncod504nablecom443deepLinkActiondomethoddeviceRCcustomerID2060deviceID1476467970languageen_US Device Property Device Description Network device discovered using Asset Discovery 1476467970 Device Property Enviroment Device Description Enviroment Network device discovered using Asset Discovery 1476467970 Issue At 20240621 200942 the Connectivity service transitioned from a Normal state to a Failed state Here are the details of the Connectivity service Packet Loss 3000 Time To Live 12400 Hops Average Round Trip Time 19500 msec DNS Resolution True,3.0,3.0,Original title Failed Connectivity Description Alert Nable Tom Barrow TOM General TBCSAV1 Failed Connectivity Severity Failed Customer Tom Barrow TOM General Device Name TBCSAV1 AlertLookupConnectivityFailed Device IP 192168918 Status URL Integration Name Alert Code IOC000 Connectivity Notes No device note for device TBCSAV1 To remotely access this device click the following link httpsncod504nablecom443deepLinkActiondomethoddeviceRCcustomerID2060deviceID1476467970languageen_US Device Property Device Description Network device discovered using Asset Discovery 1476467970 Device Property Enviroment Device Description Enviroment Network device discovered using Asset Discovery 1476467970 Issue At 20240621 200942 the Connectivity service transitioned from a Normal state to a Failed state Here are the details of the Connectivity service Packet Loss 3000 Time To Live 12400 Hops Average Round Trip Time 19500 msec DNS Resolution True,,Alert,Solved,Monitoring Ticket,Level 2,1.17,Provider,373.0,Level 2,"Gaona, Esaul_EIT | ExactlyIT Inc"


In [41]:
df.team

0              {'team_category': 'Provider', 'id': 1216, 'team_level': 'Level 2', 'owner_id_name': 'Swisher, Ryan | ExactlyIT Inc'}
1              {'team_category': 'Provider', 'id': 1216, 'team_level': 'Level 2', 'owner_id_name': 'Swisher, Ryan | ExactlyIT Inc'}
2           {'team_category': 'Provider', 'id': 2, 'team_level': 'Level 2', 'owner_id_name': 'Lovelace, Jimmy_EIT | ExactlyIT Inc'}
3              {'team_category': 'Provider', 'id': 1216, 'team_level': 'Level 2', 'owner_id_name': 'Swisher, Ryan | ExactlyIT Inc'}
4         {'team_category': 'Provider', 'id': 1134, 'team_level': 'Level 2', 'owner_id_name': 'Redish, Graham_EIT | ExactlyIT Inc'}
                                                                    ...                                                            
23135    {'team_category': 'Provider', 'id': 1155, 'team_level': 'Level 1', 'owner_id_name': 'Jacome, Roberto_EIT | ExactlyIT Inc'}
23136    {'team_category': 'Provider', 'id': 1155, 'team_level': 'Level 1', 

In [42]:
df = pd.concat([df.drop('team', axis=1), df['team'].apply(pd.Series)], axis=1).drop(columns=[0]) 

In [45]:
df.replace(' ', 'unknown', inplace=True)
df

Unnamed: 0,current_duration,message_total_count,team_id,solution,product_id,description_plain,stage_id,ticket_type_id,description,cicore_id_name,is_alert,stage_id_name,handle_type,team_level,total_hours_spent,team_category,id,team_level.1,owner_id_name
0,0.000000,6,1216.0,,23061,As of 41923 the Edelman Phishing triage process has been updated Reference the OneNote page Edelman Access Runbooks Phishing Tickets Guide for process instructions This ticket fires everyday at 8AM analyst who claims ticket is responsible for working Edelman phishing tickets for 30 minutes per ticket Do not worry about completing as many as possible or trying to empty out the queue,3.0,2.0,As of 41923 the Edelman Phishing triage process has been updated Reference the OneNote page Edelman Access Runbooks Phishing Tickets Guide for process instructions This ticket fires everyday at 8AM analyst who claims ticket is responsible for working Edelman phishing tickets for 30 minutes per ticket Do not worry about completing as many as possible or trying to empty out the queue,SOC Incident Management Edelman,Alert,Solved,Monitoring Ticket,Level 2,0.20,Provider,1216.0,Level 2,"Swisher, Ryan | ExactlyIT Inc"
1,0.000000,6,1216.0,,23069,EXTERNAL SENDER This message was sent to you automatically by Exabeam on the following high risk event by an asset High risk session by asset mish1aysnfwcb with a risk score of 160 Asset Name mish1aysnfwcb Asset IP 45594010 Top user for this asset Ryan Dockery nsf_train1 Sydney Pobee Radiology iis Session Start Time June 10 2024 1200AM GMT Session End Time June 11 2024 1200AM GMT Top Risk Reason A security alert is associated with the asset This is the first occurrence of this security alert name on this asset This is a SOC Alert for Northside Hospital Exabeam Advanced Analytics SIEM n EXTERNAL SENDER This message was sent to you automatically by Exabeam on the following high risk event by an asset High risk session by asset bokzt8kps with a risk score of 150 Asset Name bokzt8kps Asset IP 192168147140 Top user for this asset Tamara Onley Catalina Stephen Roberto Cimini Session Start Time June 10 2024 1200AM GMT Session End Time June 11 2024 1200AM GMT Top Risk Reason A security alert is associated with the asset This is a SOC Alert for Northside Hospital Exabeam Advanced Analytics SIEM,3.0,2.0,EXTERNAL SENDER This message was sent to you automatically by Exabeam on the following high risk event by an asset High risk session by asset mish1aysnfwcb with a risk score of 160 Asset Name mish1aysnfwcb Asset IP 45594010 Top user for this asset Ryan Dockery nsf_train1 Sydney Pobee Radiology iis Session Start Time June 10 2024 1200AM GMT Session End Time June 11 2024 1200AM GMT Top Risk Reason A security alert is associated with the asset This is the first occurrence of this security alert name on this asset This is a SOC Alert for Northside Hospital Exabeam Advanced Analytics SIEM n EXTERNAL SENDER This message was sent to you automatically by Exabeam on the following high risk event by an asset High risk session by asset bokzt8kps with a risk score of 150 Asset Name bokzt8kps Asset IP 192168147140 Top user for this asset Tamara Onley Catalina Stephen Roberto Cimini Session Start Time June 10 2024 1200AM GMT Session End Time June 11 2024 1200AM GMT Top Risk Reason A security alert is associated with the asset This is a SOC Alert for Northside Hospital Exabeam Advanced Analytics SIEM,SOC Incident Management Northside Hospital,Alert,Solved,Monitoring Ticket,Level 2,0.17,Provider,1216.0,Level 2,"Swisher, Ryan | ExactlyIT Inc"
2,0.000000,8,2.0,,0,EXTERNAL SENDER Review this incident Microsoft 365 Defender has detected a security threat in your environment View incident details ID 8610 Incident name User requested to release a quarantined message Severity Informational Categories InitialAccess Time June 10 2024 742 UTC Incident page httpssecuritymicrosoftcomincidentsbyalertalertIdfa7b05fb3fa749cfab480008dc89208864sourceincidentemailtidca84dc0c3fcb438685adfbb73fbfded4 Account information Organization name Gilson Inc Privacy Statement Microsoft Corporation One Microsoft Way Redmond WA 98052,3.0,5.0,EXTERNAL SENDER Review this incident Microsoft 365 Defender has detected a security threat in your environment View incident details ID 8610 Incident name User requested to release a quarantined message Severity Informational Categories InitialAccess Time June 10 2024 742 UTC Incident page httpssecuritymicrosoftcomincidentsbyalertalertIdfa7b05fb3fa749cfab480008dc89208864sourceincidentemailtidca84dc0c3fcb438685adfbb73fbfded4 Account information Organization name Gilson Inc Privacy Statement Microsoft Corporation One Microsoft Way Redmond WA 98052,,Alert,Solved,Monitoring Ticket,Level 2,1.15,Provider,2.0,Level 2,"Lovelace, Jimmy_EIT | ExactlyIT Inc"
3,0.000000,6,1216.0,,23061,THE PROCEDURE FOR THIS TICKET WAS UPDATED ON 8102023 REFERENCE THE GUIDE LOCATED IN THE SOC ONENOTE EDELMAN SPYWARE CALLBACK CHECK,3.0,2.0,THE PROCEDURE FOR THIS TICKET WAS UPDATED ON 8102023 REFERENCE THE GUIDE LOCATED IN THE SOC ONENOTE EDELMAN SPYWARE CALLBACK CHECK,SOC Incident Management Edelman,Alert,Solved,Monitoring Ticket,Level 2,0.08,Provider,1216.0,Level 2,"Swisher, Ryan | ExactlyIT Inc"
4,0.000000,8,1134.0,,0,Alert Streaming Latency Breached Threshold Streaming latency of 25155 milliseconds between device JPA2327P0MA and CloudVision is greater than the threshold of 10000 milliseconds Q3YJHDS2NPGQZI Severity warning Integration name IP4GCloudvision,3.0,5.0,Alert Streaming Latency Breached Threshold Streaming latency of 25155 milliseconds between device JPA2327P0MA and CloudVision is greater than the threshold of 10000 milliseconds Q3YJHDS2NPGQZI Severity warning Integration name IP4GCloudvision,,Alert,Solved,Monitoring Ticket,Level 2,0.00,Provider,1134.0,Level 2,"Redish, Graham_EIT | ExactlyIT Inc"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23135,8.683333,6,1155.0,Based on previous records no actions required ticket will be closed as informative,22061,This message is a warning that an email you are trying to send has not yet been delivered You do not have to do anything yet as the email is still queued for delivery An email you addressed to email address dpratapuladominoinvacarecontractorcom has not yet been delivered The problem appears to be Recipient server unavailable or busy Additional information follows Connection timed out Connection timed out This condition occurred after 6 attempts to deliver over a period of 1 hours You will receive a rejection notice after a maximum of 4 days if the email cannot be delivered If you sent the email to multiple recipients you will receive one of these messages for each one which is still undelivered otherwise they have been sent,562.0,6.0,This message is a warning that an email you are trying to send has not yet been delivered You do not have to do anything yet as the email is still queued for delivery An email you addressed to email address dpratapuladominoinvacarecontractorcom has not yet been delivered The problem appears to be Recipient server unavailable or busy Additional information follows Connection timed out Connection timed out This condition occurred after 6 attempts to deliver over a period of 1 hours You will receive a rejection notice after a maximum of 4 days if the email cannot be delivered If you sent the email to multiple recipients you will receive one of these messages for each one which is still undelivered otherwise they have been sent,IVC - Other,Others,Solved,Service Desk handled,Level 1,0.00,Provider,1155.0,Level 1,"Jacome, Roberto_EIT | ExactlyIT Inc"
23136,2.600000,30,1155.0,,22061,Other Information ___________ Application Type IVC ESKER on Demand Request Type IVCInstallation Leah will need Esker application to approve invoices while I am out of office next week,558.0,9.0,Other Information ___________ Application Type IVC ESKER on Demand Request Type IVCInstallation Leah will need Esker application to approve invoices while I am out of office next week,IVC - ESKER on Demand,Others,With Vendor,Service Desk handled,Level 1,0.00,Provider,1155.0,Level 1,"Jacome, Roberto_EIT | ExactlyIT Inc"
23137,2.833333,72,373.0,,12552,Original title Failed Connectivity Description Alert Nable Tom Barrow TOM General TBCORLANDO2 Failed Connectivity Severity Failed Customer Tom Barrow TOM General Device Name TBCORLANDO2 AlertLookupConnectivityFailed Device IP 192168416 Status URL Integration Name Alert Code IOC000 Connectivity Notes No device note for device TBCORLANDO2 To remotely access this device click the following link httpsncod504nablecom443deepLinkActiondomethoddeviceRCcustomerID2060deviceID203683526languageen_US Device Property Device Description Network device discovered using Asset Discovery 203683526 Device Property Enviroment Device Description Enviroment Network device discovered using Asset Discovery 203683526 Issue At 20240621 195827 the Connectivity service transitioned from a Warning state to a Failed state Here are the details of the Connectivity service Packet Loss 1000 Time To Live 12400 Hops Average Round Trip Time 43000 msec DNS Resolution True,3.0,3.0,Original title Failed Connectivity Description Alert Nable Tom Barrow TOM General TBCORLANDO2 Failed Connectivity Severity Failed Customer Tom Barrow TOM General Device Name TBCORLANDO2 AlertLookupConnectivityFailed Device IP 192168416 Status URL Integration Name Alert Code IOC000 Connectivity Notes No device note for device TBCORLANDO2 To remotely access this device click the following link httpsncod504nablecom443deepLinkActiondomethoddeviceRCcustomerID2060deviceID203683526languageen_US Device Property Device Description Network device discovered using Asset Discovery 203683526 Device Property Enviroment Device Description Enviroment Network device discovered using Asset Discovery 203683526 Issue At 20240621 195827 the Connectivity service transitioned from a Warning state to a Failed state Here are the details of the Connectivity service Packet Loss 1000 Time To Live 12400 Hops Average Round Trip Time 43000 msec DNS Resolution True,,Alert,Solved,Monitoring Ticket,Level 2,0.67,Provider,373.0,Level 2,"Gaona, Esaul_EIT | ExactlyIT Inc"
23138,2.400000,171,373.0,,12552,Original title Failed Connectivity Description Alert Nable Tom Barrow TOM General TBCSAV1 Failed Connectivity Severity Failed Customer Tom Barrow TOM General Device Name TBCSAV1 AlertLookupConnectivityFailed Device IP 192168918 Status URL Integration Name Alert Code IOC000 Connectivity Notes No device note for device TBCSAV1 To remotely access this device click the following link httpsncod504nablecom443deepLinkActiondomethoddeviceRCcustomerID2060deviceID1476467970languageen_US Device Property Device Description Network device discovered using Asset Discovery 1476467970 Device Property Enviroment Device Description Enviroment Network device discovered using Asset Discovery 1476467970 Issue At 20240621 200942 the Connectivity service transitioned from a Normal state to a Failed state Here are the details of the Connectivity service Packet Loss 3000 Time To Live 12400 Hops Average Round Trip Time 19500 msec DNS Resolution True,3.0,3.0,Original title Failed Connectivity Description Alert Nable Tom Barrow TOM General TBCSAV1 Failed Connectivity Severity Failed Customer Tom Barrow TOM General Device Name TBCSAV1 AlertLookupConnectivityFailed Device IP 192168918 Status URL Integration Name Alert Code IOC000 Connectivity Notes No device note for device TBCSAV1 To remotely access this device click the following link httpsncod504nablecom443deepLinkActiondomethoddeviceRCcustomerID2060deviceID1476467970languageen_US Device Property Device Description Network device discovered using Asset Discovery 1476467970 Device Property Enviroment Device Description Enviroment Network device discovered using Asset Discovery 1476467970 Issue At 20240621 200942 the Connectivity service transitioned from a Normal state to a Failed state Here are the details of the Connectivity service Packet Loss 3000 Time To Live 12400 Hops Average Round Trip Time 19500 msec DNS Resolution True,,Alert,Solved,Monitoring Ticket,Level 2,1.17,Provider,373.0,Level 2,"Gaona, Esaul_EIT | ExactlyIT Inc"


In [46]:
df.solution.isna().sum()

0

In [47]:
df.solution.isnull().sum()  

0

In [51]:
df = df[~df[['solution']].applymap(lambda x: isinstance(x, float)).any(axis=1)]

  df = df[~df[['solution']].applymap(lambda x: isinstance(x, float)).any(axis=1)]


In [60]:
df

Unnamed: 0,current_duration,message_total_count,team_id,solution,product_id,description_plain,stage_id,ticket_type_id,description,cicore_id_name,is_alert,stage_id_name,handle_type,team_level,total_hours_spent,team_category,id,team_level.1,owner_id_name
0,0.000000,6,1216.0,Unknown,23061,As of 41923 the Edelman Phishing triage process has been updated Reference the OneNote page Edelman Access Runbooks Phishing Tickets Guide for process instructions This ticket fires everyday at 8AM analyst who claims ticket is responsible for working Edelman phishing tickets for 30 minutes per ticket Do not worry about completing as many as possible or trying to empty out the queue,3.0,2.0,As of 41923 the Edelman Phishing triage process has been updated Reference the OneNote page Edelman Access Runbooks Phishing Tickets Guide for process instructions This ticket fires everyday at 8AM analyst who claims ticket is responsible for working Edelman phishing tickets for 30 minutes per ticket Do not worry about completing as many as possible or trying to empty out the queue,SOC Incident Management Edelman,Alert,Solved,Monitoring Ticket,Level 2,0.20,Provider,1216.0,Level 2,"Swisher, Ryan | ExactlyIT Inc"
1,0.000000,6,1216.0,Unknown,23069,EXTERNAL SENDER This message was sent to you automatically by Exabeam on the following high risk event by an asset High risk session by asset mish1aysnfwcb with a risk score of 160 Asset Name mish1aysnfwcb Asset IP 45594010 Top user for this asset Ryan Dockery nsf_train1 Sydney Pobee Radiology iis Session Start Time June 10 2024 1200AM GMT Session End Time June 11 2024 1200AM GMT Top Risk Reason A security alert is associated with the asset This is the first occurrence of this security alert name on this asset This is a SOC Alert for Northside Hospital Exabeam Advanced Analytics SIEM n EXTERNAL SENDER This message was sent to you automatically by Exabeam on the following high risk event by an asset High risk session by asset bokzt8kps with a risk score of 150 Asset Name bokzt8kps Asset IP 192168147140 Top user for this asset Tamara Onley Catalina Stephen Roberto Cimini Session Start Time June 10 2024 1200AM GMT Session End Time June 11 2024 1200AM GMT Top Risk Reason A security alert is associated with the asset This is a SOC Alert for Northside Hospital Exabeam Advanced Analytics SIEM,3.0,2.0,EXTERNAL SENDER This message was sent to you automatically by Exabeam on the following high risk event by an asset High risk session by asset mish1aysnfwcb with a risk score of 160 Asset Name mish1aysnfwcb Asset IP 45594010 Top user for this asset Ryan Dockery nsf_train1 Sydney Pobee Radiology iis Session Start Time June 10 2024 1200AM GMT Session End Time June 11 2024 1200AM GMT Top Risk Reason A security alert is associated with the asset This is the first occurrence of this security alert name on this asset This is a SOC Alert for Northside Hospital Exabeam Advanced Analytics SIEM n EXTERNAL SENDER This message was sent to you automatically by Exabeam on the following high risk event by an asset High risk session by asset bokzt8kps with a risk score of 150 Asset Name bokzt8kps Asset IP 192168147140 Top user for this asset Tamara Onley Catalina Stephen Roberto Cimini Session Start Time June 10 2024 1200AM GMT Session End Time June 11 2024 1200AM GMT Top Risk Reason A security alert is associated with the asset This is a SOC Alert for Northside Hospital Exabeam Advanced Analytics SIEM,SOC Incident Management Northside Hospital,Alert,Solved,Monitoring Ticket,Level 2,0.17,Provider,1216.0,Level 2,"Swisher, Ryan | ExactlyIT Inc"
2,0.000000,8,2.0,Unknown,0,EXTERNAL SENDER Review this incident Microsoft 365 Defender has detected a security threat in your environment View incident details ID 8610 Incident name User requested to release a quarantined message Severity Informational Categories InitialAccess Time June 10 2024 742 UTC Incident page httpssecuritymicrosoftcomincidentsbyalertalertIdfa7b05fb3fa749cfab480008dc89208864sourceincidentemailtidca84dc0c3fcb438685adfbb73fbfded4 Account information Organization name Gilson Inc Privacy Statement Microsoft Corporation One Microsoft Way Redmond WA 98052,3.0,5.0,EXTERNAL SENDER Review this incident Microsoft 365 Defender has detected a security threat in your environment View incident details ID 8610 Incident name User requested to release a quarantined message Severity Informational Categories InitialAccess Time June 10 2024 742 UTC Incident page httpssecuritymicrosoftcomincidentsbyalertalertIdfa7b05fb3fa749cfab480008dc89208864sourceincidentemailtidca84dc0c3fcb438685adfbb73fbfded4 Account information Organization name Gilson Inc Privacy Statement Microsoft Corporation One Microsoft Way Redmond WA 98052,,Alert,Solved,Monitoring Ticket,Level 2,1.15,Provider,2.0,Level 2,"Lovelace, Jimmy_EIT | ExactlyIT Inc"
3,0.000000,6,1216.0,Unknown,23061,THE PROCEDURE FOR THIS TICKET WAS UPDATED ON 8102023 REFERENCE THE GUIDE LOCATED IN THE SOC ONENOTE EDELMAN SPYWARE CALLBACK CHECK,3.0,2.0,THE PROCEDURE FOR THIS TICKET WAS UPDATED ON 8102023 REFERENCE THE GUIDE LOCATED IN THE SOC ONENOTE EDELMAN SPYWARE CALLBACK CHECK,SOC Incident Management Edelman,Alert,Solved,Monitoring Ticket,Level 2,0.08,Provider,1216.0,Level 2,"Swisher, Ryan | ExactlyIT Inc"
4,0.000000,8,1134.0,Unknown,0,Alert Streaming Latency Breached Threshold Streaming latency of 25155 milliseconds between device JPA2327P0MA and CloudVision is greater than the threshold of 10000 milliseconds Q3YJHDS2NPGQZI Severity warning Integration name IP4GCloudvision,3.0,5.0,Alert Streaming Latency Breached Threshold Streaming latency of 25155 milliseconds between device JPA2327P0MA and CloudVision is greater than the threshold of 10000 milliseconds Q3YJHDS2NPGQZI Severity warning Integration name IP4GCloudvision,,Alert,Solved,Monitoring Ticket,Level 2,0.00,Provider,1134.0,Level 2,"Redish, Graham_EIT | ExactlyIT Inc"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23135,8.683333,6,1155.0,Based on previous records no actions required ticket will be closed as informative,22061,This message is a warning that an email you are trying to send has not yet been delivered You do not have to do anything yet as the email is still queued for delivery An email you addressed to email address dpratapuladominoinvacarecontractorcom has not yet been delivered The problem appears to be Recipient server unavailable or busy Additional information follows Connection timed out Connection timed out This condition occurred after 6 attempts to deliver over a period of 1 hours You will receive a rejection notice after a maximum of 4 days if the email cannot be delivered If you sent the email to multiple recipients you will receive one of these messages for each one which is still undelivered otherwise they have been sent,562.0,6.0,This message is a warning that an email you are trying to send has not yet been delivered You do not have to do anything yet as the email is still queued for delivery An email you addressed to email address dpratapuladominoinvacarecontractorcom has not yet been delivered The problem appears to be Recipient server unavailable or busy Additional information follows Connection timed out Connection timed out This condition occurred after 6 attempts to deliver over a period of 1 hours You will receive a rejection notice after a maximum of 4 days if the email cannot be delivered If you sent the email to multiple recipients you will receive one of these messages for each one which is still undelivered otherwise they have been sent,IVC - Other,Others,Solved,Service Desk handled,Level 1,0.00,Provider,1155.0,Level 1,"Jacome, Roberto_EIT | ExactlyIT Inc"
23136,2.600000,30,1155.0,Unknown,22061,Other Information ___________ Application Type IVC ESKER on Demand Request Type IVCInstallation Leah will need Esker application to approve invoices while I am out of office next week,558.0,9.0,Other Information ___________ Application Type IVC ESKER on Demand Request Type IVCInstallation Leah will need Esker application to approve invoices while I am out of office next week,IVC - ESKER on Demand,Others,With Vendor,Service Desk handled,Level 1,0.00,Provider,1155.0,Level 1,"Jacome, Roberto_EIT | ExactlyIT Inc"
23137,2.833333,72,373.0,Unknown,12552,Original title Failed Connectivity Description Alert Nable Tom Barrow TOM General TBCORLANDO2 Failed Connectivity Severity Failed Customer Tom Barrow TOM General Device Name TBCORLANDO2 AlertLookupConnectivityFailed Device IP 192168416 Status URL Integration Name Alert Code IOC000 Connectivity Notes No device note for device TBCORLANDO2 To remotely access this device click the following link httpsncod504nablecom443deepLinkActiondomethoddeviceRCcustomerID2060deviceID203683526languageen_US Device Property Device Description Network device discovered using Asset Discovery 203683526 Device Property Enviroment Device Description Enviroment Network device discovered using Asset Discovery 203683526 Issue At 20240621 195827 the Connectivity service transitioned from a Warning state to a Failed state Here are the details of the Connectivity service Packet Loss 1000 Time To Live 12400 Hops Average Round Trip Time 43000 msec DNS Resolution True,3.0,3.0,Original title Failed Connectivity Description Alert Nable Tom Barrow TOM General TBCORLANDO2 Failed Connectivity Severity Failed Customer Tom Barrow TOM General Device Name TBCORLANDO2 AlertLookupConnectivityFailed Device IP 192168416 Status URL Integration Name Alert Code IOC000 Connectivity Notes No device note for device TBCORLANDO2 To remotely access this device click the following link httpsncod504nablecom443deepLinkActiondomethoddeviceRCcustomerID2060deviceID203683526languageen_US Device Property Device Description Network device discovered using Asset Discovery 203683526 Device Property Enviroment Device Description Enviroment Network device discovered using Asset Discovery 203683526 Issue At 20240621 195827 the Connectivity service transitioned from a Warning state to a Failed state Here are the details of the Connectivity service Packet Loss 1000 Time To Live 12400 Hops Average Round Trip Time 43000 msec DNS Resolution True,,Alert,Solved,Monitoring Ticket,Level 2,0.67,Provider,373.0,Level 2,"Gaona, Esaul_EIT | ExactlyIT Inc"
23138,2.400000,171,373.0,Unknown,12552,Original title Failed Connectivity Description Alert Nable Tom Barrow TOM General TBCSAV1 Failed Connectivity Severity Failed Customer Tom Barrow TOM General Device Name TBCSAV1 AlertLookupConnectivityFailed Device IP 192168918 Status URL Integration Name Alert Code IOC000 Connectivity Notes No device note for device TBCSAV1 To remotely access this device click the following link httpsncod504nablecom443deepLinkActiondomethoddeviceRCcustomerID2060deviceID1476467970languageen_US Device Property Device Description Network device discovered using Asset Discovery 1476467970 Device Property Enviroment Device Description Enviroment Network device discovered using Asset Discovery 1476467970 Issue At 20240621 200942 the Connectivity service transitioned from a Normal state to a Failed state Here are the details of the Connectivity service Packet Loss 3000 Time To Live 12400 Hops Average Round Trip Time 19500 msec DNS Resolution True,3.0,3.0,Original title Failed Connectivity Description Alert Nable Tom Barrow TOM General TBCSAV1 Failed Connectivity Severity Failed Customer Tom Barrow TOM General Device Name TBCSAV1 AlertLookupConnectivityFailed Device IP 192168918 Status URL Integration Name Alert Code IOC000 Connectivity Notes No device note for device TBCSAV1 To remotely access this device click the following link httpsncod504nablecom443deepLinkActiondomethoddeviceRCcustomerID2060deviceID1476467970languageen_US Device Property Device Description Network device discovered using Asset Discovery 1476467970 Device Property Enviroment Device Description Enviroment Network device discovered using Asset Discovery 1476467970 Issue At 20240621 200942 the Connectivity service transitioned from a Normal state to a Failed state Here are the details of the Connectivity service Packet Loss 3000 Time To Live 12400 Hops Average Round Trip Time 19500 msec DNS Resolution True,,Alert,Solved,Monitoring Ticket,Level 2,1.17,Provider,373.0,Level 2,"Gaona, Esaul_EIT | ExactlyIT Inc"
