# Threat Hunting - Data Science

## Splunk REST API - JupyterNotebook Integration

### Basic Info Check

In [None]:
#!python3 --version
#!curl ipinfo.io/ip

### Imports

In [None]:
import os, sys, time

import pandas as pd

import json, boto3, requests, warnings

# Silence every Python warning for the rest of the session.
# NOTE(review): this presumably exists to hide the warning emitted for the
# verify=False HTTPS calls made later in this notebook, but it also hides
# deprecation warnings from pandas and other libraries - confirm it is wanted.
warnings.filterwarnings("ignore")

from lxml.etree import fromstring
import xml.etree.ElementTree as ET

### Floating Data Precision

In [None]:
# Render every float shown by pandas with exactly four decimal places.
pd.set_option('display.float_format', '{:.4f}'.format)

### Splunk Secret From AWS Secrets Manager - replace "xxxxxxxxxx" with the Splunk secret ARN

In [None]:
# ARN of the Secrets Manager secret to read.
secret_arn = 'xxxxxxxxxxxxxxxxxxxx'

# Fetch the secret, then decode its JSON 'SecretString' payload so that
# auth_token ends up as the parsed object.
secrets_client = boto3.client('secretsmanager')
secret_response = secrets_client.get_secret_value(SecretId=secret_arn)
auth_token = json.loads(secret_response.get('SecretString'))

### Replace "xxxxxxxxxx" with the Splunk username [the secret is stored in AWS Secrets Manager as username:password]

In [None]:
# Splunk account name used for HTTP basic authentication.
username = "xxxxxxxxxxxxxxxxxxxx"

# Password is looked up in the decoded secret by its key.
password = auth_token.get("xxxxxxxxxxxxxxxxxxxx")

# Fail here rather than later: a missing key would otherwise pass None on as
# the password and presumably only show up as a failed Splunk request.
if password is None:
    raise KeyError("Splunk password not found in the secret - check the key passed to auth_token.get()")

### Splunk Search Job - replace "xxxxxxxxxx" with the Splunk instance IP
### Replace "xxxxxxxxxx" with a Splunk search \`macro\` or a full Splunk search such as [search index=*]

In [None]:
# Search-jobs endpoint of the Splunk management port.
url = "https://xxxxxxxxxxxxxxxxxxxx:8089/services/search/jobs"

# Form fields sent with the job-creation request.
data = {
    'search': 'xxxxxxxxxxxxxxxxxxxx',
    'id': 'xxxxxxxxxx', # enter custom search id
    'max_count': '50000', # api limit can be increased with splunk config file on splunk instance
    'output_mode':'csv'
}

# NOTE(review): verify=False turns off TLS certificate validation; it is kept
# as-is (presumably for a self-signed Splunk certificate) - confirm.
# The timeout stops the cell from hanging forever if the host is unreachable.
response = requests.post(url, data=data, verify=False, auth=(username, password), timeout=60)

# Surface a rejected request immediately instead of letting the later status
# and results cells fail in less obvious ways. This only reports; it does not
# raise, so re-running the cell stays harmless.
if not response.ok:
    print('Splunk search job request failed - HTTP', response.status_code)
    print(response.text)

### Splunk Search Job Control

In [None]:
#sid = 'xxxxxxxxxx' # your custom search id

#control_url = url + '/' + sid + '/control'

#data = {'action': 'pause'}

#response = requests.post(control_url, data=data, verify=False, auth=(username, password))

### Splunk Search Job Status

In [None]:
sid = 'xxxxxxxxxx' # your custom search id

# The status of one job lives at <search jobs url>/<sid>.
status_url = '/'.join((url, sid))

response = requests.get(
    status_url,
    auth=(username, password),
    verify=False,
)

In [None]:
# Save the job-status response body to disk as SearchJobStatus.xml.
# The encoding is pinned to UTF-8 so the bytes on disk do not depend on the
# platform's locale encoding (the file is read back by an XML parser).
# NOTE(review): assumes the Splunk response declares itself as UTF-8 - confirm.
with open('SearchJobStatus.xml', 'w', encoding='utf-8') as file:
    file.write(response.text)

### Wait and re-run the status cells until this cell prints "Search Completed !!"

In [None]:
# Tag of the Atom <content> element and the namespace of Splunk's <s:dict>/<s:key>.
ATOM_CONTENT_TAG = "{http://www.w3.org/2005/Atom}content"
SPLUNK_NS = {"s": "http://dev.splunk.com/ns/rest"}


def get_job_done_flag(root):
    """Return the text of the job's ``isDone`` key, or None when it is absent.

    ``root`` is the root element of a job-status document. The key is looked
    up by name inside the first Atom ``content`` child instead of by a fixed
    position, because the position of a key depends on which other keys the
    server happens to include.
    """
    content = root.find(ATOM_CONTENT_TAG)
    if content is None:
        return None
    done_key = content.find("s:dict/s:key[@name='isDone']", SPLUNK_NS)
    if done_key is None:
        return None
    return done_key.text


try:
    tree = ET.parse('SearchJobStatus.xml')
except FileNotFoundError:
    # The status file is produced by the preceding cell.
    print("SearchJobStatus.xml not found - run the status request and save cells first")
else:
    root = tree.getroot()
    done_flag = get_job_done_flag(root)
    if done_flag is None:
        # e.g. an error document instead of a job entry
        print("No isDone key in SearchJobStatus.xml - check the saved response")
    else:
        print('Query Status :', done_flag)
        if done_flag == "1":
            print("\nSearch Completed !!")

### Splunk Search Results, replace "xxxxxxxxxx" with splunk instance ip

In [None]:
sid = 'xxxxxxxxxx' # your custom search id

# Results endpoint of the job: <search jobs url>/<sid>/results
results_url = "".join(["https://xxxxxxxxxxxxxxxxxxxx:8089/services/search/jobs/", sid, "/results"])

# Query-string parameters for the results request.
params = {
    'count': '0', # per the Splunk REST docs, 0 asks for all available results
    'output_mode': 'csv',
}

response = requests.get(
    results_url,
    params=params,
    auth=(username, password),
    verify=False,
)

In [None]:
# Save the results response body as a CSV file.
# encoding='utf-8' keeps the file readable by pandas' default read_csv
# decoding regardless of the platform locale; newline='' writes the line
# endings exactly as received instead of translating them.
with open("spl_xxxxxxxxxx.csv", "w", encoding="utf-8", newline="") as file: # file name to save
    file.write(response.text)

### Pandas DataFrame Basic

In [None]:
# Load the saved results into a DataFrame.
csv_path = 'spl_xxxxxxxxxx.csv' # file name to read
df_splunk = pd.read_csv(csv_path)

In [None]:
# Summary statistics of df_splunk; kept as a bare trailing expression so the
# notebook displays the returned table.
df_splunk.describe()

In [None]:
# Print a concise overview of df_splunk (columns, dtypes, non-null counts).
df_splunk.info()

In [None]:
# Show the column labels of df_splunk; kept as a bare trailing expression so
# the notebook displays them.
df_splunk.columns

### If you get an error while reading data from the file into a pandas DataFrame, try this

In [None]:
# Tolerant re-read of the results file: decode as ISO-8859-1, use the Python
# parser engine, and silently drop malformed rows.
# on_bad_lines='skip' (pandas >= 1.3) replaces the former
# error_bad_lines=False / warn_bad_lines=False pair, which pandas 2.0 removed.
df_splunk = pd.read_csv(
    "spl_xxxxxxxxxx.csv",
    encoding='iso-8859-1',
    engine='python',
    on_bad_lines='skip',
)

### change time format - enter your time column name

In [None]:
# Convert the chosen time column from text to pandas datetimes, in place.
# The infer_datetime_format argument is dropped: it is deprecated since
# pandas 2.0, where format inference is already the default behaviour.
df_splunk['time - column - name'] = pd.to_datetime(df_splunk['time - column - name'])