# Please read: 

Somewhat automated notebook for Citrix NetScaler Analysis. Intended only as a starting point to help automate some of the analysis.

- Fill in the 'Configuration' section below.
- Run the whole notebook via "Kernel -> Restart & Run All".
- You should see a 'Results.xlsx' generated in the directory you specified as your 'ANALYSISPATH'. 

# Import Libraries 

In [None]:
from pathlib import Path
import pandas as pd
import logging
import gzip
import shutil
import re
import base64
from datetime import datetime
from pyparsing import Word, alphas, Suppress, Combine, nums, string, Regex, Optional

In [None]:
# Widen pandas' display limits so long log lines and wide frames are not truncated.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', -1)
pd.set_option('display.expand_frame_repr', False)
try:
    # None means "never truncate cell contents"; the old -1 sentinel is rejected
    # by current pandas releases.
    pd.set_option('display.max_colwidth', None)
except ValueError:
    # Very old pandas releases only accept an integer here.
    pd.set_option('display.max_colwidth', -1)

In [None]:
# Route DEBUG-and-above records to a stream handler (notebook output),
# prefixing each record with its time and level.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

# Configuration 

Fill this in carefully; you should then be able to run "Kernel -> Restart & Run All" and get an Excel output with an overview of the relevant artifacts. Feel free to dig into the notebook itself and run some of your own searches. This notebook is meant only as a starting point.

In [None]:
# All paths below should be written with forward slashes.

# Where the collected NetScaler logs were saved (e.g. Workspace/Hostname/log).
# On the appliance they are usually found in /var/log.
LOGPATH = Path('')

# Where the recovered NetScaler template XMLs were saved
# (e.g. Workspace/Hostname/templates). LEAVE BLANK IF YOU HAVE NO TEMPLATES.
# On the appliance they are usually found in:
#   /netscaler/portal/templates
#   /var/tmp/netscaler/portal/templates
TEMPLATEPATH = Path('')

# Secondary template location; on the appliance usually /var/vpn/bookmark.
BOOKMARKPATH = Path('')

# Analysis workspace (e.g. 'Workspace/Hostname'). Logs are extracted here, so it
# is recommended to nest it under a per-host directory and to run the notebook
# once per host.
ANALYSISPATH = Path('')

# IOCs / common suspicious terms searched for in the cron and bash logs.
# Entries by the 'nobody' user are the main thing to look for; some terms
# (such as 'uname') throw a lot of false positives, so add them with care.
# Further candidates: 'find /netscaler/portal/scripts', 'exec', 'print', 'echo', 'crontab', '/portal'
searchFor = [
    'nobody',
]

# IOCs / common suspicious terms searched for in the HTTP error logs.
http_error_terms = [
    'chmod:',
    'curl:',
    'bash:',
    'rm:',
    'mkdir:',
    '.init',
    '--:--:--',
    '.pl',
    'vpn',
]

# IOCs / common suspicious terms searched for in the notice and sh logs.
shSearchFor = [
    'nobody',
    'find',
    'crontab',
    'mv',
    'LDR',
]

# Column names applied, in order, to the whitespace-separated httpaccess fields.
CNAMES = [
    'client_ip_address', 'client_identity', 'user_id',
    'time_received', 'timezone',
    'request', 'status_code', 'size',
    'referer (sic)', 'user_agent',
]

# Unzip logs 

In [None]:
# Ensure the working directory that receives every extracted / copied log exists.
ANALYZEDLOGPATH = ANALYSISPATH / 'analyzed-logs'
ANALYZEDLOGPATH.mkdir(parents=True, exist_ok=True)

# Glob matching the rotated, gzip-compressed archives of a log
# -> file name of the corresponding live (uncompressed) log.
log_type_dict = {
    'httpaccess.log.*.gz': 'httpaccess.log',
    'httperror.log.*.gz': 'httperror.log',
    'maillog.*.gz': 'maillog',
    'messages.*.gz': 'messages',
    'nitro.log.*.gz': 'nitro.log',
    'ns.log.*.gz': 'ns.log',
    'nsvpn.log.*.gz': 'nsvpn.log',
    'auth.log.*.gz': 'auth.log',
    'cron.*.gz': 'cron',
    'bash.*.gz': 'bash.log',
    'notice.*.gz': 'notice.log',
    'sh.*.gz': 'sh.log'
}

for zipped_wildcard, unzipped_name in log_type_dict.items():
    # Decompress each rotated archive; the output keeps the archive's name
    # minus its trailing '.gz'.
    for fp in LOGPATH.glob(zipped_wildcard):
        unzipped_target = ANALYZEDLOGPATH / fp.name[:-3]
        logging.info(f'Unzipping {fp}')
        with gzip.open(fp, 'rb') as f_in, open(unzipped_target, 'wb') as f_out:
            logging.info(f'Writing to {unzipped_target}')
            shutil.copyfileobj(f_in, f_out)

    # Copy the live log as well. Not every collection contains every log type,
    # so a missing file is reported and skipped instead of aborting the run.
    live_log = LOGPATH / unzipped_name
    if not live_log.is_file():
        logging.warning(f'{live_log} not found - skipping')
        continue
    with open(live_log, 'rb') as f_in, open(ANALYZEDLOGPATH / unzipped_name, 'wb') as f_out:
        logging.info(f'Writing to {ANALYZEDLOGPATH / unzipped_name}')
        shutil.copyfileobj(f_in, f_out)
logging.info("Done!")

# HTTPAccess Analysis 

In [None]:
# Write a cleaned copy of each httpaccess log, named 'processed-<original name>',
# that omits the 'logfile turned over due to size' lines.
for fp in ANALYZEDLOGPATH.glob('httpaccess.*'):
    logging.info(f'Pre-processing {fp}')
    with open(fp, 'r', encoding='latin1') as src:
        kept_rows = [
            row for row in src.readlines()
            if 'logfile turned over due to size' not in row
        ]
    with open(ANALYZEDLOGPATH / f'processed-{fp.name}', 'w', encoding='latin1') as dst:
        dst.writelines(kept_rows)
logging.info('Done!')

In [None]:
# Load every pre-processed httpaccess log into a single DataFrame `df`.
# The files are collected first and concatenated once (DataFrame.append is gone
# from current pandas), and the column clean-up runs once on the combined frame
# rather than on every loop iteration.
http_access_frames = []
for fp in ANALYZEDLOGPATH.glob('processed-httpaccess.*'):
    logging.info(f'Opening {fp}')
    http_access_frames.append(
        pd.read_csv(fp,
                    names=CNAMES,
                    sep=r'\s+',           # same splitting as delim_whitespace=True
                    na_values='-',
                    encoding='latin1',    # the processed files were written as latin1
                    )
    )

if http_access_frames:
    df = pd.concat(http_access_frames)
    df = df.fillna('-')
    # The timestamp is split over two columns: '[dd/Mon/yyyy:HH:MM:SS' and '+zzzz]'.
    df.time_received = df.time_received.str.strip('[')
    df.timezone = df.timezone.str.strip(']')
    df['timestamp'] = pd.to_datetime(df.time_received, format='%d/%b/%Y:%X')
else:
    # Keep the expected columns so later column lookups still work on an empty frame.
    logging.warning('No processed httpaccess logs found')
    df = pd.DataFrame(columns=CNAMES + ['timestamp'])
logging.info('Done!')

In [None]:
# Requests whose request line contains the literal sequence '/vpn/../vpns'.
# The pattern is a regex, so the dots are escaped; a raw string keeps the
# backslashes from being read as (invalid) string escapes.
vpnRequests = df[(df['request'].str.contains(r'/vpn/\.\./vpns'))]

In [None]:
# Subset of vpnRequests that reference an XML file. regex=False makes '.xml' a
# literal substring; as a regex the '.' would match any character (e.g. 'axml').
xmls = vpnRequests[vpnRequests.request.str.contains('.xml', regex=False)]

# HTTPError Analysis

In [None]:
# Collect every httperror log line containing at least one configured term.
# Lines are gathered in a list and turned into a frame once, instead of growing
# the frame row by row (DataFrame.append is gone from current pandas).
http_error_matches = []
for fp in ANALYZEDLOGPATH.glob('httperror.*'):
    logging.info(f'Opening {fp}')
    # latin1 (as used for the other logs) can decode any byte, so unusual
    # bytes in a log line cannot abort the scan.
    with open(fp, 'r', encoding='latin1') as f:
        http_error_matches.extend(
            line for line in f
            if any(term in line for term in http_error_terms)
        )
http_error_df = pd.DataFrame({'message': http_error_matches}, columns=['message'])
logging.info('Done!')

# Parse Payload XMLs 

In [None]:
import xml.etree.ElementTree as ET
import hashlib


def _describe_payload_xml(fp):
    """Summarise one recovered XML file as a 9-tuple.

    The tuple is (md5, ctime, atime, mtime, desc, title, url, UI_inuse, username).
    When the file is not well-formed XML, its raw text is returned in the
    ``desc`` slot and the remaining attribute slots are None.
    """
    raw = fp.read_bytes()
    # Hash the bytes exactly as stored on disk so the digest matches what other
    # tools report for the same file (text-mode reads may translate newlines).
    digest = hashlib.md5(raw).hexdigest()
    stat = fp.stat()
    # NOTE(review): st_ctime is only a creation time on some platforms - confirm
    # before relying on the 'Created_Time' column.
    times = tuple(pd.to_datetime(t, unit='s')
                  for t in (stat.st_ctime, stat.st_atime, stat.st_mtime))
    try:
        # NOTE: these files are attacker-controlled; the stdlib XML parser is not
        # hardened against maliciously constructed documents.
        root = ET.fromstring(raw)
    except (ET.ParseError, ValueError, LookupError) as err:
        logging.warning(f'{fp} is not well-formed XML ({err}) - keeping raw content')
        return (digest, *times, raw.decode('utf-8', errors='replace'), None, None, None, None)

    desc = title = url = UI_inuse = None
    # If a file holds several bookmarks, the attributes of the last one win.
    for bookmark in root.findall('bookmarks/bookmark'):
        desc = bookmark.get('descr')
        title = bookmark.get('title')
        url = bookmark.get('url')
        UI_inuse = bookmark.get('UI_inuse')
    return (digest, *times, desc, title, url, UI_inuse, root.get('username'))


# File name -> summary tuple. The first occurrence of a file name wins when the
# same name exists in both locations.
payloadDict = {}
for pa in [TEMPLATEPATH, BOOKMARKPATH]:
    if pa == Path(''):
        # Location left blank in the configuration.
        continue
    for fp in pa.glob('*.xml'):
        logging.info(f'Parsing {fp}')
        if fp.name not in payloadDict:
            payloadDict[fp.name] = _describe_payload_xml(fp)

# payloadDf is only created when at least one XML file was found.
if payloadDict:
    payloadDf = pd.DataFrame(payloadDict).T.reset_index()
    payloadDf.columns = ['File', 'MD5', 'Created_Time', 'AccessedTime', 'ModifiedTime', 'Desc', 'Title', 'Url', 'UI_inuse', 'username']
    # Optional: defang URLs before sharing the results.
    #payloadDf.Url = payloadDf.Url.str.replace('http', 'hxxp')
logging.info('Done!')

## Decode Encoded Payloads

In [None]:
# Base64-looking token that directly follows 'echo '.
b64pattern = re.compile(r"(?<=echo )[a-zA-Z0-9+\/]+={0,2}")
# Everything between 'readpipe(' and the closing ")'}".
chrpattern = re.compile(r"(?<=readpipe\()(.*)(?=\)'\})")


def decodePayload(x):
    """Return the most readable form of the payload stored in a parsed XML row.

    ``x`` is any mapping / row exposing ``'Desc'`` and ``'Title'``. In order of
    preference the result is:

    1. the decoded base64 token following ``echo `` in ``Desc``;
    2. the string spelled out by the ``chr(N)`` calls inside ``readpipe(...)``
       in ``Title``;
    3. ``Desc`` itself when it contains 'BLOCK', 'save config' or 'root';
    4. ``Title`` (which may be None).

    A candidate that cannot be decoded is skipped rather than raising, so one
    odd row cannot abort a whole ``DataFrame.apply``.
    """
    desc = x['Desc'] if isinstance(x['Desc'], str) else None
    title = x['Title'] if isinstance(x['Title'], str) else None

    b64_match = b64pattern.search(desc) if desc else None
    if b64_match:
        try:
            return base64.b64decode(b64_match.group(0)).decode()
        except ValueError:
            # binascii.Error and UnicodeDecodeError both derive from ValueError:
            # the token only looked like base64 (e.g. 'echo hello').
            pass

    chr_match = chrpattern.search(title) if title else None
    if chr_match:
        codes = re.findall(r'chr\((\d+)\)', chr_match.group(0))
        try:
            if codes:
                return ''.join(chr(int(code)) for code in codes)
        except (ValueError, OverflowError):
            # A code point outside the valid range - not a real chr() chain.
            pass

    if desc and any(marker in desc for marker in ('BLOCK', 'save config', 'root')):
        return desc
    return x['Title']


# payloadDf only exists when at least one XML file was parsed, so guard on the
# dictionary it was built from; decodePayload is applied to each row.
if len(payloadDict) > 0:
    payloadDf['DecodedPayloads'] = payloadDf.apply(decodePayload, axis='columns')

# Setup Log Parser 

In [None]:
# Month number (1-12) of the local date on which the notebook is run.
current_month = datetime.now().month

In [None]:
class Parser(object):
    """Parse log lines shaped like ``Mon DD HH:MM:SS <notice> host app[pid]: message``.

    The ``app[pid]`` part is optional and the pid may be absent. Log lines carry
    no year, so ``ASSUMED_YEAR`` is used; a line whose month is later than the
    month the notebook is run in (module-level ``current_month``) is moved to
    the year before ``ASSUMED_YEAR``.
    """
    # log lines don't include the year, but if we don't provide one, datetime.strptime will assume 1900
    ASSUMED_YEAR = '2020'

    def __init__(self):
        """Build the pyparsing grammar once per instance."""
        ints = Word(nums)
        month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
        day   = ints
        hour  = Combine(ints + ":" + ints + ":" + ints)

        timestamp = month + day + hour
        # Replace the three matched tokens by a single datetime object.
        timestamp.setParseAction(lambda t: datetime.strptime(Parser.ASSUMED_YEAR + ' ' + ' '.join(t), '%Y %b %d %H:%M:%S'))
        
        # notice
        notice = Suppress("<") + Word(alphas + nums + "/-_.()") + Suppress(">")
        # hostname
        hostname = Word(alphas + nums + "_-.")
        # appname: either 'name[pid]' or a bare 'name' ('+' binds tighter than '|')
        appname = Word(alphas + "/-_.()")("appname") + (Suppress("[") + ints("pid") + Suppress("]")) | (Word(alphas + "/-_.")("appname"))
        appname.setName("appname")
        # message
        message = Regex(".*")

        self._pattern = timestamp("timestamp") + notice("notice") + hostname("hostname") + Optional(appname) + Suppress(':') + message("message")

    def parse(self, line):
        """Return the named fields of ``line`` as a dict, or None if it does not parse.

        The dict always has 'appname' and 'pid' keys ('' when absent from the
        line). Lines that cannot be parsed are logged and skipped.
        """
        try:
            parsed = self._pattern.parseString(line)

            for key in 'appname pid'.split():
                if key not in parsed:
                    parsed[key] = ''
                    
            parsed_dict = parsed.asDict()
            if parsed_dict['timestamp'].month > current_month:
                # Derive the rollover year from ASSUMED_YEAR so the two cannot
                # drift apart when the constant is edited.
                previous_year = int(Parser.ASSUMED_YEAR) - 1
                try:
                    parsed_dict['timestamp'] = parsed_dict['timestamp'].replace(year=previous_year)
                except ValueError:
                    # Feb 29 does not exist in the previous year; keep the line
                    # with the assumed year rather than discarding evidence.
                    pass
            return parsed_dict
        except Exception as e:
            logging.info(f"Ignoring line: {line}{e}\n")
            return None

# Parse Cron Logs 

In [None]:
# Parse every cron log line into cronDf.
# One Parser is reused for all lines and the per-line frames are concatenated
# once at the end, instead of rebuilding the grammar and the frame per line.
cron_parser = Parser()
cron_frames = []
for fp in ANALYZEDLOGPATH.glob('cron*'):
    with open(fp, 'r', encoding='latin1') as f:
        logging.info(f'Parsing {fp}')
        for line in f:
            cron_fields = cron_parser.parse(line)
            if cron_fields is not None:  # None marks a line that did not parse
                cron_frames.append(pd.DataFrame(cron_fields))
if cron_frames:
    cronDf = pd.concat(cron_frames)
else:
    # No file or no parsable line: keep the expected columns so later cells work.
    cronDf = pd.DataFrame(columns=['timestamp', 'notice', 'hostname', 'appname', 'pid', 'message'])
logging.info('Done!')

In [None]:
# Cron entries whose message contains any searchFor term. Terms are escaped so
# they match literally, and a missing message counts as "no match".
nobodyCronActions = cronDf[cronDf.message.str.contains('|'.join(re.escape(term) for term in searchFor), na=False)]

# Parse Bash Logs 

In [None]:
# Parse every bash log line into bashDf.
# One Parser is reused for all lines and the per-line frames are concatenated
# once at the end, instead of rebuilding the grammar and the frame per line.
bash_parser = Parser()
bash_frames = []
for fp in ANALYZEDLOGPATH.glob('bash*'):
    with open(fp, 'r', encoding='latin1') as f:
        logging.info(f'Parsing {fp}')
        for line in f:
            bash_fields = bash_parser.parse(line)
            if bash_fields is not None:  # None marks a line that did not parse
                bash_frames.append(pd.DataFrame(bash_fields))
if bash_frames:
    bashDf = pd.concat(bash_frames)
else:
    # No file or no parsable line: keep the expected columns so later cells work.
    bashDf = pd.DataFrame(columns=['timestamp', 'notice', 'hostname', 'appname', 'pid', 'message'])
logging.info('Done!')

In [None]:
# Bash entries whose message contains any searchFor term. Terms are escaped so
# they match literally, and a missing message counts as "no match".
nobodyBashActions = bashDf[bashDf.message.str.contains('|'.join(re.escape(term) for term in searchFor), na=False)]

# Parse Notice Logs 

In [None]:
# Parse every notice log line into noticeDf.
# One Parser is reused for all lines and the per-line frames are concatenated
# once at the end, instead of rebuilding the grammar and the frame per line.
notice_parser = Parser()
notice_frames = []
for fp in ANALYZEDLOGPATH.glob('notice*'):
    with open(fp, 'r', encoding='latin1') as f:
        logging.info(f'Parsing {fp}')
        for line in f:
            notice_fields = notice_parser.parse(line)
            if notice_fields is not None:  # None marks a line that did not parse
                notice_frames.append(pd.DataFrame(notice_fields))
if notice_frames:
    noticeDf = pd.concat(notice_frames)
else:
    # No file or no parsable line: keep the expected columns so later cells work.
    noticeDf = pd.DataFrame(columns=['timestamp', 'notice', 'hostname', 'appname', 'pid', 'message'])
logging.info('Done!')

In [None]:
# Notice entries whose message contains any shSearchFor term. Terms are escaped
# so they match literally, and a missing message counts as "no match".
# NOTE(review): this uses shSearchFor rather than searchFor - confirm that is intended.
nobodyNoticeActions = noticeDf[noticeDf.message.str.contains('|'.join(re.escape(term) for term in shSearchFor), na=False)]

# Parse sh Logs 

In [None]:
# Parse every sh log line into shDf.
# One Parser is reused for all lines and the per-line frames are concatenated
# once at the end, instead of rebuilding the grammar and the frame per line.
sh_parser = Parser()
sh_frames = []
for fp in ANALYZEDLOGPATH.glob('sh*'):
    with open(fp, 'r', encoding='latin1') as f:
        logging.info(f'Parsing {fp}')
        for line in f:
            sh_fields = sh_parser.parse(line)
            if sh_fields is not None:  # None marks a line that did not parse
                sh_frames.append(pd.DataFrame(sh_fields))
if sh_frames:
    shDf = pd.concat(sh_frames)
else:
    # No file or no parsable line: keep the expected columns so later cells work.
    shDf = pd.DataFrame(columns=['timestamp', 'notice', 'hostname', 'appname', 'pid', 'message'])
logging.info('Done!')

In [None]:
# sh entries whose message contains any shSearchFor term. Terms are escaped so
# they match literally, and a missing message counts as "no match".
nobodyShActions = shDf[shDf.message.str.contains('|'.join(re.escape(term) for term in shSearchFor), na=False)]

# Create DataFrame containing first/last date in logs 

In [None]:
# Build timeDf: one row per log type with the earliest and latest timestamp seen.
# Rows are collected in a list and the frame is created once (DataFrame.append
# is gone from current pandas). A log type without any parsed entry is reported
# with '-' instead of raising on min()/max() of an empty sequence.
log_type_dfs = {
    'cron': cronDf,
    'bash': bashDf,
    'notice': noticeDf,
    'sh': shDf
}

time_rows = []

if isinstance(df, pd.DataFrame) and 'timestamp' in df.columns and not df.empty:
    http_start_time_row = df.loc[df.timestamp == df.timestamp.min()].iloc[0]
    http_end_time_row = df.loc[df.timestamp == df.timestamp.max()].iloc[0]
    time_rows.append({'log_type': 'httpaccess',
                      'start_date': http_start_time_row.timestamp,
                      'end_date': http_end_time_row.timestamp,
                      # Only the httpaccess log records a timezone.
                      'timezone': http_start_time_row.timezone})
else:
    logging.warning('No httpaccess entries loaded - time range unknown')
    time_rows.append({'log_type': 'httpaccess', 'start_date': None, 'end_date': None, 'timezone': None})

for k, v in log_type_dfs.items():
    # A frame may be missing or empty when no log of that type was collected.
    has_entries = isinstance(v, pd.DataFrame) and 'timestamp' in v.columns and not v.empty
    if not has_entries:
        logging.warning(f'No {k} entries parsed - time range unknown')
    time_rows.append({
        'log_type': k,
        'start_date': v.timestamp.min() if has_entries else None,
        'end_date': v.timestamp.max() if has_entries else None,
        'timezone': None
    })

# dtype=object keeps the columns able to hold the '-' placeholder next to timestamps.
timeDf = pd.DataFrame(time_rows,
                      columns=['log_type', 'start_date', 'end_date', 'timezone'],
                      dtype=object)
timeDf = timeDf.fillna('-')

# Write Findings to Excel 

In [None]:
# Write every non-empty finding to its own sheet of Results.xlsx.
# The writer is used as a context manager so the workbook is saved and closed
# on exit (ExcelWriter.save() is gone from current pandas).
with pd.ExcelWriter(ANALYSISPATH / 'Results.xlsx', engine='openpyxl') as writer:
    timeDf.to_excel(writer, sheet_name='LogTimeRanges', index=False)

    # xmls is a subset of vpnRequests, so it is only considered when the latter has rows.
    if not vpnRequests.empty:
        vpnRequests.to_excel(writer, sheet_name='VulnRelatedHTTPRequests', index=False)
        if not xmls.empty:
            xmls.to_excel(writer, sheet_name='ObservedHTTPPayloadRequests', index=False)
        else:
            logging.info('No XML requests observed - excluding dataframe from results')
    else:
        logging.info('No vulnerability related HTTP requests observed')

    # (frame, sheet name, message logged when the frame is empty)
    optional_sheets = [
        (nobodyCronActions, 'SuspiciousCron',
         'No suspicious cron logs observed - excluding dataframe from results'),
        (nobodyBashActions, 'SuspiciousBash',
         'No suspicious bash logs observed - excluding dataframe from results'),
        (nobodyNoticeActions, 'SuspiciousNotice',
         'No suspicious notice logs observed - excluding dataframe from results'),
        (nobodyShActions, 'SuspiciousSh',
         'No suspicious sh logs observed - excluding dataframe from results'),
        (http_error_df, 'SuspiciousHTTPError',
         'No suspicious HTTP error logs observed - excluding dataframe from results'),
    ]
    for finding, sheet, empty_message in optional_sheets:
        if not finding.empty:
            finding.to_excel(writer, sheet_name=sheet, index=False)
        else:
            logging.info(empty_message)

    # payloadDf only exists when payloadDict is non-empty.
    if payloadDict:
        payloadDf.to_excel(writer, sheet_name='ParsedXMLs', index=False)
    else:
        logging.info('Template path not provided or no XMLs found so skipping XML analysis')
logging.info(f"Printed results to {ANALYSISPATH / 'Results.xlsx'}")
logging.info('Done!')