In [None]:
## Imports
import glob
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import shutil
import csv
import yaml
from scipy import stats

# Read the notebook settings from config.yaml.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Echo the settings this run is using.
for setting in ('appname', 'options'):
    print(config[setting])

In [None]:
def prepareFolder(path):
    """Leave an empty directory at `path`.

    If `path` already is a directory it is deleted together with everything
    inside it; afterwards the directory (and any missing parents) is created.

    Args:
        path: Directory path, without a trailing slash.

    Raises:
        OSError: If the existing directory cannot be removed, or the new one
            cannot be created (for example because `path` is a regular file).
    """
    folder = path + '/'
    # The trailing slash makes the check match directories only.
    if os.path.exists(folder):
        shutil.rmtree(path)
    # Whether it was just removed or never existed, the directory is missing
    # at this point, so it is always (re)created.
    os.makedirs(folder)

In [None]:
## Constants
# Eleven hex colour codes available to the plots.
COLORS = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
    '#9467bd', '#8c564b', '#e377c2', '#7f7f7f',
    '#bcbd22', '#17becf', '#1a55FF',
]
# Maps every metric column name to the category it belongs to.
# The category is used as the plot title (see CAT.get(column) in the plotting cell).
CAT = {
    'usr': 'total-cpu-usage',
    'sys': 'total-cpu-usage',
    'idl': 'total-cpu-usage',
    'wai': 'total-cpu-usage',
    'hiq': 'total-cpu-usage',
    'siq': 'total-cpu-usage',

    'used' : 'memory usage',
    'buff' : 'memory usage',
    'cach' : 'memory usage',
    'free' : 'memory usage',

    'files': 'filesystem',
    'inodes' : 'filesystem',  # was misspelled 'fileystem', splitting the category in two

    'read' : 'dsk/total',
    'writ' : 'dsk/total',
    'reads' : 'dsk/total',
    'writs' : 'dsk/total',

    'recv' : 'net/eth0',
    'send' : 'net/eth0',

    'lis' : 'tcp sockets',
    'act' : 'tcp sockets',
    'syn' : 'tcp sockets',
    'tim' : 'tcp sockets',
    'clo' : 'tcp sockets',

    'tot': 'sockets',
    'tcp': 'sockets',
    'udp': 'sockets',
    'raw': 'sockets',
    'frg': 'sockets',

    'int' : 'system',
    'csw' : 'system',

    'run' : 'procs',
    'blk' : 'procs',
    'new' : 'procs'
}

# Category names, one per metric group (the disk group appears twice).
CAT_NAMES = [
    'total-cpu-usage',
    'memory-usage',
    'filesystem',
    'dsk-total1',
    'dsk-total2',
    'net-eth0',
    'tcp sockets',
    'sockets',
    'system',
    'procs',
]

# Long-form description of every metric column ("<column>: <meaning> [<unit>]").
# Kept as reference documentation; COL_NAMES holds the shortened plot labels.
COL_NAMES_DOCUMENTATION_DO_NOT_DELETE = [
    # total-cpu-usage
    # NOTE(review): dstat appears to report idl/wai/hiq/siq as CPU-time
    # percentages like usr/sys; the "[#]" wording below may be inaccurate —
    # confirm against the dstat man page.
    'usr: CPU usage by user processes [%]',
    'sys: CPU usage by system processes [%]',
    'idl: Number of idle processes [#]',
    'wai: Number of waiting processes [#]',
    'hiq: Number of hard interrupts [#]',
    'siq: Number of soft interrupts [#]',
    # memory usage
    # https://www.tecmint.com/dstat-monitor-linux-server-performance-process-memory-network/
    # https://linuxtect.com/linux-dstat-command-monitor-linux-performance-and-resource-usage/
    # https://www.thegeekdiary.com/understanding-proc-meminfo-file-analyzing-memory-utilization-in-linux/
    # /proc/meminfo
    'used: Amount of used memory [Bytes]', # / 1024 = KB
    'buff: Amount of buffered memory [Bytes]', # / 1024 / 1024 = MB
    'cach: Amount of cached memory [Bytes]',
    'free: Amount of free memory [Bytes]',

    # filesystem
    # https://www.linuxquestions.org/questions/linux-general-1/proc-sys-fs-file-nr-57646/
    # /proc/sys/fs/file-nr
    'files: Number of allocated file handles [#]',
    'inodes: Number of used file handles [#]',

    # dsk/total
    'read: Amount of read bytes on disk [Bytes]', # not 100% sure
    'writ: Amount of written bytes on disk [Bytes]', # / 1024 = KB

    # dsk/total
    'reads: Number of read operations on disk [#]',
    'writs: Number of write operations on disk [#]',

    # net/eth0
    'recv: Amount of received bytes on eth0 [B]', # /1024 = k
    'send: Amount of sent bytes on eth0 [B]',  # was described as "received"

    # tcp sockets
    # http://karunsubramanian.com/network/what-is-syn_sent-socket-status/
    # /proc/net/tcp
    # /proc/net/tcp6
    'lis: Number of TCP connections with status "listening" [#]',
    'act: Number of TCP connections with status "established" (active) [#]',
    'syn: Number of TCP connections with status "syn_sent", "syn_receive" or "last_ack" [#]',
    'tim: Number of TCP connections with status "waiting"[#]',
    'clo: Number of TCP connections with status "fin-wait1/2", "close/_wait" or "closing" (closed) [#]',

    # sockets
    # https://community.hpe.com/t5/Networking/Regarding-meaning-for-quot-cat-proc-net-sockstat-quot-results/td-p/3922867#.Yvnld3VBxhE
    # /proc/net/sockstat
    'tot: Number of total sockets [#]',
    'tcp: Number of tcp sockets [#]',
    'udp: Number of udp sockets [#]',
    'raw: Number of raw (using no protocol) sockets [#]',
    'frg: Number of frag sockets [#]',

    # system
    # https://www.tecmint.com/dstat-monitor-linux-server-performance-process-memory-network/
    'int: Number of interrupts [#]',
    'csw: Number of context switches [#]',

    # procs
    # /proc/stat
    'run: Number of processes with status "running" [#]',
    'blk: Number of processes with status "blocked" [#]',
    'new: Number of processes with status "new" [#]'
]

# Legend labels for the plots, one per metric column, in the form
# "<column>: <meaning> [<unit>]". The plotting cell indexes this list by
# column position, so the order must match the column order of the frames.
COL_NAMES = [
    # total-cpu-usage
    # NOTE(review): dstat appears to report idl/wai/hiq/siq as CPU-time
    # percentages like usr/sys; the "[#]" wording below may be inaccurate —
    # confirm against the dstat man page.
    'usr: CPU usage by user processes [%]',
    'sys: CPU usage by system processes [%]',
    'idl: Number of idle processes [#]',
    'wai: Number of waiting processes [#]',
    'hiq: Number of hard interrupts [#]',
    'siq: Number of soft interrupts [#]',
    # memory usage
    # https://www.tecmint.com/dstat-monitor-linux-server-performance-process-memory-network/
    # https://linuxtect.com/linux-dstat-command-monitor-linux-performance-and-resource-usage/
    # https://www.thegeekdiary.com/understanding-proc-meminfo-file-analyzing-memory-utilization-in-linux/
    # /proc/meminfo
    'used: Amount of used memory [Bytes]', # / 1024 = KB
    'buff: Amount of buffered memory [Bytes]', # / 1024 / 1024 = MB
    'cach: Amount of cached memory [Bytes]',
    'free: Amount of free memory [Bytes]',

    # filesystem
    # https://www.linuxquestions.org/questions/linux-general-1/proc-sys-fs-file-nr-57646/
    # /proc/sys/fs/file-nr
    'files: Number of allocated file handles [#]',
    'inodes: Number of used file handles [#]',

    # dsk/total
    'read: Amount of read bytes on disk [Bytes]', # not 100% sure
    'writ: Amount of written bytes on disk [Bytes]', # / 1024 = KB

    # dsk/total
    'reads: Number of read operations on disk [#]',
    'writs: Number of write operations on disk [#]',

    # net/eth0
    'recv: Amount of received bytes on eth0 [B]', # /1024 = k
    'send: Amount of sent bytes on eth0 [B]',  # legend used to say "received"

    # tcp sockets
    # http://karunsubramanian.com/network/what-is-syn_sent-socket-status/
    # /proc/net/tcp
    # /proc/net/tcp6
    'lis: Number of TCP connections "listening" [#]',
    'act: Number of TCP connections "established" (active) [#]',
    'syn: Number of TCP connections "syn_s", "syn_r" or "last_a" [#]',
    'tim: Number of TCP connections with status "waiting"[#]',
    'clo: Number of TCP connections with status "closing"  [#]',

    # sockets
    # https://community.hpe.com/t5/Networking/Regarding-meaning-for-quot-cat-proc-net-sockstat-quot-results/td-p/3922867#.Yvnld3VBxhE
    # /proc/net/sockstat
    'tot: Number of total sockets [#]',
    'tcp: Number of tcp sockets [#]',
    'udp: Number of udp sockets [#]',
    'raw: Number of raw sockets [#]',
    'frg: Number of frag sockets [#]',

    # system
    # https://www.tecmint.com/dstat-monitor-linux-server-performance-process-memory-network/
    'int: Number of interrupts [#]',
    'csw: Number of context switches [#]',  # was "ontext switches"

    # procs
    # /proc/stat
    'run: Number of processes with status "running" [#]',
    'blk: Number of processes with status "blocked" [#]',
    'new: Number of processes with status "new" [#]'
]

In [None]:
from dfLoader import main

# main() hands back the data frames and a parallel list of their names
# (presumably one pair per recorded run — confirm against dfLoader).
dfs, names = main()

# Sanity check: first 'usr' sample of each frame, printed next to its name.
for df, name in zip(dfs, names):
    first_usr = df['usr'].iloc[0]
    print(first_usr, name)

In [None]:
# Show the index labels found at row positions 120 and 240 of every frame
# (the plotting cell draws its vertical markers at these positions).
for df, name in zip(dfs, names):
    for position in (120, 240):
        print(df.index[position])

In [None]:
# Time Series
# For every metric column of every frame, save a three-panel PNG under
# plots/timeseries/<name>/: raw samples, rolling mean, and non-overlapping
# block mean. Each panel also shows the two trigger markers and the mean of
# the column within each of the three phases.
MALWARE_START = 120   # row position at which the malware is triggered
MTD_START = 240       # row position at which the MTD is triggered
ROLLING_WINDOW = 10   # samples per rolling / block mean

# Column-name suffix and row slice of each phase average.
PHASES = (
    ('-avg1/3', slice(None, MALWARE_START)),
    ('-avg2/3', slice(MALWARE_START, MTD_START)),
    ('-avg3/3', slice(MTD_START, None)),
)
# Suffixes of the helper columns this cell adds to each frame. They are
# skipped when choosing what to plot, so re-running the cell does not try to
# plot (and label) its own helper columns.
DERIVED_SUFFIXES = tuple(suffix for suffix, _ in PHASES) + ('-avg', '-rolling')
# Legend text keyed by column name ("usr" -> "usr: CPU usage ..."), so the
# label no longer depends on the position of the column inside the frame.
LABEL_BY_COLUMN = {entry.split(':', 1)[0]: entry for entry in COL_NAMES}


def _draw_panel(ax, df, column, series, label, **line_style):
    """Plot `series` on `ax` together with the phase markers and phase averages.

    Args:
        ax: Matplotlib axes to draw on.
        df: Frame holding `column` and its '-avgN/3' helper columns.
        column: Name of the raw metric column (fixes the y-range of the panel).
        series: The series to draw as the main line of this panel.
        label: Legend label of the main line.
        **line_style: Extra keyword arguments for the main line (marker, color).
    """
    # The y-range is fixed to the raw data before plotting, so all three
    # panels of a figure share the same scale.
    ax.set_ylim([df[column].min(), df[column].max()])
    series.plot(ax=ax, alpha=1, linewidth=1, label=label, **line_style)
    ax.axvline(df.index[MALWARE_START], color='r')  # triggering malware
    ax.axvline(df.index[MTD_START], color='r')      # triggering MTD
    for suffix, rows in PHASES:
        df[column + suffix].iloc[rows].plot(
            ax=ax, color='C6', alpha=1, linestyle='-', linewidth=1, label='avg')
    ax.legend(bbox_to_anchor=(1, 0.75), loc="upper left")


prepareFolder('plots/timeseries')
plt.style.use('classic')

for (df, name) in zip(dfs, names):
    start = df.index[0]
    print(start)
    os.makedirs('plots/timeseries/' + name, exist_ok=True)

    base_columns = [col for col in df.columns
                    if not str(col).endswith(DERIVED_SUFFIXES)]
    for column in base_columns:
        # NOTE(review): the previous version evaluated a z-score outlier mask
        # here (|z| < 3 on every column) but discarded the result, so no rows
        # were ever removed. The no-op is dropped; real outlier filtering would
        # have to happen once, before this loop, and would shift the
        # positional phase boundaries above.

        # Helper columns are still stored on the frame, as before, in case
        # later cells read them.
        values = df[column]
        for suffix, rows in PHASES:
            df[column + suffix] = values.iloc[rows].mean()
        rolling_mean = values.rolling(ROLLING_WINDOW).mean()
        df[column + '-avg'] = rolling_mean
        # Keep every ROLLING_WINDOW-th rolling value (non-overlapping blocks)
        # and back-fill the gaps. The result is assigned back instead of using
        # bfill(inplace=True) on a column selection, which does not update the
        # frame when pandas Copy-on-Write is active.
        df[column + '-rolling'] = rolling_mean[ROLLING_WINDOW - 1::ROLLING_WINDOW]
        df[column + '-rolling'] = df[column + '-rolling'].bfill()

        label = LABEL_BY_COLUMN.get(column, str(column))
        # A stray plt.hist(df[column]) used to be drawn into the figure before
        # the subplots were created; it is not part of the time-series plot.
        fig, axes = plt.subplots(3, 1, figsize=(7, 1), dpi=50)
        _draw_panel(axes[0], df, column, df[column], label,
                    marker="o", color='C0')
        _draw_panel(axes[1], df, column, df[column + '-avg'],
                    label + ' rolling avg', marker="+", color='C2')
        _draw_panel(axes[2], df, column, df[column + '-rolling'],
                    label + ' non-overlapping rolling avg', color='C1')

        # Title and y-label go on the last panel, as before.
        axes[2].set_title('{}'.format(CAT.get(column)))
        axes[2].set_ylabel(column)

        fig.savefig('plots/timeseries/{}/{}-timeseries.png'.format(name, column),
                    facecolor='white', transparent=False, bbox_inches="tight")
        # Close each figure once saved; otherwise one figure per column stays
        # open until the end of the cell.
        plt.close(fig)
plt.close('all')