## Imports

In [None]:
import glob
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

## Constants

In [None]:
# Number of lines skipped at the top of every CSV file before the column-header
# row (passed to `skiprows` when the files are loaded).
HEADER_LEN = 6
# Down-sampling stride: only every KEEP-th row of the merged data is kept
# (1 keeps every row).
KEEP = 1
# Colour palette used for the annotation lines in the plots.
COLORS = [
    '#1f77b4',
    '#ff7f0e',
    '#2ca02c',
    '#d62728',
    '#9467bd',
    '#8c564b',
    '#e377c2',
    '#7f7f7f',
    '#bcbd22',
    '#17becf',
    '#1a55FF',
]
# Directory the notebook was started from.
CURRENT_WORKING_DIR = os.getcwd()
print(CURRENT_WORKING_DIR)

# All later paths ('data/csv', 'data/plots/...') are relative to the project
# root. The root can be overridden through the MTD_POLICY_ROOT environment
# variable; the default is the original hard-coded location.
PROJECT_ROOT = os.environ.get(
    'MTD_POLICY_ROOT',
    'D:/Switchdrive/Uni/Thesis/code/HuberNicolas/MTDPolicy/',
)
if os.path.isdir(PROJECT_ROOT):
    os.chdir(PROJECT_ROOT)
else:
    # Do not crash on machines where the default location does not exist:
    # stay in the launch directory and treat it as the project root.
    print('Project root {!r} not found; staying in {}'.format(PROJECT_ROOT, CURRENT_WORKING_DIR))



# Maps a metric column name to the name of the group it belongs to.
# The group name is used as the prefix of the plot titles.
CAT = {
    'usr': 'total-cpu-usage',
    'sys': 'total-cpu-usage',
    'idl': 'total-cpu-usage',
    'wai': 'total-cpu-usage',
    'hiq': 'total-cpu-usage',
    'siq': 'total-cpu-usage',

    'used': 'memory usage',
    'buff': 'memory usage',
    # The data column is spelled 'cach'; 'cache' is kept so existing lookups
    # with the long spelling keep working.
    'cach': 'memory usage',
    'cache': 'memory usage',
    'free': 'memory usage',

    'files': 'filesystem',
    'inodes': 'filesystem',

    'read': 'dsk/total',
    'writ': 'dsk/total',
    'reads': 'dsk/total',
    'writs': 'dsk/total',

    'recv': 'net/eth0',
    'send': 'net/eth0',

    'lis': 'tcp sockets',
    'act': 'tcp sockets',
    'syn': 'tcp sockets',
    'tim': 'tcp sockets',
    'clo': 'tcp sockets',

    'int': 'system',
    'csw': 'system',

    'run': 'procs',
    'blk': 'procs',
    'new': 'procs',
}

# Display names of the metric groups, in the order the groups are plotted.
# Each name is used as a plot title and as the stem of the saved image file.
CAT_NAMES = [
    'total-cpu-usage',
    'memory-usage',
    'filesystem',
    'dsk-total1',
    'dsk-total2',
    'net-eth0',
    'tcp sockets',
    'system',
    'procs',
]

# Human-readable axis labels, one per metric column. The plotting code looks
# labels up by position, so the order here must match the column order of the
# loaded data.
COL_NAMES = [
    # total-cpu-usage (dstat reports all six values as shares of CPU time)
    'usr: CPU usage by user processes [%]',
    'sys: CPU usage by system processes [%]',
    'idl: CPU idle time [%]',
    'wai: CPU time spent waiting for I/O [%]',
    'hiq: CPU time spent servicing hardware interrupts [%]',
    'siq: CPU time spent servicing software interrupts [%]',

    # memory usage
    # https://www.tecmint.com/dstat-monitor-linux-server-performance-process-memory-network/
    # https://linuxtect.com/linux-dstat-command-monitor-linux-performance-and-resource-usage/
    # https://www.thegeekdiary.com/understanding-proc-meminfo-file-analyzing-memory-utilization-in-linux/
    # /proc/meminfo
    'used: Amount of used memory [Bytes]',  # / 1024 = KB
    'buff: Amount of buffered memory [Bytes]',  # / 1024 / 1024 = MB
    'cache: Amount of cached memory [Bytes]',
    'free: Amount of free memory [Bytes]',

    # filesystem
    # https://www.linuxquestions.org/questions/linux-general-1/proc-sys-fs-file-nr-57646/
    # /proc/sys/fs/file-nr
    'files: Number of allocated file handles [#]',
    'inodes: Number of used file handles [#]',

    # dsk/total (throughput)
    'read: Amount of read bytes on disk [Bytes]',  # not 100% sure
    'writ: Amount of written bytes on disk [Bytes]',  # / 1024 = KB

    # dsk/total (operation counts)
    'reads: Number of read operations on disk [#]',
    'writs: Number of write operations on disk [#]',

    # net/eth0
    'recv: Amount of received bytes on eth0 [B]',  # / 1024 = k
    'send: Amount of sent bytes on eth0 [B]',

    # tcp sockets
    # http://karunsubramanian.com/network/what-is-syn_sent-socket-status/
    # /proc/net/tcp
    # /proc/net/tcp6
    'lis: Number of TCP connections with status "listening" [#]',
    'act: Number of TCP connections with status "established" (active) [#]',
    'syn: Number of TCP connections with status "syn_sent", "syn_receive" or "last_ack" [#]',
    'tim: Number of TCP connections with status "waiting" [#]',
    'clo: Number of TCP connections with status "fin-wait1/2", "close/_wait" or "closing" (closed) [#]',

    # system
    # https://www.tecmint.com/dstat-monitor-linux-server-performance-process-memory-network/
    'int: Number of interrupts [#]',
    'csw: Number of context switches [#]',

    # procs
    # /proc/stat
    'run: Number of processes with status "running" [#]',
    'blk: Number of processes with status "blocked" [#]',
    'new: Number of processes with status "new" [#]',
]
# Period handed to the seasonal decomposition of the time series.
PERIOD = 60

## Load csv files

### Loading

In [None]:
# Collect every CSV export below data/csv (relative to the current working
# directory). os.path.join keeps the pattern portable across operating systems
# and avoids the invalid '\c' and '\*' escapes of a hand-written backslash path.
files = glob.glob(os.path.join('data', 'csv', '*.csv'))
print(files)
if not files:
    # Fail early with a clear message instead of pd.concat's
    # "No objects to concatenate".
    raise FileNotFoundError(
        'No CSV files found in {}'.format(os.path.join(os.getcwd(), 'data', 'csv'))
    )

# create dataframes for each .csv
dfs = []
header = None
for f in files:
    # The column names are taken once, from the first file; nrows=0 parses only
    # the header line instead of the whole file.
    if header is None:
        header = list(pd.read_csv(f, skiprows=HEADER_LEN, nrows=0).columns.values)
    # Read the data rows without a header (the header line itself is skipped).
    dff = pd.read_csv(f, skiprows=HEADER_LEN + 1, header=None)
    dff = dff.iloc[:, :-1]  # drop the last column
    dff.columns = header  # attach the shared column names
    dfs.append(dff)

# Merge the per-file dataframes into one dataframe df.
df = pd.concat(dfs, ignore_index=True)



### Postprocessing

In [None]:
# Prefix every timestamp with the year 2022 and parse it; the format string
# reads the day before the month.
# NOTE(review): presumably the raw 'time' strings carry no year — confirm.
stamps_with_year = '2022-' + df['time'].astype(str)
df['time'] = pd.to_datetime(stamps_with_year, format='%Y-%d-%m %H:%M:%S')

# Order chronologically, keep every KEEP-th row, and use the timestamp as index.
df = (
    df.sort_values(by='time')
      .iloc[::KEEP]
      .reset_index(drop=True)
      .set_index('time')
)

In [None]:
# Save the merged data to a CSV file named after the covered time range and
# the row count: <startdate>-<starttime>_<enddate>-<endtime>(<rows>).csv
first_stamp = df.index[0]
last_stamp = df.index[-1]

# Explicit %H:%M:%S instead of the locale-dependent %X, so the generated file
# name is the same on every machine.
startDate = first_stamp.strftime('%Y-%m-%d')
startTime = first_stamp.strftime('%H:%M:%S')
endDate = last_stamp.strftime('%Y-%m-%d')
endTime = last_stamp.strftime('%H:%M:%S')
name = "{}-{}_{}-{}({}).csv".format(
    startDate.replace('-', ''),
    startTime.replace(':', ''),
    endDate.replace('-', ''),
    endTime.replace(':', ''),
    df.shape[0],
)

# NOTE(review): index=False leaves the 'time' index out of the saved file, so
# only the metric columns are written — confirm this is intended.
df.to_csv(name, index=False, header=True)

In [None]:
# Sanity check: once its first entry is removed, the header read from the CSV
# files should equal the dataframe's column list.
frame_columns = df.columns.to_list()
print(header)
print(frame_columns)
# Removes the first entry in place (presumably 'time', which is now the index).
# Re-running this cell removes a further entry each time.
del header[0]
print(header == frame_columns)

## Information

In [None]:
# Print the column dtypes and the dimensions of the merged dataframe.
col_types = df.dtypes
print(col_types)

n_rows, n_cols = df.shape
print("# datapoints (rows) = {}".format(n_rows))
print("# columns = {}".format(n_cols))
print("# total cells (Size) = Shape[0] x Shape[1] = {}".format(df.size))
print("Shape = {}".format(df.shape))

## Visualization

In [None]:
# Write one two-panel timeline image per metric into data/plots/timeline.
# The working directory is changed because the following cells navigate
# relative to it.
print(os.getcwd())
os.chdir('./data/plots/timeline')
print(os.getcwd())

for index, column in enumerate(df.columns):
    series = df[column]
    axis_label = COL_NAMES[index]  # labels are matched to columns by position
    title = '{} - {}'.format(CAT.get(column), column)
    time_label = 'Timestamp [hh:mm:ss] from {}'.format(startDate)

    # (subplot position, ((legend name, value, linestyle, linewidth), ...));
    # every statistic is computed once and reused for the line and its label.
    panels = (
        (1, (
            ('mean', series.mean(), ':', 3),
            ('max', series.max(), '-', 1),
            ('min', series.min(), '-', 1),
        )),
        (2, (
            ('median', series.median(), '-.', 1),
            ('quantile (0.75)', series.quantile(q=0.75), '-.', 1),
            ('quantile (0.95)', series.quantile(q=0.95), '-.', 1),
        )),
    )

    # No histogram is drawn before the subplots: it would sit on a full-figure
    # axes that the subplots replace or cover.
    plt.figure(figsize=(25, 18), dpi=200)
    for position, guides in panels:
        plt.subplot(2, 1, position)
        series.plot(marker="o", label=axis_label)
        # The three guide lines of each panel use COLORS[1], COLORS[2], COLORS[3].
        for color, (stat_name, value, linestyle, linewidth) in zip(COLORS[1:], guides):
            plt.axhline(y=value, color=color, linestyle=linestyle, linewidth=linewidth,
                        label='{} = {:.2f}'.format(stat_name, value))
        plt.xlabel(time_label)
        plt.ylabel(axis_label)
        plt.title(title)
        plt.legend(bbox_to_anchor=(1, 1), loc="upper left")

    plt.savefig('%s.png' % column, facecolor='white', transparent=False)
    # plt.show()
    plt.close('all')  # free the figure before the next metric

In [None]:
# Write one bar chart per metric group showing the standard deviation of each
# metric in that group. Output goes to the sibling 'std' directory.
os.chdir('../std')

# Column subsets, one per metric group, in the same order as CAT_NAMES.
df_total_cpu_usage = df[['usr', 'sys', 'idl', 'wai', 'hiq', 'siq']]
df_memory_usage = df[['used', 'buff', 'cach', 'free']]
df_filesystem = df[['files', 'inodes']]
df_dsk_total_1 = df[['read', 'writ']]
df_dsk_total_2 = df[['reads', 'writs']]
df_net_eth0 = df[['recv', 'send']]
df_tcp_sockets = df[['lis', 'act', 'syn', 'tim', 'clo']]
df_system = df[['int', 'csw']]
df_procs = df[['run', 'blk', 'new']]

dfs = [df_total_cpu_usage, df_memory_usage, df_filesystem, df_dsk_total_1, df_dsk_total_2, df_net_eth0, df_tcp_sockets, df_system, df_procs]

for index, group in enumerate(dfs):
    category = CAT_NAMES[index]
    # A new figure per group: plt.close('all') below destroys the previous one,
    # so a single figure created before the loop would give only the first
    # image the requested dpi.
    plt.figure(dpi=200)
    stds_x = group.columns.to_list()
    stds_y = group.std().to_list()
    # x and y are passed by keyword; newer seaborn releases no longer accept
    # them positionally.
    sns.barplot(x=stds_x, y=stds_y)
    plt.xlabel('metrics')
    plt.ylabel('standard deviation')
    plt.title('{}'.format(category))
    plt.savefig('%s-std.png' % category, facecolor='white', transparent=False)
    plt.close('all')

## Exploration

In [None]:
# For a selection of metrics, save a box plot and an additive seasonal
# decomposition plot. Output goes to the sibling 'decompose' directory.
os.chdir('../decompose')

# (data column, file-name stem) pairs; the files are written as
# <stem>_box.png and <stem>_decompose.png.
explored_metrics = [
    # CPU
    ('usr', 'cpu_usr'),
    ('sys', 'cpu_sys'),
    # RAM
    ('free', 'ram_free'),
    # Disk
    ('read', 'disk_read'),
    ('writ', 'disk_write'),
    ('reads', 'disk_read_no'),
    ('writs', 'disk_write_no'),
    # Network
    ('recv', 'network_recv'),
    ('send', 'network_send'),
]

for column, stem in explored_metrics:
    # The box plot uses the same column as the decomposition. Previously every
    # disk and network box plot drew df['free'].
    plt.figure(figsize=(12, 6))
    # Passing the series as x= draws a horizontal box and avoids the
    # version-dependent handling of a positional argument.
    sns.boxplot(x=df[column])
    plt.savefig("{}_box.png".format(stem), facecolor='white', transparent=False)

    # .plot() creates its own figure, which becomes the current figure that
    # plt.savefig writes.
    result = sm.tsa.seasonal_decompose(df[column], model="add", period=PERIOD).plot()
    plt.savefig("{}_decompose.png".format(stem), facecolor='white', transparent=False)