# Process Checks

In [22]:
# Emit an HTML snippet whose script toggles the visibility of every
# `div.input` element (via jQuery `$`). `code_toggle` is also registered to run
# once when the document is ready; because `code_show` starts as true, that
# first call hides the inputs. The trailing link calls `code_toggle()` again.
from IPython.display import HTML
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

## Tickerplant code

In [23]:
from qpython.qconnection import QConnection as qcon
from qpython.qcollection import QDictionary as qdict
from contextlib import redirect_stdout as rd_so
import time
import psutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

#set qcon variables
import csv

# Read the connection settings used by every qcon(...) call in this notebook.
# credentials.csv is expected to hold a header line followed by at least one
# "host,username,password" row; if several data rows exist the last one wins.
host = un = pswd = None
# newline='' is what the csv module documents for files handed to csv.reader.
with open('credentials.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        line_count += 1
        if not row:
            # blank line (e.g. trailing newline at end of file): nothing to read
            continue
        if line_count == 1:
            print(f'Credentials required are {", ".join(row)}')
        else:
            host, un, pswd = row[0], row[1], row[2]
            # Never echo the password: cell output is saved with the notebook.
            print(f'\t host:{host} username: {un} password: ********.')
    print(f'Processed {line_count} lines.')

if pswd is None:
    # Fail here with a clear message instead of a NameError in a later cell.
    raise ValueError('credentials.csv contains no host,username,password row')
                  

Credentials required are host, username, password
	 host:localhost username: admin password: admin.
Processed 2 lines.


In [40]:
#run torq summary
torq_sum= ! "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"/torq.sh summary

#convert array to numpy array
summary=np.asarray(torq_sum)

#split each element of array by | character
sum_list = [i.split('|') for i in summary]
df = pd.DataFrame(sum_list)

# take first row and use as header for df
new_header = df.iloc[0]
df = df[1:]
df.columns = new_header

#trim whitespace from headers
cols=[]
for i in df.columns:
    cols.append(str.strip(i))
df.columns=cols

#trim whitespace from all objects within dataframe
data = df.select_dtypes(['object'])
df[data.columns] = df.apply(lambda x: x.str.strip())

# function to extract the port number for each process - takes a string argument
def find_portno(process, summary=None):
    """Return the port number of a named process as a plain ``int``.

    Parameters
    ----------
    process : str
        Value to look up in the ``PROCESS`` column.
    summary : pandas.DataFrame, optional
        Table with ``PROCESS`` and ``PORT`` columns. Defaults to the
        notebook-level ``df``.

    Returns
    -------
    int
        The first numeric port listed for ``process``; a plain integer is
        what a connection's ``port=`` argument needs.

    Raises
    ------
    ValueError
        If ``process`` is not listed, or its port is blank / not numeric.
    """
    table = df if summary is None else summary
    ports = table.loc[table['PROCESS'] == process, 'PORT']
    # Blank or NaN ports become NaN here and are dropped, so a row without a
    # usable port is reported clearly instead of failing inside astype().
    ports = pd.to_numeric(ports, errors='coerce').dropna()
    if ports.empty:
        raise ValueError(f'no port number found for process {process!r}')
    return int(ports.iloc[0])

#function to decode bytes to strings
def byte_decode(table,cols):
    """Decode ``bytes`` values to ``str`` (UTF-8) in the given columns, in place.

    Parameters
    ----------
    table : pandas.DataFrame
        Frame to modify; the named columns are overwritten.
    cols : list of str, or str
        Column name(s) whose values should be decoded.

    Values that are not bytes (already-decoded strings, NaN, None) are left
    untouched, so calling this twice on the same frame is harmless.
    Returns None, like the original.
    """
    if isinstance(cols, str):
        cols = [cols]
    for col in cols:
        # Series.map is used per column instead of DataFrame.applymap, which
        # newer pandas releases deprecate.
        table[col] = table[col].map(
            lambda x: x.decode('utf-8') if isinstance(x, (bytes, bytearray)) else x
        )
    
#return PIDs
# Empty strings anywhere in df are turned into NaN (in place), so rows with no
# PID can simply be dropped before the remaining values are cast to int.
df.replace('', np.nan, inplace=True)
pids = [int(pid) for pid in df["PID"].dropna()]
print(pids)

[7490, 7582, 7672, 7764, 7856, 7948, 8040, 8132, 8224, 8316, 8408, 8590, 8682, 8772, 8864, 8956, 9048]


### Process Summary

Table showing process name, status, PID and Port number.
* Process status indicated by colours green (up) and red (down).
* Killtick, tpreplay1 and compression1 should usually have a down status indicated.

In [25]:
def print_mem_stats(pid):
    """Return the memory usage of process ``pid`` as a percentage.

    The value is whatever ``psutil.Process(pid).memory_percent()`` reports.
    Nothing is printed, so no output redirection (and no scratch file) is
    needed. Any psutil exception for a missing or inaccessible process
    propagates to the caller.
    """
    return psutil.Process(pid).memory_percent()
def print_cpu_stats(pid):
    """Return the CPU usage of process ``pid`` as a percentage.

    The value is whatever ``psutil.Process(pid).cpu_percent(interval=1.0)``
    reports; with a one-second interval the call blocks for about a second
    while it samples. Nothing is printed, so no output redirection (and no
    scratch file) is needed. Any psutil exception for a missing or
    inaccessible process propagates to the caller.
    """
    return psutil.Process(pid).cpu_percent(interval=1.0)
        
mems=[]
cpus=[]
# Walk the summary rows in order so each stat lands on its own row, whichever
# processes happen to be down (no hard-coded row positions).
for pid in df['PID']:
    if pd.isna(pid) or str(pid).strip() == '':
        #insert 0 into cpu/mem for processes that are down
        mems.append(0)
        cpus.append(0)
    else:
        # PIDs may arrive as text ('7490') or as floats (7490.0)
        pid = int(float(pid))
        mems.append(print_mem_stats(pid))
        cpus.append(print_cpu_stats(pid))

df['%MEM']=mems
df['%CPU']=cpus
df=df.round({'%MEM':1})
# set colour on down processes to red and up processes to green
def colour_down_red(col):
    """Return the CSS colour rule for one status value.

    Gives 'color: red' when 'down' occurs in ``col`` and 'color: green'
    otherwise.
    """
    if 'down' in col:
        return 'color: red'
    return 'color: green'
# Build a Styler that applies colour_down_red to each value of the STATUS
# column only; as the last expression of the cell it is what gets rendered.
df.style.applymap(colour_down_red, subset=['STATUS'])


Unnamed: 0,TIME,PROCESS,STATUS,PID,PORT,%MEM,%CPU
1,12:44:07,discovery1,up,7490.0,1701.0,0.3,0
2,12:44:08,tickerplant1,up,7582.0,1700.0,0.3,0
3,12:44:08,rdb1,up,7672.0,1702.0,0.3,1
4,12:44:08,hdb1,up,7764.0,1703.0,0.3,0
5,12:44:08,hdb2,up,7856.0,1704.0,0.3,0
6,12:44:08,wdb1,up,7948.0,1705.0,0.3,0
7,12:44:08,sort1,up,8040.0,1706.0,0.3,1
8,12:44:08,gateway1,up,8132.0,1707.0,0.3,0
9,12:44:08,killtick,down,,,0.0,0
10,12:44:08,monitor1,up,8224.0,1709.0,0.3,0


### Count of tables in Tickerplant 

Table showing the row count of each table held in the Tickerplant.
* The count for every table should be 0, as the Tickerplant should not be storing any data.
* If the count for any table is not 0, this could indicate a slow subscriber.

In [26]:
# Ask the tickerplant for a one-row table mapping each of its tables to that
# table's row count, then display it.
## every count is expected to be zero
tp_port = find_portno('tickerplant1')
with qcon(host, port=tp_port, username=un, password=pswd, timeout=3.0) as q:
    tablecounts = q(
        "enlist tables[]!count each value each tables[]",
        pandas=True,
    )
tablecounts

Unnamed: 0,logmsg,quote,quote_iex,trade,trade_iex
0,0,0,0,0,0


### Tickerplant Log file size increasing

Checks whether the tickerplant log file is growing.
   * If the log file is growing, the tickerplant is receiving data.
   * If the log file is not growing, the tickerplant may not be receiving data.


In [27]:
# Take two readings of `hcount .u.L` on the tickerplant, two seconds apart,
# and report whether the second reading is strictly larger than the first.
tp_port = find_portno('tickerplant1')
with qcon(host, port=tp_port, username=un, password=pswd, timeout=3.0) as q:
    log1 = q("hcount .u.L")
    time.sleep(2)
    log2 = q("hcount .u.L")
print("Log file sizes are increasing: ", log1 < log2)

Log file sizes are increasing:  False


### Process handles connected to Tickerplant

Table to show the handles of processes connected to the tickerplant and if there are any slow subscribers.
  *  A process may be a slow subscriber if its output queue is not empty, i.e. a number (pending bytes) is shown in the output queue column.

In [28]:
# shows IPC handles with number of bytes waiting in their output queues and what processes are connected
##shows any slow subscribers 
# Two queries run on the tickerplant connection: `.z.W[]` (handles and their
# output queues) and the w/u columns of .clients.clients sorted ascending.
with qcon(host, port=find_portno('tickerplant1'), username=un, password=pswd,timeout=3.0) as q:
    # the result's .keys and .values attributes are read below
    zW=q(".z.W[]", pandas=True)
    hprocesses=q("asc select w,u from .clients.clients", pandas=True)
    # the u column arrives as bytes; decode it to str in place
    byte_decode(hprocesses, ['u'])
# assign columns with new names
keys=pd.DataFrame(zW.keys, columns=['handles'])
values=pd.DataFrame(zW.values, columns=['output queue'])
hprocesses=hprocesses.rename(columns={'u':'processes'})
# apply new names
# DataFrame.join aligns on the row index, so keys and values are paired by position
zW2=keys.join(values)
# join zW2 and hprocesses
# NOTE(review): this join is also by row position, not by handle number; it
# assumes .z.W[] and .clients.clients list the same handles in the same
# order - TODO confirm, or join on the w column instead.
zW2=zW2.join(hprocesses.processes)
zW2.set_index('handles', inplace=True)
zW2

Unnamed: 0_level_0,output queue,processes
handles,Unnamed: 1_level_1,Unnamed: 2_level_1
6,,rdb
7,,wdb
8,,feed
9,,chainedtp
10,,metrics
11,,iexfeed
12,,admin


## RDB code

In [29]:
# Run every RDB check over a single connection; the results are kept in
# notebook-level variables and displayed by the cells that follow.
with qcon(host, port=find_portno('rdb1'), username=un, password=pswd,timeout=3.0) as q:
    
    #Check tables in rdb are same as tables in tickerplant 
    # NOTE(review): the comparison uses `1_tables[]`, which presumably drops the
    # first RDB table before checking against the tickerplant's list - confirm.
    tables = q('all 1_tables[] in ((exec w from .servers.SERVERS where proctype=`tickerplant)0)("tables[]")')
    #Check count of tables in rdb - data is being sent from the tickerplant 
    tptordb = q('enlist tables[]!count each value each tables[]', pandas=True)
    # name of the table at index 2 of tables[]; the queries below hit that same table
    tabname=q('tables[](2)')
    #Check that data in table can be queried
    rdbtquery=q('5#select from tables[](2)', pandas=True)
    # sym arrives as bytes; decode it to str in place
    byte_decode(rdbtquery,['sym'])
    
    #Check that only data from today is present in the rdb tables
    onedatet = q('select Currentdate:all .z.d=distinct (`date$time) from tables[](2)', pandas=True)
    #ondedateq = q('select Currentdate:all .z.d=distinct (`date$time) from quote', pandas=True)

### RDB tables

Checks if the tables in the rdb are the same as the tables in the Tickerplant.

In [30]:
# `tables` holds the result of the table-list comparison query run in the RDB cell.
print ("RDB tables are same as Tickerplant tables : ", tables)

RDB tables are same as Tickerplant tables :  True


### RDB table counts

Checks to see if data is being sent from the Tickerplant to the RDB
* Counts for heartbeat and logmsg should be 0
* If counts for the other tables are 0, the RDB has been sent no data for today

In [31]:
# Display the counts with every column moved into the index; set_index
# returns a new frame here, so tptordb itself is left unchanged.
tptordb.set_index(tptordb.columns.tolist())

heartbeat,logmsg,quote,quote_iex,trade,trade_iex
0,0,0,0,0,0


### RDB Query
Checks to see if tables in the RDB can be queried 

In [32]:
# Show which table was queried, then the rows fetched from it.
print ('Table to be queried:')
# tabname was returned by the RDB as bytes; `name` is reused by the HDB query cell
name = tabname.decode('UTF-8')
print (name)
rdbtquery

Table to be queried:
quote


Unnamed: 0,time,sym,bid,ask,bsize,asize,mode,ex
0,NaT,,,,,,,
1,NaT,,,,,,,
2,NaT,,,,,,,
3,NaT,,,,,,,
4,NaT,,,,,,,


### RDB Date Checks
Check to see if date in RDB tables is the current date

In [33]:
def color_true(val):
    """Return the CSS background rule for one value.

    Values equal to False get a pink background; everything else gets green.
    """
    if val == False:
        shade = 'pink'
    else:
        shade = 'green'
    return f'background-color: {shade}'
# Colour every cell of the date-check result with color_true; the Styler is
# the cell's last expression, so it is what gets rendered.
onedatet.style.applymap(color_true)

Unnamed: 0,Currentdate
0,True


### HDB Code

In [34]:
# Run every HDB check over a single connection (10 second timeout); the
# results are displayed by the cells that follow.
with qcon(host, port=find_portno('hdb1'), username=un, password=pswd,timeout=10.0) as q:
    
    #Check hdb table counts per date (date >= .z.d-5) for every table except heartbeat and logmsg
    hdbtablecount=q('raze{0!select tbl:x,cnt:count i by date from x where date >=.z.d-5}each tables[] except `heartbeat`logmsg', pandas=True)
    # tbl arrives as bytes; decode it to str in place
    byte_decode(hdbtablecount,['tbl'])
    # trivial query whose result is shown at the end of the cell
    a=q('1+1')

    
    # first 5 rows of the table at index 2 of tables[] for date .z.d-1
    lastdayquery=q('5#select from tables[](2) where date=.z.d-1', pandas=True)
    byte_decode(lastdayquery,['sym'])
    
a #check HDB is up 

2

### HDB Table Counts
* Counts for heartbeat and logmsg should be 0
* If counts for other tables are 0, no data has been received for the day before

In [35]:
# Display the per-date HDB table counts as returned (the set_index call is left disabled).
hdbtablecount#.set_index(hdbtablecount.columns.tolist())

Unnamed: 0,date,tbl,cnt


### HDB Query
Check to see if HDB tables can be queried

#### HDB table query 

In [36]:
# NOTE(review): `name` was set in the RDB query cell from the RDB's tables[](2);
# the rows shown here come from the HDB's own tables[](2). The label is only
# right if both processes list the same table at that position - confirm.
print ('Table to be queried:')
print (name)
lastdayquery

Table to be queried:
quote


Unnamed: 0,date,time,sym,bid,ask,bsize,asize,mode,ex
0,NaT,NaT,,,,,,,
1,NaT,NaT,,,,,,,
2,NaT,NaT,,,,,,,
3,NaT,NaT,,,,,,,
4,NaT,NaT,,,,,,,


In [37]:
### Process Memory Usage

#Table to show memory usage of each process

In [38]:
#!ps -e -o %p,  -o %C, -o %mem -o ,%a | grep 1700 >procmem.csv #process status command for TorQ processes
#df3=pd.read_csv("./procmem.csv") 
#df3.columns=('PID','%CPU','%MEM','COMMAND')
#df3['COMMAND'] = df3['COMMAND'].str.slice_replace(0, 65, '   ')
#import re
###function to clean command value
#def clean_command(nope):
    #if re.search('\-procname', nope):
        #pos=re.search('\-procname', nope).start()
        #return nope[:pos]
    #else:
        
        #return nope
#df3['COMMAND'] = df3['COMMAND'].apply(clean_command)
#df3.style.set_properties(subset=['COMMAND'], **{'width': '300px'})
#df3

## CPU and Memory checks

Table to show cpu and memory usage for processes that are up

In [39]:
#df=df.dropna(how='any',axis=0)

