# Process Checks

In [1]:
from IPython.display import HTML
# Render a small HTML/JavaScript snippet as this cell's output.
# The script defines code_toggle(), which hides or shows every `div.input`
# element through jQuery ($) and then flips the code_show flag. It is registered
# to run once when the document is ready, so the code inputs start out hidden.
# The link in the trailing text calls code_toggle() again on each click.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

## Tickerplant code

In [2]:
from qpython.qconnection import QConnection as qcon
from qpython.qcollection import QDictionary as qdict
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

#set qcon variables
import csv

# Read the connection settings (host, un, pswd) used by every qcon(...) call below.
# credentials.csv is read as: first row = header naming the fields, following rows =
# host, username, password. If several data rows are present the last one wins.
with open('credentials.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            print(f'Credentials required are {", ".join(row)}')
        elif len(row) >= 3:
            host, un, pswd = row[0], row[1], row[2]
            # Never echo the password: cell output is stored in the notebook file.
            print(f'\t host:{host} username: {un} password: ********.')
        # Blank or short rows (e.g. a trailing empty line) are counted but skipped
        # instead of raising IndexError.
        line_count += 1
    print(f'Processed {line_count} lines.')
                  

Credentials required are host, username, password
	 host:localhost username: admin password: admin.
Processed 2 lines.


In [3]:
#run torq summary
torq_sum= ! "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"/torq.sh summary

#convert array to numpy array
summary=np.asarray(torq_sum)

#split each element of array by | character
sum_list = [i.split('|') for i in summary]
df = pd.DataFrame(sum_list)

# take first row and use as header for df
new_header = df.iloc[0]
df = df[1:]
df.columns = new_header

#trim whitespace from headers
cols=[]
for i in df.columns:
    cols.append(str.strip(i))
df.columns=cols

#trim whitespace from all objects within dataframe
data = df.select_dtypes(['object'])
df[data.columns] = df.apply(lambda x: x.str.strip())

# function to extract the port number for each process - takes a string argument
def find_portno(process):
    """Look up the port of a process in the summary frame ``df``.

    Args:
        process: value to match exactly against the ``PROCESS`` column.

    Returns:
        A pandas Series (named ``PORT``) holding the matching port(s) converted
        to int; it is empty when no row matches.
    """
    is_wanted = df['PROCESS'] == process
    port_text = df.loc[is_wanted, 'PORT']
    return port_text.astype(str).astype(int)

#function to decode bytes to strings
def byte_decode(table,cols):
    """Decode the byte-string cells of the given columns to ``str``, in place.

    Args:
        table: pandas DataFrame to modify.
        cols: list of column labels whose cells should be decoded as UTF-8.

    Returns:
        None; ``table`` itself is updated.

    Cells that are not ``bytes`` (text that is already decoded, missing values)
    are left unchanged instead of raising AttributeError, so the function is
    safe to call twice on the same frame.
    """
    def _to_text(cell):
        return cell.decode('utf-8') if isinstance(cell, bytes) else cell

    # Series.map per column rather than DataFrame.applymap, which newer pandas
    # releases deprecate.
    table[cols] = table[cols].apply(lambda column: column.map(_to_text))

### Process Summary

Table showing process name, status, PID and Port number.
* Process status indicated by colours green (up) and red (down).
* killtick, tpreplay1 and compression1 are normally expected to show a down status.

In [4]:
# set colour on down processes to red and up processes to green
def colour_down_red(col):
    """Return the CSS text-colour rule for a single status value.

    Any value containing 'down' is coloured red; every other value is green.
    """
    if 'down' in col:
        chosen = 'red'
    else:
        chosen = 'green'
    return f'color: {chosen}'
# Display the summary frame with colour_down_red applied to each cell of the STATUS column only.
df.style.applymap(colour_down_red, subset=['STATUS'])

Unnamed: 0,TIME,PROCESS,STATUS,PID,PORT
1,15:27:04,discovery1,up,16759.0,1701.0
2,15:27:04,tickerplant1,up,16851.0,1700.0
3,15:27:04,rdb1,up,16941.0,1702.0
4,15:27:04,hdb1,up,17033.0,1703.0
5,15:27:04,hdb2,up,17125.0,1704.0
6,15:27:04,wdb1,up,17217.0,1705.0
7,15:27:04,sort1,up,17309.0,1706.0
8,15:27:04,gateway1,up,17401.0,1707.0
9,15:27:04,killtick,down,,
10,15:27:04,monitor1,up,17493.0,1709.0


### Count of tables in Tickerplant 

Table to show the count in each table found in the Tickerplant 
* The counts in each of these tables should be 0, as the Tickerplant should not be storing any data.
* If the count in any of these tables is not 0, this could indicate a slow subscriber.

In [5]:
#table to show tables in TP and the counts of each of them
##counts should all be zero
# find_portno returns a one-element pandas Series; the connection needs a plain int
# port (recent Python versions reject objects that only define __int__ here)
tp_port = int(find_portno('tickerplant1').iloc[0])
with qcon(host, port=tp_port, username=un, password=pswd,timeout=3.0) as q:
    tablecounts = q("enlist tables[]!count each value each tables[]", pandas=True)
tablecounts

Unnamed: 0,logmsg,quote,quote_iex,trade,trade_iex
0,0,0,0,0,0


### Tickerplant Log file size increasing

Checks whether the tickerplant log file is growing.
   * If the log file size is increasing, the tickerplant is receiving data.
   * If the log file size is not increasing, the tickerplant may not be receiving data. 


In [6]:
#log files are increasing over time
# find_portno returns a one-element pandas Series; the connection needs a plain int
# port (recent Python versions reject objects that only define __int__ here)
tp_port = int(find_portno('tickerplant1').iloc[0])
with qcon(host, port=tp_port, username=un, password=pswd,timeout=3.0) as q:
    # sample the size of the tickerplant log file (.u.L) twice, two seconds apart
    log1=(q("hcount .u.L"))
    time.sleep(2)
    log2=(q("hcount .u.L"))
print ("Log file sizes are increasing: ", log1<log2)

Log file sizes are increasing:  True


### Process handles connected to Tickerplant

Table to show the handles of processes connected to the tickerplant and whether there are any slow subscribers.
  *  A process may be a slow subscriber if its output queue shows a value (bytes still waiting to be sent).

In [7]:
# shows IPC handles with number of bytes waiting in their output queues and what processes are connected
##shows any slow subscribers 
# find_portno returns a one-element pandas Series; the connection needs a plain int
# port (recent Python versions reject objects that only define __int__ here)
tp_port = int(find_portno('tickerplant1').iloc[0])
with qcon(host, port=tp_port, username=un, password=pswd,timeout=3.0) as q:
    zW=q(".z.W[]", pandas=True)
    hprocesses=q("asc select w,u from .clients.clients", pandas=True)
    byte_decode(hprocesses, ['u'])
# assign columns with new names
keys=pd.DataFrame(zW.keys, columns=['handles'])
values=pd.DataFrame(zW.values, columns=['output queue'])
hprocesses=hprocesses.rename(columns={'u':'processes'})
# apply new names
zW2=keys.join(values)
# join zW2 and hprocesses
# NOTE(review): this join is positional (row i of .z.W with row i of .clients.clients);
# it presumably relies on both being ordered by handle - confirm
zW2=zW2.join(hprocesses.processes)
zW2=zW2.set_index('handles')
zW2

Unnamed: 0_level_0,output queue,processes
handles,Unnamed: 1_level_1,Unnamed: 2_level_1
6,,rdb
7,,wdb
8,,feed
9,,chainedtp
10,,metrics
11,,iexfeed
12,,admin


## RDB code

In [8]:
# find_portno returns a one-element pandas Series; the connection needs a plain int
# port (recent Python versions reject objects that only define __int__ here)
rdb_port = int(find_portno('rdb1').iloc[0])
with qcon(host, port=rdb_port, username=un, password=pswd,timeout=3.0) as q:
    
    #Check tables in rdb are same as tables in tickerplant 
    tables = q('all 1_tables[] in ((exec w from .servers.SERVERS where proctype=`tickerplant)0)("tables[]")')
    #Check count of tables in rdb - data is being sent from the tickerplant 
    tptordb = q('enlist tables[]!count each value each tables[]', pandas=True)
    #Check rdb can be queried
    rdbtquery = q('-5#select from trade', pandas=True)
    rdbqquery= q('-5#select from quote', pandas=True)
    
    # sym values arrive as bytes; decode them for display
    byte_decode(rdbtquery,['sym'])
    byte_decode(rdbqquery,['sym'])
    

    #Check that only data from today is present in the rdb tables
    onedatet = q('select Currentdate:all .z.d=distinct (`date$time) from trade', pandas=True)
    #ondedateq = q('select Currentdate:all .z.d=distinct (`date$time) from quote', pandas=True)

### RDB tables

Checks if the tables in the rdb are the same as the tables in the Tickerplant.

In [9]:
# Report the result of the table comparison query stored in `tables` by the RDB code cell.
print ("RDB tables are same as Tickerplant tables : ", tables)

RDB tables are same as Tickerplant tables :  True


### RDB table counts

Checks to see if data is being sent from the Tickerplant to the RDB
* Counts for heartbeat and logmsg should be 0
* If counts for the other tables are 0, the RDB has been sent no data for today

In [10]:
# Display the counts with every column moved into the index. set_index returns a new
# frame and the result is not assigned, so tptordb itself is left unchanged.
tptordb.set_index(tptordb.columns.tolist())

heartbeat,logmsg,quote,quote_iex,trade,trade_iex
0,0,1218871,0,255993,0


### RDB Query
Checks to see if tables in the RDB can be queried 

#### RDB Trade table query

In [11]:
# Display the rows returned by the '-5#select from trade' query against the RDB.
rdbtquery

Unnamed: 0,time,sym,price,size,stop,cond,ex
0,2019-10-28 15:27:06.887870,INTC,137.51,6,True,J,N
1,2019-10-28 15:27:06.887870,HPQ,33.34,81,False,L,O
2,2019-10-28 15:27:06.887870,AMD,37.22,47,False,G,N
3,2019-10-28 15:27:06.887870,AMD,37.24,32,False,R,N
4,2019-10-28 15:27:06.887870,MSFT,37.4,71,False,Z,N


#### RDB Quote table query

In [12]:
# Display the rows returned by the '-5#select from quote' query against the RDB.
rdbqquery

Unnamed: 0,time,sym,bid,ask,bsize,asize,mode,ex
0,2019-10-28 15:27:07.685284,IBM,75.54,76.62,70,60,H,N
1,2019-10-28 15:27:07.685284,IBM,75.27,76.11,30,51,Z,N
2,2019-10-28 15:27:07.685284,IBM,75.77,76.28,22,42,Z,N
3,2019-10-28 15:27:07.685284,INTC,137.69,139.46,40,37,B,N
4,2019-10-28 15:27:07.685284,AIG,20.16,20.89,93,95,Z,O


### RDB Date Checks
Check to see if date in RDB tables is the current date

In [13]:
def color_true(val):
    """Return the CSS background rule for a check result.

    Values equal to False get a pink background; anything else gets green.
    """
    shade = 'green'
    if val == False:  # equality rather than identity, so 0 also counts as False
        shade = 'pink'
    return 'background-color: ' + shade
# Display the date-check result with color_true applied to every cell.
onedatet.style.applymap(color_true)

Unnamed: 0,Currentdate
0,True


### HDB Code

In [14]:
# find_portno returns a one-element pandas Series; the connection needs a plain int
# port (recent Python versions reject objects that only define __int__ here)
hdb_port = int(find_portno('hdb1').iloc[0])
with qcon(host, port=hdb_port, username=un, password=pswd,timeout=3.0) as q:
    
    #Check hdb table counts excl. eod_summary and eod_summary_iex
    hdbtablecount=q('enlist tables[]!count each value each tables[]',pandas=True)

    
    # first five trade rows for yesterday's date partition (date=.z.d-1)
    lastdayquery=q('5#select from trade where date=.z.d-1', pandas=True)
    byte_decode(lastdayquery,['sym'])
    
    # first five quote rows for yesterday's date partition (date=.z.d-1)
    lastqquery=q('5#select from quote where date=.z.d-1', pandas=True)
    byte_decode(lastqquery,['sym'])


### HDB Table Counts
* Counts for heartbeat and logmsg should be 0
* If counts for other tables are 0, no data has been received for the day before

In [15]:
# Display the counts with every column moved into the index. set_index returns a new
# frame and the result is not assigned, so hdbtablecount itself is left unchanged.
hdbtablecount.set_index(hdbtablecount.columns.tolist())

heartbeat,logmsg,quote,quote_iex,trade,trade_iex
0,0,8176504,0,1836405,0


### HDB Query
Check to see if HDB tables can be queried

#### HDB Trade table query 

In [16]:
# Display the rows returned by the '5#select from trade where date=.z.d-1' query against the HDB.
lastdayquery

Unnamed: 0,date,time,sym,price,size,stop,cond,ex
0,2019-10-27,2019-10-27 11:49:00.684878,MSFT,28.99,43,True,Z,N
1,2019-10-27,2019-10-27 11:49:00.889977,MSFT,29.04,36,False,A,N
2,2019-10-27,2019-10-27 11:49:00.889977,MSFT,29.04,17,False,J,N
3,2019-10-27,2019-10-27 11:49:01.885948,MSFT,28.91,54,False,A,N
4,2019-10-27,2019-10-27 11:49:04.486495,MSFT,28.92,56,False,,N


#### HDB Quote table query

In [17]:
# Display the rows returned by the '5#select from quote where date=.z.d-1' query against the HDB.
lastqquery

Unnamed: 0,date,time,sym,bid,ask,bsize,asize,mode,ex
0,2019-10-27,2019-10-27 11:49:01.285294,MSFT,28.67,29.73,51,28,R,N
1,2019-10-27,2019-10-27 11:49:01.285294,MSFT,28.1,29.75,52,64,A,N
2,2019-10-27,2019-10-27 11:49:01.285294,MSFT,28.34,29.58,45,77,I,N
3,2019-10-27,2019-10-27 11:49:01.684866,MSFT,28.23,29.14,35,18,L,N
4,2019-10-27,2019-10-27 11:49:01.684866,MSFT,28.11,29.58,12,94,Z,N


### Process Memory Usage

Table to show memory usage of each process

In [18]:
pid=!ps -o pid,user,%mem,%cpu,command ax |grep stackid  #process status commmand for TorQ processes
df2=np.asarray(pid)                                     #make ps ouput into an array
df3=[i.split(' ') for i in df2]                         #split array by whitespace
df4=pd.DataFrame(df3)                                   #make array into a pandas dataframe
df4=df4.drop([0,2,3,5,8,9,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28],1) #drop unwanted/empty columns
df4=df4.drop([17,18],0)                                 #drop last two rows of non-TorQ related process status info
df4.columns=('PID','USER','%MEMORY','%CPU','process')   #rename columns appropriately
df4

Unnamed: 0,PID,USER,%MEMORY,%CPU,process
0,16759,0.3,0.1,q,discovery1
1,16851,0.0,0.1,q,tickerplant1
2,16941,5.0,0.2,q,rdb1
3,17033,0.3,0.1,q,hdb1
4,17125,0.3,0.1,q,hdb2
5,17217,0.4,0.2,q,wdb1
6,17309,0.3,0.2,q,sort1
7,17401,0.5,0.1,q,gateway1
8,17493,0.5,0.2,q,monitor1
9,17585,0.3,0.1,q,housekeeping1
