### Wash Sales Case Grouping

In [1]:
import os; 
import pandas as pd; pd.set_option('mode.chained_assignment',None) 
import numpy as np
import datetime
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import math
import re
import xlrd
from xlsxwriter.utility import xl_rowcol_to_cell
from random import sample
from src.data_models.SmartsCsvDataModel import SmartsCsvDataModel
from src.data_models.smartshelper import metrics, fixNum, unSMART, cparty, totalUP, deets, wtf, identities
from src.data_models.SmartsDataModel import SmartsDataModel
from src.utility.DataModelUtility import execute_query_data_frame

  """)


#### 1) Set Alerting Period

In [2]:
# Alerting window: first and last day of the period under review. These two dates drive
# the input file names, the output directory, plot titles and the generated case text.
alerting_1 = datetime.date(2019,8,1)
alerting_2 = datetime.date(2019,8,31)

# Every relative output path used later (PNG figures, the XLSX workbook) resolves against
# this month-stamped directory.
# NOTE(review): absolute, user-specific path — consider a configurable base directory.
os.chdir('/Users/sqian/Documents/{}_output'.format(alerting_1.strftime("%Y%m")))
# Default size for every matplotlib figure produced by this notebook.
plt.rcParams['figure.figsize'] = (12,4)

#### 2) Import Alert Data (Excel export)

In [3]:
# Load the month's alert export (an Excel workbook whose name carries the alerting month in
# "%Y%b" form, e.g. "2019Aug") and pass it through the project helper `unSMART`.
# NOTE(review): `unSMART` is defined outside this notebook; presumably it normalises dtypes
# and trims columns (it reports "type fixed, kept ... rows, ... columns") — confirm.
rawalerts = unSMART(pd.read_excel('~/Documents/vmfldr/smarts1_{}.xlsx'.format(alerting_1.strftime("%Y%b"))))

-> type fixed, kept 179 rows, 12 columns


#### Data Prep - Alert Level

In [4]:
# Wash-sale alerts only (alert codes 4042 and 4041): keep the alert code and alert date,
# renumbering the rows from zero so they can be addressed positionally later on.
_is_wash_alert = rawalerts['AlertCode'].isin([4042, 4041])
washDateAlert = rawalerts.loc[_is_wash_alert, ['AlertCode','Date']].reset_index(drop=True)
# washDateAlert

#### Cases Excluding Wash

In [5]:
# Distinct alert codes present in the alert export.
codelist = rawalerts.groupby('AlertCode').count().index.tolist()

# One record per alert code: [code, number of accounts, list of accounts].
# NOTE(review): `metrics` is a project helper; only the index of its result is used here.
fig_0_rows = []
for code in codelist:
    code_accounts = metrics(rawalerts.loc[rawalerts['AlertCode'] == code, 'AccountIDName']).index.tolist()
    fig_0_rows.append([code, len(code_accounts), code_accounts])

fig_0 = pd.DataFrame(fig_0_rows).rename(columns={0:'code',1:'countAccounts',2:'accList'})

#### Data Prep - Alert Event Level

In [6]:
# Load the event-level CSV behind every wash-sale alert and stack them into one frame.
# Each alert has its own file, located by alert month (folder), alert code and alert date
# (file name); the alert code is stamped onto every event row.
event_frames = []
for i in range(0,washDateAlert.shape[0]):
    alert_code = washDateAlert.loc[i,'AlertCode']
    alert_date = washDateAlert.loc[i,'Date']
    temp = pd.read_csv('~/Documents/vmfldr/washtext{}/All_Securities_{}_{}.csv'.format(alert_date.strftime("%Y%m"),alert_code,alert_date.strftime("%Y%m%d")))
    temp['AlertCode'] = alert_code
    event_frames.append(temp)

# Concatenate once instead of calling DataFrame.append inside the loop: append is deprecated
# in newer pandas and re-copies the accumulated frame on every iteration.
# Row labels are deliberately NOT renumbered here, matching the append-based behaviour.
# With no wash alerts this falls back to an empty frame, as the original code did.
newdf = pd.concat(event_frames, sort=False) if event_frames else pd.DataFrame()

# Strip spaces from the column names, then parse the event date.
# The format string starts with a space, i.e. the raw 'Date' text is expected as " dd/mm/yyyy".
newdf.columns = newdf.columns.str.replace(' ','')
newdf['datetime'] = pd.to_datetime(newdf['Date'],format=' %d/%m/%Y')
# 'datetime' is already a datetime column, so month/year can be read from it directly.
newdf['month'] = newdf['datetime'].dt.month
newdf['year'] = newdf['datetime'].dt.year

### Preliminary Figures

#### Figure A: Wash Alerts Summary

In [7]:
# Wash-sale alerts (codes 4041 / 4042) with every column of the alert export.
washalerts = rawalerts.loc[(rawalerts['AlertCode'] == 4041) | (rawalerts['AlertCode'] == 4042)]

# Alert counts per (year, month), one column per wash alert code.
# NOTE(review): relies on 'year', 'month' and 'Datetime' columns being present in rawalerts,
# presumably added by unSMART — confirm.
fig_A_alerts = pd.pivot_table(washalerts.groupby(['year','month','AlertCode']).count(), values='Datetime', index=['year','month'], columns=['AlertCode'], aggfunc=np.sum).fillna(value=0).astype(int)
# Total wash alerts per month across the alert codes.
fig_A_alertsT = fig_A_alerts.sum(axis=1)
# Wash alert events per month (rows of the event-level frame).
fig_A_events = pd.DataFrame(newdf.groupby(['year','month']).count()['datetime'])
# Side by side: per-code alert counts, the unnamed total (renamed 'alerts') and the event
# count (renamed 'events').
fig_A_draft = pd.concat([fig_A_alerts, fig_A_alertsT, fig_A_events],axis=1).rename(columns={0:'alerts','datetime':'events'})

# Add a TOTAL row that sums every column, then derive events per alert (one decimal).
fig_A = pd.concat([fig_A_draft, pd.DataFrame(fig_A_draft.sum(axis=0)).rename(columns={0:'TOTAL'}).transpose()],axis=0)
fig_A['events/alert'] = round(fig_A['events']/fig_A['alerts'],1)
# fig_A

#### Figure B: Alert Event Count

In [8]:
# Calendar of every day in the alerting period; it is the backbone of the daily plots so
# that days without events show up as zero.
# The number of days is derived from alerting_1..alerting_2 (inclusive). A fixed 30-day range
# omitted the last day of 31-day months and overshot shorter months.
period_days = (alerting_2 - alerting_1).days + 1
date_list = [alerting_1 + datetime.timedelta(days=x) for x in range(0, period_days)]
datePD = pd.DataFrame(date_list).set_index(0)
# Number of wash alert events per calendar day.
data = pd.DataFrame(newdf['datetime'].dt.date.value_counts())

# Align the daily counts to the calendar; missing days become 0.
plotdata = pd.concat([datePD,data['datetime']],axis=1,sort=True).fillna(0).astype(int)

In [9]:
# Figure B: daily wash-sale event counts over the alerting period, saved as a PNG in the
# current working directory.
fig_B_title = "{} - {}: Wash Sale Alert Events".format(alerting_1.strftime("%Y/%m/%d"),alerting_2.strftime("%Y/%m/%d"))
fig_B_file = "{}_Wash_fig_B.png".format(alerting_1.strftime("%Y%b"))

plt.plot_date(x=plotdata.index, y=plotdata['datetime'], fmt="go-", xdate=True)
plt.title(fig_B_title)
plt.ylabel("Number of Events")
plt.xticks(rotation=20)
plt.grid(True)
plt.savefig(fig_B_file)
# Close the figure so the next plot starts from a clean canvas.
plt.close()


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


#### Figure C: Alert Events by Account

In [10]:
# Long list of event participants: every event contributes one row for its buyer and one
# for its seller, both under the common column name 'acc' (buyers first, then sellers).
# Built with pd.concat rather than DataFrame.append, which is deprecated in newer pandas.
newdf2 = pd.concat([newdf[['BuyerName','Security','datetime']].rename(columns={'BuyerName':'acc'}),
                    newdf[['SellerName','Security','datetime']].rename(columns={'SellerName':'acc'})])
# Number of event appearances per account.
fig_Cdraft = pd.DataFrame(newdf2['acc'].value_counts())
# First 10 rows of the count table, a 'remaining' row for all other accounts, and a 'TOTAL' row.
fig_C = pd.concat([fig_Cdraft[:10],
                   pd.DataFrame(fig_Cdraft[10:].sum()).rename(columns={0:'remaining'}).transpose(),
                   pd.DataFrame(fig_Cdraft.sum()).rename(columns={0:'TOTAL'}).transpose()],axis=0).rename(columns={'acc':'count'})
# Share of all appearances, as a rounded whole-number percentage string.
fig_C['percAcc'] = round(fig_C['count']/fig_Cdraft['acc'].sum()*100).astype(int).astype(str) + '%'

#### Figure D: Alert Events by Account Pair

In [11]:
# Order-independent account pair: acc1 is the smaller and acc2 the larger of buyer/seller.
counterparties = newdf[['BuyerName','SellerName']]
newdf['acc1'] = counterparties.min(axis=1)
newdf['acc2'] = counterparties.max(axis=1)

# Pair label "<acc1> & <acc2>".
newdf['pair_POV1'] = newdf['acc1'].astype(str) + ' & ' + newdf['acc2'].astype(str)

# MoneyPassAmount signed from acc1's point of view: +1 when acc1 is the buyer, -1 otherwise.
acc1_is_buyer = newdf['acc1'] == newdf['BuyerName']
newdf['passPOV'] = (2 * acc1_is_buyer - 1) * newdf['MoneyPassAmount']

# Renumber the rows 0..n-1; later cells address rows by position through .loc.
newdf = newdf.reset_index(drop=True)

In [12]:
# Number of events per account pair.
fig_D_draft = pd.DataFrame(newdf['pair_POV1'].value_counts())

# First 10 rows of the count table, then a 'remaining' row for every other pair and a 'TOTAL' row.
fig_D_top = fig_D_draft[:10]
fig_D_rest = pd.DataFrame(fig_D_draft[10:].sum()).rename(columns={0:'remaining'}).transpose()
fig_D_all = pd.DataFrame(fig_D_draft.sum()).rename(columns={0:'TOTAL'}).transpose()
fig_D = pd.concat([fig_D_top, fig_D_rest, fig_D_all],axis=0).rename(columns={'pair_POV1':'count'})

# Share of all events, as a rounded whole-number percentage string.
fig_D_event_total = fig_D_draft['pair_POV1'].sum()
fig_D['percAcc'] = round(fig_D['count']/fig_D_event_total*100).astype(int).astype(str) + '%'
# fig_D

### Case Grouping

#### Case Grouping >>> Identifying Worst Accounts

In [13]:
# Search for an account-share threshold: starting at 11% and rising in 1-point steps, the
# loop regroups the events into cases until the smallest row of the case table holds at
# least 5% of all events.
smallestcat = 0
percentageAlerts = 0.10

while smallestcat < 0.05:
    percentageAlerts = percentageAlerts + 0.01
    
    # Share of all buyer/seller appearances attributable to each account.
    washMetrics = pd.DataFrame(newdf2['acc'].value_counts()/newdf2['acc'].value_counts().sum())
    # 'worst' accounts meet the current threshold; 'okay' accounts fall below it.
    worst = washMetrics.loc[washMetrics['acc'] >= percentageAlerts].index.tolist()
    okay = washMetrics.loc[washMetrics['acc'] < percentageAlerts].index.tolist()


    # Candidate case labels: "<acc> & all" for each worst account and "<a> & <b>" for each
    # pair of distinct worst accounts with a < b.
    worstaccSorted = sorted(worst, reverse=False)
    acclist = []
    for i in worstaccSorted:
        for j in worstaccSorted:
            if (i <= j):
                if (i == j): temp = str(i) + ' & all'
                # i and j both come from worstaccSorted, so this condition always holds here.
                elif (i in worstaccSorted) & (j in worstaccSorted): temp = str(i) + ' & ' + str(j)
                acclist.append(temp)

    newdf['CaseDesc'] = ''
    newdf['acc1str'] = newdf['acc1'].astype(str) + ' & all'
    newdf['acc2str'] = newdf['acc2'].astype(str) + ' & all'
    # Combined value of the two trades of each event.
    newdf['TV'] = newdf['Trade1Value'] + newdf['Trade2Value']

    # Label every event, in priority order: exact worst-pair match, then acc1 is a worst
    # account, then acc2 is a worst account, otherwise the catch-all bucket.
    # Row access by position through .loc relies on newdf having a 0..n-1 index.
    for i in range(0,newdf.shape[0]):
        if newdf.loc[i,'pair_POV1'] in acclist: newdf.loc[i,'CaseDesc'] = newdf.loc[i,'pair_POV1']
        elif (newdf.loc[i,'acc1str'] in acclist): newdf.loc[i,'CaseDesc'] = newdf.loc[i,'acc1str']
        elif (newdf.loc[i,'acc2str'] in acclist): newdf.loc[i,'CaseDesc'] = newdf.loc[i,'acc2str']
        else: newdf.loc[i,'CaseDesc'] = 'other than ' + str(worst)


    # Event count per case label.
    output1 = newdf.groupby(['CaseDesc']).count()['Date'].reset_index()
    # Sort keys derived from the label text; they only determine the case numbering.
    # NOTE(review): the labels are joined with ' & ' but split on '&', so the second part keeps
    # a leading space (' all') and the comparison with 'all' below looks like it never
    # matches — confirm the ordering is still the intended one before changing anything.
    output1[['split1','split2']] = output1['CaseDesc'].str.split('&',expand=True)
    output1.loc[(output1['split2'] == 'all'),'split2'] = 999999
    output1.loc[output1['split1'] == ('other than ' + str(worst)),'split1'] = 999999
    output1['split1'] = output1['split1'].astype(int)
    output1 = output1.sort_values('split1').sort_values('split2').reset_index(drop=True).reset_index(drop=False).drop(columns=['split1','split2'])
    # Number the cases 'Case 01', 'Case 02', ... in the sorted order.
    output1['Case'] = 'Case ' + (output1['index']+1).apply(lambda x: '{0:0>2}'.format(x))
    # Case table plus a totals row; the totals row has no Case/CaseDesc until it is labelled
    # 'TOTAL' a few lines below.
    output2 = pd.concat([output1[['Case','CaseDesc','Date']].rename(columns={'Date':'events'}),
                         pd.DataFrame(output1[['Date']].rename(columns={'Date':'events'}).sum(axis=0)).transpose()],axis=0,sort=False)
    # Each row's share of all events: as a fraction and as a rounded percentage string.
    output2['percAcc0'] = output2['events']/output1['Date'].sum()
    output2['percAcc'] = round(output2['percAcc0']*100).astype(int).astype(str) + '%'
    output2.loc[output2['Case'].isna(),['Case','CaseDesc']] = 'TOTAL'

    # The smallest share decides whether the threshold has to be raised again.
    smallestcat = output2['percAcc0'].min()
    
# Report the final split of accounts.
print('okay:', okay, '\nworst:', worst,sep='\n')

okay:
[1507866, 772544, 444259, 711292, 355845, 952523, 453659, 3032, 2647, 395052, 1595, 898579, 450782, 135552, 195483, 568127, 885724, 1403, 1169466]

worst:
[22440, 811415]


#### Case Grouping >>> Creating Cases

In [14]:
# Map each case description to its case number and stamp both identifiers onto every event.
casemapping = output2.set_index('CaseDesc')['Case'].to_dict()

newdf['CaseNo'] = newdf['CaseDesc'].map(casemapping)
# Alert identifier: alert code plus the event date in ISO form.
newdf['AlertID'] = 'Wash Sale ABA (' + (newdf['AlertCode']).astype(str) + ') | ' + newdf['datetime'].dt.strftime('%Y-%m-%d')

# --- Per-case statistics (all indexed by CaseNo, CaseDesc) ---
# Events per case and their share of all events.
eventbd = pd.DataFrame(newdf.groupby(['CaseNo','CaseDesc']).count()['Security'])
eventbd['pct'] = round(eventbd['Security']/eventbd['Security'].sum()*100).astype(int).astype(str)+'%'
# Distinct alerts per case: unique dates per alert code, summed over the alert codes.
uAlerts = newdf.groupby(['CaseNo','AlertCode','CaseDesc'])['Date'].apply(pd.unique).apply(len).reset_index().groupby(['CaseNo','CaseDesc']).sum()['Date']
# Distinct account pairs per case.
uPair = newdf.groupby(['CaseNo','CaseDesc'])['pair_POV1'].apply(pd.unique).apply(len)
# Distinct accounts per case, counting both sides of every pair.
# pd.concat replaces DataFrame.append, which is deprecated in newer pandas.
uAcc_draft = pd.concat([newdf[['CaseNo','CaseDesc','acc1']],
                        newdf[['CaseNo','CaseDesc','acc2']].rename(columns={'acc2':'acc1'})],sort=True)
uAcc = uAcc_draft.groupby(['CaseNo','CaseDesc'])['acc1'].apply(pd.unique).apply(len)
# Traded value per case in thousands; only the TV column is summed instead of every column.
uSUM = round(newdf.groupby(['CaseNo','CaseDesc'])['TV'].sum()/1000).astype(int)

# --- Narrative text per case ---
# Latest date, earliest date and number of distinct event dates per case.
datafix = pd.concat([newdf.groupby(['CaseNo','CaseDesc'])['datetime'].max(),newdf.groupby(['CaseNo','CaseDesc'])['datetime'].min(),newdf.groupby(['CaseNo','CaseDesc'])['datetime'].apply(pd.unique).apply(len)],axis=1)
datafix.columns=['max','min','unique']
datafix['span'] = datafix['max'] - datafix['min']
datafix = datafix.reset_index()
ltext1 = ' includes Wash Sale Alert Events triggered on activity between Accounts '
datafix['ltext'] = datafix['CaseNo'] + ltext1 + datafix['CaseDesc'] + ' between {} and {} on SMARTS Calibrated Version 1.'.format(alerting_1.strftime("%B %d, %Y"), alerting_2.strftime("%B %d, %Y"))
datafix = datafix.set_index(['CaseNo','CaseDesc'])
datafix['days'] = newdf.groupby(['CaseNo','CaseDesc'])['Date'].apply(pd.unique).apply(len)
# Only the first 7 characters of the span's string form are kept in the sentence.
datafix['ltext2'] = 'The alerts in this case trigger on ' + datafix['days'].astype(str) + ' days, spanning '+ datafix['span'].astype(str).str.slice(start=0, stop=7, step=1) + ' total days.'

# Assemble the per-case table and give the statistic columns their report names.
renamecols = {'pair_POV1':'#pairs','pair_POV2':'CaseDesc','Date':'#alerts','index':'Case','Security':'#events','acc1':'#accs','TV':'washed$000'}
fig_E_draft = pd.concat([eventbd,uAlerts,uPair,uAcc,uSUM,datafix[['ltext','ltext2']]],axis=1).rename(columns=renamecols)

In [15]:
# Distinct accounts (either side of a pair) that appear in events assigned to 'Case 04'.
# NOTE(review): the case number is hard-coded; presumably it is the catch-all
# "other than ..." case for the run shown — confirm when the case count changes.
_case04_pairs = (
    newdf.loc[newdf['CaseNo'] == 'Case 04',['acc1', 'acc2']]
    .reset_index(drop=True)
    .reset_index()
)
caseotherlist = pd.unique(
    pd.melt(_case04_pairs, id_vars='index', value_vars=['acc1', 'acc2'])['value']
).tolist()

#### Figure E

In [16]:
# Portfolio-wide totals for the Figure E "TOTAL" row.
# NOTE: this cell rebinds eventbd / uAlerts / uPair / uAcc / uSUM from the per-case Series
# built in the case-statistics cell to single scalar values.
eventbd = newdf['Security'].count()
pcttotal = '100%'

# Distinct alerts (alert code + date combinations).
uAlerts = newdf.groupby(['AlertCode','Date']).count().shape[0]
# Distinct account pairs.
uPair = len(pd.unique(newdf['pair_POV1']))
# Distinct accounts, counting both sides of every pair.
# pd.concat replaces DataFrame.append, which is deprecated in newer pandas.
uAcc_draft = pd.concat([newdf[['CaseNo','CaseDesc','acc1']],
                        newdf[['CaseNo','CaseDesc','acc2']].rename(columns={'acc2':'acc1'})],sort=True)
uAcc = len(pd.unique(uAcc_draft['acc1']))
# Total traded value in thousands. Only the TV column is summed (summing the whole frame
# also tried to add up text and date columns), and the result is a plain int.
uSUM = int(round(newdf['TV'].sum()/1000))

# Single-row frame labelled 'TOTAL'; it borrows the statistic column names of the per-case
# table, i.e. every fig_E_draft column except the two trailing text columns.
fig_E_total = pd.DataFrame(['TOTAL', eventbd, pcttotal, uAlerts,uPair,uAcc,uSUM]).transpose().rename(columns={0:'CaseNo'}).set_index('CaseNo')
fig_E_total.columns = fig_E_draft.columns[:-2]

In [17]:
# Stack the per-case rows on top of the single TOTAL row.
fig_E_stacked = pd.concat([fig_E_draft.reset_index(), fig_E_total.reset_index()],axis=0,sort=False)

# Fix the column order used in the report.
fig_E_columns = ['CaseNo', 'CaseDesc', '#events', 'pct', '#alerts', '#pairs', '#accs', 'washed$000','ltext','ltext2']
fig_E = fig_E_stacked[fig_E_columns]

# Render the traded value with thousands separators and no decimals.
fig_E['washed$000'] = fig_E['washed$000'].map('{:,.0f}'.format)
# fig_E

#### Figure F

In [18]:
# Figure F: alert count per case (plus the TOTAL row), with rows renumbered 0..n-1.
fig_F = fig_E[['CaseNo','#alerts']].reset_index(drop=True)
# fig_F

#### Figures 1

In [19]:
# Alias used by the later figure and export cells (same object as newdf, not a copy).
output= newdf

# One daily event-count plot per case; the last fig_E row (TOTAL) is skipped.
for i in fig_E.reset_index()['CaseNo'][:-1]:
    # Events per calendar day for this case, aligned to the alerting-period calendar so
    # that missing days are plotted as zero.
    tempdfdata = pd.DataFrame(newdf.loc[newdf['CaseNo'] == i,'datetime'].dt.date.value_counts())
    plotdata = pd.concat([datePD,tempdfdata['datetime']],axis=1,sort=True).fillna(0).astype(int)
    # 1-based position of the case in fig_F; used as the figure number in the file name.
    numba = fig_F.loc[fig_F['CaseNo'] == i].reset_index()['index'][0] + 1
    
    plt.rcParams['figure.figsize'] = (12,4)
    plt.plot_date(x=plotdata.index, y=plotdata['datetime'], fmt="go-",xdate=True)
    plt.title(i+' Alert Events')
    plt.ylabel("Events")
    plt.grid(True)
    plt.xticks(rotation=20)
    # File name pattern: <YYYYMon>_<case>_fig_<n>.1.png, written to the working directory.
    plt.savefig("{}_{}_fig_{}.1.png".format(alerting_1.strftime("%Y%b"),i,numba))
    # Close the figure so the next case starts from a clean canvas.
    plt.close()

#### DEBUGGING HERE

In [20]:
# acclist = pd.unique(np.concatenate(newdf.groupby('CaseNo')['Security'].apply(pd.unique)))

# idee = identities(acclist,alerting_1,alerting_2)
# for i in range(1,len(idee)): print(idee.iloc[i]['text'])

#### Figures 2

In [21]:
# Per-case summary of events and pairs (not referenced again in the visible cells).
totalsub = fig_E.reset_index()[['CaseNo','#events','#pairs']]

# Per (case, date): number of events ('Events') and number of distinct account pairs
# (left under the column name 'pair_POV1').
fig_X = pd.concat([newdf.groupby(['CaseNo','Date']).count()['Description'],
                   newdf.groupby(['CaseNo','Date'])['pair_POV1'].apply(pd.unique).apply(len)],
                  axis=1, sort=False).rename(columns={'Description':'Events'}).reset_index(drop=False)

# Number of distinct account pairs per case across all dates.
casepairmap = newdf.groupby('CaseNo')['pair_POV1'].apply(pd.unique).apply(len).to_dict()

fig_X['totalunique'] = fig_X['CaseNo'].map(casepairmap)
# Display label built from the raw 'Date' text of the event files.
fig_X['CaseName'] = 'Wash Sale ABA |' + fig_X['Date']

# Per case: the 10 dates with the most events, a 'remaining' row summing all other dates,
# and a 'TOTAL' row. The last fig_F row (TOTAL) is skipped.
fig_X2 = {}
for i in fig_F['CaseNo'][:-1]: 
    temp = fig_X.loc[fig_X['CaseNo'] == i,['CaseName','Events','pair_POV1']].sort_values('Events',ascending=False).rename(columns={'CaseName':'CaseNo','Events':'events','pair_POV1':'#pairs'})
    # The column sums also add up the text label column; it is overwritten right after.
    temp1 = pd.DataFrame(temp[10:].sum(axis=0)).transpose()
    temp1['CaseNo'] = 'remaining'
    temp2 = pd.DataFrame(temp.sum(axis=0)).transpose()
    temp2['CaseNo'] = 'TOTAL'
    fig_X2[i] = pd.concat([temp[:10], temp1, temp2], axis=0, sort=False).reset_index(drop=True)
    
# fig_X2.keys()

#### Figures 3

In [22]:
# Event counts per (case, security, account pair).
fig_X3_a = pd.DataFrame(output.groupby(['CaseNo','Security','pair_POV1']).count()['Date'])

# Wide layout: one row per (case, account pair), one column per security, zero-filled.
fig_X3_wide = pd.pivot_table(fig_X3_a, values='Date', index=['CaseNo','pair_POV1'], columns=['Security'], aggfunc=np.sum)
fig_X3 = fig_X3_wide.fillna(value=0).astype(int).reset_index().set_index('pair_POV1')

# Row total across all securities (the case label column is excluded from the sum).
fig_X3['TOTAL'] = fig_X3.drop(columns='CaseNo').sum(axis=1)
# fig_X3

In [23]:
# Securities that appear in the wash events, in the order returned by value_counts.
tradingpairsoi = output['Security'].value_counts().index.tolist()
# tradingpairsoi

In [24]:
# Per case: the 10 account pairs with the highest TOTAL, a 'remaining' row summing the
# other pairs, and a 'TOTAL' row, keyed by case number.
fig_X3dict = {}

for i in fig_X3.groupby('CaseNo')['CaseNo'].count().index.tolist():
    tempdf = fig_X3.loc[fig_X3['CaseNo'] == i].sort_values('TOTAL', ascending=False)
    tempdf2 = pd.concat([tempdf[:10],
                         pd.DataFrame(tempdf[10:].sum(axis=0)).rename(columns={0:'remaining'}).transpose(),
                         pd.DataFrame(tempdf.sum(axis=0)).rename(columns={0:'TOTAL'}).transpose()], axis=0)
    # The summary rows carry a summed CaseNo value rather than the case label; blank it out.
    tempdf2.loc[tempdf2['CaseNo'] != i,'CaseNo'] = '-'
    # Move the pair labels from the index into an 'Account Pairs' column.
    tempdf3 = tempdf2.drop(columns='CaseNo').reset_index(drop=False).rename(columns={'index':'Account Pairs'})
    # Keep only rows with at least one event (drops an all-zero 'remaining' row).
    fig_X3dict[i] = tempdf3.loc[tempdf3['TOTAL'] > 0]

# fig_X3dict.keys()

#### Figures 4

In [25]:
# Per account pair: its case number, the result of the project helper `metrics` on the
# pair labels, and the sums of passPOV and TV cast to int.
# NOTE(review): `metrics` presumably contributes the 'count' column that is selected when
# fig_X4final is built — confirm against the helper.
fig_X4 = pd.concat([output.groupby('pair_POV1')['CaseNo'].max(),
                    metrics(output['pair_POV1']),
                    output.groupby('pair_POV1').sum().astype(int)[['passPOV','TV']].sort_values('TV',ascending=False)],
                   axis=1,sort=False)
# passPOV as a percentage of traded value (one decimal), and traded value in thousands.
fig_X4['passperc'] = round(fig_X4['passPOV']/fig_X4['TV']*100,1).astype(str) + '%'
fig_X4['TV000'] = round(fig_X4['TV']/1000)

# Move the pair label from the index into an 'index' column and split it into its parties.
fig_X4 = fig_X4.sort_values('CaseNo').reset_index()
fig_X4[['party1','party2']] = fig_X4['index'].str.split(' & ',expand=True)
# fig_X4

In [26]:
# Choose a "main" account for each case: the most frequent party1 among the case's pairs.
# NOTE: the party1 column is renamed to 'party2' here, so `listt['party2']` holds party1 values.
listt = fig_X4[['CaseNo','party1']].rename(columns={'party1':'party2'})
acccasemap = listt.groupby('CaseNo')['party2'].apply(lambda x: x.value_counts().index[0]).to_dict()

fig_X4['Case_mainacc'] = fig_X4['CaseNo'].map(acccasemap)
# +1 when the case's main account is party1 of the pair, -1 otherwise.
fig_X4['bool'] = 2 * (fig_X4['Case_mainacc'] == fig_X4['party1']) - 1
# passPOV multiplied by the +1/-1 flag above.
fig_X4['passAdj'] = fig_X4['passPOV'] * fig_X4['bool']
# Pair label with the two parties swapped.
fig_X4['adjindex'] = fig_X4['party2'] + ' & ' + fig_X4['party1']

def func(row):
    """Return the pair label to display for one row.

    Rows flagged with ``row['bool'] == -1`` get the swapped label (``row['adjindex']``);
    every other row keeps its original label (``row['index']``).
    """
    return row['adjindex'] if row['bool'] == -1 else row['index']

# Per row, pick either the original or the swapped pair label (decided by func).
fig_X4['combo'] = fig_X4.apply(func, axis=1)
# Columns carried into the per-case Figure 4 tables.
fig_X4final = fig_X4[['CaseNo','combo','count','passAdj','TV000']]

In [27]:
# Per case: the 10 account pairs with the highest count, a 'remaining' row summing all
# other pairs, and a 'TOTAL' row, each with the percentage of traded value passed.
fig_X4dict = {}

for i in fig_X4final.groupby('CaseNo')['CaseNo'].count().index.tolist():
    part0 = fig_X4final.loc[fig_X4final['CaseNo'] == i].sort_values('count', ascending=False)
    # Column sums of everything after the first 10 rows; the two label columns are
    # overwritten immediately afterwards.
    part00 = pd.DataFrame(part0[10:].sum(axis=0)).transpose()
    part00[['CaseNo','combo']] = 'remaining'

    # pd.concat replaces DataFrame.append, which is deprecated in newer pandas.
    part1 = pd.concat([part0[:10], part00], sort=True).reset_index(drop=True)
    part2 = pd.DataFrame(part1.sum(axis=0)).transpose()
    part2[['CaseNo','combo']] = 'TOTAL'

    final = pd.concat([part1,part2],axis=0)
    # Drop rows without traded value (e.g. an empty 'remaining' row); this also avoids
    # dividing by zero below.
    final = final[(final['TV000'] != 0)]

    # TV000 is in thousands, so scale it back before taking the ratio.
    final['%passed'] = round((final['passAdj']/(final['TV000']*1000)*100).astype(float),1).astype(str) + '%'
    fig_X4dict[i] = final[['CaseNo','combo','count','passAdj','TV000' ,'%passed']]

#### Figures 7

In [28]:
# Alert counts per (date, alert code, alert text, account) across ALL alert types.
data  = pd.DataFrame(rawalerts.groupby(['Date','AlertCode','ShortText_mod','AccountIDName']).count()['Datetime'])

# Wide table: one row per alert code, one column per account, plus a bottom row of column sums.
fig_X7a = pd.pivot_table(data, values='Datetime', index=['AlertCode'], columns=['AccountIDName'], aggfunc=np.sum).fillna(value=0).astype(int).dropna(axis='columns',how='all')
fig_X7a = pd.concat([fig_X7a,pd.DataFrame(fig_X7a.sum(axis=0)).transpose()],axis=0)
# Keep only the worst accounts (the column for account 0 is dropped first).
fig_X7b = fig_X7a.drop(columns=[0])[worst]
# TOTAL counts every account in fig_X7a, not only the worst ones.
fig_X7b['TOTAL'] = fig_X7a.sum(axis=1)

# Share of each alert code's total that belongs to each worst account.
for i in worst:
    fig_X7b[str(i)+'_%'] = round(fig_X7b[i] / fig_X7b['TOTAL']*100).fillna(0).astype(int).astype(str) + '%'
# Attach the alert text for each alert code.
fig_X7c = pd.concat([fig_X7b,data.reset_index()[['AlertCode','ShortText_mod']].drop_duplicates().set_index('AlertCode')], axis=1).reset_index().rename(columns={'index':'AlertCode'})
# The column-sum row has no alert text; give it code 99999 so it sorts last.
fig_X7c.loc[fig_X7c['ShortText_mod'].isna(),'AlertCode'] = 99999

fig_X7c.columns = fig_X7c.columns.astype(str)
# The account columns follow `worst` instead of hard-coded account ids, so the table keeps
# working when the threshold search selects different accounts.
worst_columns = [col for acc in worst for col in (str(acc), str(acc) + '_%')]
fig_X7 = fig_X7c[['AlertCode','ShortText_mod'] + worst_columns + ['TOTAL']].sort_values('AlertCode')
# fig_X7

In [29]:
# Non-wash alerts: everything except alert codes 4040, 4041 and 4042.
alert_counts = data.reset_index()
otheralerts = alert_counts.loc[~alert_counts['AlertCode'].isin([4040,4041,4042])]
# Subset of those alerts with codes 4022, 4023 or 4032.
# The mask is computed on the same frame it filters. Previously it came from the unfiltered
# table while the rows had been renumbered, so mask and rows no longer lined up.
otheralerts_flat = otheralerts.reset_index()
dataspoof = otheralerts_flat.loc[otheralerts_flat['AlertCode'].isin([4022,4023,4032])]

# Per account: total non-wash alerts and how many of them fall in the subset above.
fig7_lastcase = pd.concat([otheralerts.groupby('AccountIDName')['Datetime'].sum(),dataspoof.groupby('AccountIDName')['Datetime'].sum()],axis=1).fillna(0).astype(int)
fig7_lastcase.columns=['totalAlerts','spoofAlerts']
fig7_lastcase2 = fig7_lastcase.sort_values('totalAlerts', ascending=False)
# Each account's share of the column total, as whole-number percentage strings.
fig7_lastcase2[fig7_lastcase2.columns + '_%'] = round(fig7_lastcase2/fig7_lastcase2.sum()*100,0).astype(int).astype(str)+'%'
fig7_other = fig7_lastcase2
# fig7_other

#### Figure 8

In [30]:
# list_symbol = str(pd.unique(np.concatenate(newdf.groupby('CaseNo')['Security'].apply(pd.unique))).tolist()).replace('[','').replace(']','')

# acclist = pd.concat([newdf['acc1'],newdf['acc2']], axis=0).value_counts()/pd.concat([newdf['acc1'],newdf['acc2']], axis=0).count()
# list_account = str(acclist.loc[acclist >= 0.05].index.tolist()).replace('[','').replace(']',''); list_account

In [31]:
# query2 = """
#         SELECT *
#         FROM ms_prod.smarts_raw_data
#         WHERE event_type = 'Place' and symbol in ({sec}) and account_id in ({acc}) and event_date between '{t1}' and '{t2}' 
#         """.format(sec = list_symbol,acc = list_account,t1 = alerting_1,t2 = alerting_2)
# new = execute_query_data_frame(query2, 'gemrdsdb', ssh = 'interim')
# new.columns = new.columns.str.replace('_crypto','')
# new['date_time'] = new['event_date'] + pd.to_timedelta(new['event_time'].astype(str)) + new['event_millis']

# gb = new.groupby(['account_id','symbol','order_type','execution_options','side']).count()
# table = pd.pivot_table(gb, columns='side', index=['account_id','symbol','order_type','execution_options'], values='event_id')
# table1 = pd.concat([table,table.sum(axis=1)], axis=1).rename(columns={0:'total'}).sort_values('total',ascending=False)

#### Wash Case Figures

# Write every figure table into one workbook named after the alerting month.
# The writer is used as a context manager so the workbook is saved and closed even if one
# of the sheets fails to write; sheet names are passed by keyword.
with pd.ExcelWriter('{}_Wash.xlsx'.format(alerting_1.strftime("%Y%b")), engine='xlsxwriter') as writer:

    # NOTE(review): `identities` is a project helper; the log output in this notebook suggests
    # it runs remote queries — confirm before re-running offline.
    identities(worst,alerting_1,alerting_2).to_excel(writer, sheet_name='id')

    fig_A.to_excel(writer, sheet_name='f.A')
    # fig_B is a PLOT
    fig_C.to_excel(writer, sheet_name='f.C')
    fig_D.to_excel(writer, sheet_name='f.D')
    fig_E.to_excel(writer, sheet_name='f.E')
    fig_F.to_excel(writer, sheet_name='f.F')

    # fig7
    fig_X7.to_excel(writer, sheet_name='f.7')
    fig7_other.to_excel(writer, sheet_name='f.7+')

    # Per-case tables (figures 2, 3 and 4); sheets are named like 'C01.f2'.
    # The last fig_F row (TOTAL) is skipped.
    for i in fig_F['CaseNo'][:-1]:
        fig_X2[i].to_excel(writer, sheet_name='{}.{}'.format(i.replace('Case ','C'),'f2'))
        fig_X3dict[i].to_excel(writer, sheet_name='{}.{}'.format(i.replace('Case ','C'),'f3'))
        fig_X4dict[i].to_excel(writer, sheet_name='{}.{}'.format(i.replace('Case ','C'),'f4'))

    # Alert/Case mapping
    outputmap = []
    output[['Date','AlertID','CaseNo','pair_POV1','Description','Security','TV','passPOV']].to_excel(writer, sheet_name='EventMapping')

    # For every alert id (in sorted order), the sorted list of cases its events belong to.
    # Iterating the groups directly avoids summing every column just to obtain the keys.
    for alert_id, alert_events in output.groupby('AlertID'):
        toprint = alert_events['CaseNo'].value_counts().index.tolist()
        outputmap.append([alert_id,sorted(toprint)])
    pd.DataFrame(outputmap).to_excel(writer, sheet_name='AlertMapping')

In [33]:
# Re-runs the identities lookup and writes it to the 'id' sheet of `writer`.
# NOTE(review): the workbook writer has already been finalised by the export cell, so this
# write is presumably never persisted — confirm whether this cell is still needed.
identities(worst,alerting_1,alerting_2).to_excel(writer, 'id')

INFO:paramiko.transport:Connected (version 2.0, client OpenSSH_7.4p1)
INFO:paramiko.transport:Authentication (publickey) successful!


ssh_connect 0:00:00.621077


INFO:root:Loaded dataframe with 2 rows in 0:00:00.879523.
INFO:paramiko.transport:Connected (version 2.0, client OpenSSH_7.4p1)
INFO:paramiko.transport:Authentication (publickey) successful!


ssh_connect 0:00:00.586097


INFO:root:Loaded dataframe with 4 rows in 0:00:10.640640.
