In [1]:
!pip install networkx==2.1



In [2]:
import os
import math
import numpy as np
import pandas as pd
import hashlib
import warnings
from datetime import datetime, timedelta

import networkx as nx
from networkx.algorithms import community

warnings.filterwarnings('ignore')

## Load Dataset

In [3]:
# Read the batch of new transactions from the parquet export
df_trx = pd.read_parquet('../data/dataset_20220105_153428.parquet')

# Columns are renamed by position, so this list has to follow the column order of the file
df_trx.columns = [
    'trx_id', 'created_date', 'amt',
    'hash', 'card_holder', 'user_email',
    'is_cbk', 'cbk_date_created', 'amt_cbk',
]

# status defaults to 1; rows that are not chargebacks and whose hash ends in '2' or '6' get status 0
df_trx['status'] = 1
df_trx.loc[
    (df_trx['is_cbk'] == 0)
    & (df_trx['hash'].str.endswith('2') | df_trx['hash'].str.endswith('6')),
    'status'
] = 0

# Order the transactions by creation date and renumber the index from 0
df_trx = df_trx.sort_values('created_date').reset_index(drop=True)

In [4]:
# Select the batch of transactions fed into the graph on this run.
# The commented alternatives are the following batches; the trailing notes are the
# wall times recorded for the processing cell with each batch.
# Note: df_trx is overwritten by its own slice, so re-running this cell slices the
# already-sliced frame; re-run the load cell first when switching batches.
df_trx = df_trx.iloc[:1000]  # Wall time: 2.16 s # Wall time: 2.19 s
# df_trx = df_trx[1000:2000] # Wall time: 2.66 s # Wall time: 2.77 s
# df_trx = df_trx[2000:4000] # Wall time: 6.54 s # Wall time: 6.83 s
# df_trx = df_trx[4000:8000] # Wall time: 24.1 s # Wall time: 26.1 s
# df_trx = df_trx[8000:16000] # Wall time: 1min 6s # Wall time: 1min 12s
# df_trx = df_trx[16000:] # Wall time: 1min 34s # Wall time: 1min 41s

In [5]:
# Preview the currently selected batch of transactions.
# NOTE(review): the saved output below starts at index 16000, which suggests it was
# produced with the `[16000:]` slice rather than the `[0:1000]` slice that is active
# in the previous cell — confirm and re-run before relying on it.
df_trx

Unnamed: 0,trx_id,created_date,amt,hash,card_holder,user_email,is_cbk,cbk_date_created,amt_cbk,status
16000,404180994,2021-02-18 17:08:05,396.16,300d048574c46532bdfd28f99304dcccf92fa8d2a20fcc...,bb43b28e514bacaef4ce02433056c3b9ef59beea71717e...,b597fa7e66bfa1c41ee8a1964fb4669f27d1f9e778d4e0...,1,2021-03-31 14:37:29,396.16,1
16001,404205592,2021-02-18 17:21:06,5.32,35efcd5fa47f2f701c241f392c09a6aafe46d31e1a8c74...,2362b4fcbb2ffe622a969519e9d56be9eab6bc8e0a5f61...,31dab99e7758cc7a79fd497df735d37d1bc932e364f865...,0,NaT,0.00,1
16002,404209390,2021-02-18 17:23:07,214.69,d94371b4b25a7d635247eb6dd65345d7441ff1b4e477a5...,bb43b28e514bacaef4ce02433056c3b9ef59beea71717e...,52bf6b6ec5cfa6793c22c9cd52b8932aa4bd19fb881ce2...,1,2021-03-31 14:37:25,214.69,1
16003,404263495,2021-02-18 17:53:39,10.66,35efcd5fa47f2f701c241f392c09a6aafe46d31e1a8c74...,2362b4fcbb2ffe622a969519e9d56be9eab6bc8e0a5f61...,31dab99e7758cc7a79fd497df735d37d1bc932e364f865...,0,NaT,0.00,1
16004,404323065,2021-02-18 18:27:16,26.42,b6983cbc5818f11a332577d11140dcdc4f17be022eb29d...,7e559daf31199c1682dccc09dc3e4fc1b6632ce724465b...,59502c2fb396ba0a211b982512ec09f7dc529b721a0698...,0,NaT,0.00,1
...,...,...,...,...,...,...,...,...,...,...
22978,1371950018,2022-01-03 21:55:20,7.47,773465039472b859d7682eb0e57d14ec22632b0e216b82...,0c3e7d7d22f38046bc2fbb136bf08f4d73e23f44be5d59...,,0,NaT,0.00,0
22979,1371972779,2022-01-03 22:04:19,2.24,2db8ef94cda136be46d170d10f7199f06a81d4e0cf5f16...,ec30121adef022c2cf956686aa123da6b100578560db4f...,,0,NaT,0.00,0
22980,1372009794,2022-01-03 22:18:45,3.74,995ae622bf219e7f0e692677d806984e902845fb8cb112...,b956e037182da51d39b32dbf14762ddbc68d56fcae878c...,,0,NaT,0.00,1
22981,1372107100,2022-01-03 22:57:09,2.99,1e874901b4ac23f6b636cc509850d04584fc9842438bc7...,510c3dde1ebfa9f8ccf11dd96570e8ef8a5cb0e005e198...,,0,NaT,0.00,0


## Graph Functions

In [6]:
def build_graph(df, pivot_column, edge_columns, attr_columns, identify_subgraphs=True):
    """
    Build an undirected graph linking every pivot value to its related values.

    Each row of ``df`` contributes one edge per edge column:
    ``'<pivot_column>-<pivot value>'`` -- ``'<edge column>-<edge value>'``.
    Pairs where either side is null are skipped and duplicated edges are dropped.
    The ``attr_columns`` values of each row are attached as node attributes to the
    matching pivot node.

    :param df: Dataframe with data to build graph
    :param pivot_column: name of the column whose values become the pivot nodes
    :param edge_columns: list of columns whose values become the nodes linked to the pivot
    :param attr_columns: list of columns stored as attributes on the pivot nodes
    :param identify_subgraphs: when True also return the connected components
    :return: ``G`` when ``identify_subgraphs`` is False, otherwise ``(G, sG)`` where
        ``sG`` is a generator yielding one independent graph per connected component
    """

    # 1. Build the edge list. Frames are collected and concatenated once instead of
    #    growing a DataFrame inside the loop (DataFrame.append is quadratic and no
    #    longer exists in pandas 2).
    edge_frames = []
    for col in edge_columns:
        pairs = df[[pivot_column, col]].copy()
        pairs.columns = ['source', 'target']
        pairs = pairs.dropna()
        edge_frames.append(
            pd.DataFrame({
                'source': pivot_column + '-' + pairs['source'].astype('str'),
                'target': col + '-' + pairs['target'].astype('str'),
            })
        )

    if edge_frames:
        df_edge = pd.concat(edge_frames, ignore_index=True).drop_duplicates()
    else:
        # No edge columns requested: keep the expected schema so an empty graph is built
        df_edge = pd.DataFrame(columns=['source', 'target'])

    # 2. Create graph
    G = nx.from_pandas_edgelist(
        df_edge,
        source='source',
        target='target'
    )

    # 3. Add attributes to the pivot nodes.
    #    Work on a copy so the caller's dataframe is never written to.
    df_attr = df[[pivot_column] + list(attr_columns)].copy()
    df_attr[pivot_column] = pivot_column + '-' + df_attr[pivot_column].astype('str')

    # {node id: {attribute: value}}; to_dict('index') requires the pivot values to be unique
    node_attr = df_attr.set_index(pivot_column).to_dict('index')
    nx.set_node_attributes(G, node_attr)

    # 4. Identify subgraphs
    if identify_subgraphs:
        # Same result as nx.connected_component_subgraphs(G), which newer networkx
        # releases no longer provide: one copied graph per connected component.
        sG = (G.subgraph(nodes).copy() for nodes in nx.connected_components(G))
        return G, sG
    else:
        return G

def add_info_to_graph(G, df, pivot_column, edge_columns, attr_columns, identify_subgraphs=True):
    """
    Merge the rows of ``df`` into an existing graph.

    A graph is built from ``df`` with ``build_graph`` (same node naming, edge and
    attribute rules) and then combined with ``G`` through ``nx.compose``.

    :param G: existing graph to extend
    :param df: Dataframe with the new data to add
    :param pivot_column: name of the column whose values become the pivot nodes
    :param edge_columns: list of columns whose values become the nodes linked to the pivot
    :param attr_columns: list of columns stored as attributes on the pivot nodes
    :param identify_subgraphs: when True also return the connected components
    :return: ``F`` when ``identify_subgraphs`` is False, otherwise ``(F, sF)`` where
        ``sF`` is a generator yielding one independent graph per connected component
    """

    # 1. Graph for the new rows only; reuse build_graph instead of duplicating its body
    H = build_graph(df, pivot_column, edge_columns, attr_columns, identify_subgraphs=False)

    # 2. Combine both graphs. H is passed second, as in the original code;
    #    per the networkx docs its attributes win on nodes present in both graphs.
    F = nx.compose(G, H)

    # 3. Identify subgraphs
    if identify_subgraphs:
        # Same result as nx.connected_component_subgraphs(F), which newer networkx
        # releases no longer provide: one copied graph per connected component.
        sF = (F.subgraph(nodes).copy() for nodes in nx.connected_components(F))
        return F, sF
    else:
        return F
    
# Function to calculate the average of a list
def average(lst):
    """Return the arithmetic mean of ``lst``, or 0 when ``lst`` is empty."""
    count = len(lst)
    if count > 0:
        return sum(lst) / count
    return 0

# Function to calculate min, avg and max distance between a source and multiple target nodes
def distance_to(graph, source, target):
    """
    Shortest-path distance statistics from one node to a group of nodes.

    :param graph: graph in which the paths are searched
    :param source: node the distances are measured from
    :param target: collection of nodes the distances are measured to
    :return: tuple ``(min, avg, max)`` of the distances in number of edges, with the
        average rounded to 4 decimals; ``(0, 0, 0)`` when ``target`` is empty
    """
    if len(target) == 0:
        return 0, 0, 0

    # Search in the graph received as argument (the previous version read the
    # notebook-level variable `subgraph` and silently ignored this parameter).
    distance = [
        nx.shortest_path_length(graph, source=source, target=_target)
        for _target in target
    ]

    return min(distance), round(sum(distance) / len(distance), 4), max(distance)

In [7]:
# Node-type names counted per subgraph (lower-cased and matched as node-id prefixes)
DOMAINS = [
    "HASH", "BIN", "USER_EMAIL", "USER_IP",
    "USER_PHONE", "USER_DOCUMENT", "CARD_HOLDER", "DEVICE_ID",
]

# Column whose values become the pivot nodes of the graph
PIVOT_COLUM = 'trx_id'

# Columns whose values become the nodes linked to each pivot node
EDGE_COLUMNS = ['hash', 'card_holder', 'user_email']

# Columns stored as attributes on the pivot nodes
ATTR_COLUMNS = ['status', 'is_cbk', 'amt']

# Timestamp of this run in ISO format (date and time joined by 'T'); used in the backup file names
calc_date = datetime.now().isoformat()

# Latest graph / features files plus one timestamped backup per run
graph_file = 'graphs/G.pkl'
graph_file_bkp = 'graphs/G_' + calc_date + '.pkl'
features_file = 'stats/df_agg_features.parquet'
features_file_bkp = 'stats/df_agg_features_' + calc_date + '.parquet'

In [8]:
%%time
# List with new trx added to graph
trx_added = df_trx.trx_id.to_list()
trx_added = ['trx_id-'+str(x) for x in trx_added]

try:
    # Load pre calculated graph
    G_loaded = nx.read_gpickle(graph_file)
    
    # Add new trx to graph
    G = add_info_to_graph(
        G_loaded, 
        df_trx, 
        PIVOT_COLUM, 
        EDGE_COLUMNS, 
        ATTR_COLUMNS, 
        False
    )
    print('Successfully LOADED G')
    print(f'Successfully ADDED new trxs ({len(trx_added)})')

except:
    # Build graph
    G = build_graph(
        df_trx, 
        PIVOT_COLUM, 
        EDGE_COLUMNS, 
        ATTR_COLUMNS, 
        False
    )
    print('Successfully CREATED G')

# Identify subgraphs updated
sG = nx.connected_component_subgraphs(G)
graphs = list(sG)
sG_new = [sg for sg in graphs if any(x in list(sg.nodes()) for x in trx_added) == True]
print('Successfully IDENTIFIED SG')

# LOCAL TESTING ONLY
# Dataframe with agg features per domain
try:
    df_agg_features = pd.read_parquet(features_file)
except:
    df_agg_features = pd.DataFrame()

num_sg = 0

# Iterate over every subgraph and get stats from ones with new added trxs
for subgraph in sG_new:
    
    # Subgraph with new trx
    sG_nodes = list(subgraph.nodes())
        
    sG_nodes_trx_id = [node for node in sG_nodes if node.startswith('trx_id')]
    attributes_in_sG = [subgraph.node[x] for x in sG_nodes_trx_id]
    sG_nodes_trx_id_apr = [node for node in sG_nodes if node.startswith('trx_id') and subgraph.node[node]['status']==1]
    sG_nodes_trx_id_rej = [node for node in sG_nodes if node.startswith('trx_id') and subgraph.node[node]['status']==0]
    sG_nodes_trx_id_cbk = [node for node in sG_nodes if node.startswith('trx_id') and subgraph.node[node]['is_cbk']==1]
    sG_nodes_domains = [node for node in sG_nodes if not node.startswith('trx_id')]

    # Graph General Features
    graph_agg_features = dict()

    graph_agg_features['graph_q_att'] = len(sG_nodes_trx_id)
    graph_agg_features['graph_q_apr'] = sum(item['status'] for item in attributes_in_sG)
    graph_agg_features['graph_q_rej'] = len(sG_nodes_trx_id) - sum(item['status'] for item in attributes_in_sG)
    graph_agg_features['graph_q_cbk'] = sum(item['is_cbk'] for item in attributes_in_sG)
    graph_agg_features['graph_ratio_q_apr'] = round(graph_agg_features['graph_q_apr'] / graph_agg_features['graph_q_att'], 4)
    graph_agg_features['graph_ratio_q_rej'] = round(graph_agg_features['graph_q_rej'] / graph_agg_features['graph_q_att'], 4)
    graph_agg_features['graph_ratio_q_cbk'] = round(graph_agg_features['graph_q_cbk'] / graph_agg_features['graph_q_att'], 4)

    graph_agg_features['graph_amt_att'] = round(sum(item['amt'] for item in attributes_in_sG), 4)
    graph_agg_features['graph_amt_apr'] = round(sum(item['amt'] for item in attributes_in_sG if item['status']==1), 4)
    graph_agg_features['graph_amt_rej'] = round(graph_agg_features['graph_amt_att'] - graph_agg_features['graph_amt_apr'], 4)
    graph_agg_features['graph_amt_cbk'] = round(sum(item['amt'] for item in attributes_in_sG if item['is_cbk']==1), 4)
    graph_agg_features['graph_ratio_amt_apr'] = round(graph_agg_features['graph_amt_apr'] / graph_agg_features['graph_amt_att'], 4)
    graph_agg_features['graph_ratio_amt_rej'] = round(graph_agg_features['graph_amt_rej'] / graph_agg_features['graph_amt_att'], 4)
    graph_agg_features['graph_ratio_amt_cbk'] = round(graph_agg_features['graph_amt_cbk'] / graph_agg_features['graph_amt_att'], 4)

    # Q attr in graph
    for _domain in DOMAINS:
        graph_agg_features[f'graph_q_{_domain.lower()}'] = len(list([x for x in sG_nodes if x.startswith(_domain.lower())]))

    graph_agg_features['graph_avg_shortest_path_length'] = round(nx.average_shortest_path_length(subgraph), 5)
    graph_agg_features['graph_degree_pearson_corr_coef'] = round(nx.degree_pearson_correlation_coefficient(subgraph), 5)

    nodes_degree_centrality = nx.degree_centrality(subgraph)
    nodes_closeness_centrality = nx.closeness_centrality(subgraph)
    nodes_betweenness_centrality = nx.betweenness_centrality(subgraph)    

    # Domain Agg Features
    for node_domain in sG_nodes_domains:

        domain_agg_features = dict()

        domain_agg_features['graph_node_num_neighbors'] = len(list(nx.neighbors(subgraph, node_domain)))
        domain_agg_features['graph_node_degree_centrality'] = round(nodes_degree_centrality[node_domain], 5)
        domain_agg_features['graph_node_closeness_centrality'] = round(nodes_closeness_centrality[node_domain], 5)
        domain_agg_features['graph_node_between_centrality'] = round(nodes_betweenness_centrality[node_domain], 5)

        dist_to_apr = distance_to(subgraph, node_domain, sG_nodes_trx_id_apr)
        dist_to_rej = distance_to(subgraph, node_domain, sG_nodes_trx_id_rej)
        dist_to_cbk = distance_to(subgraph, node_domain, sG_nodes_trx_id_cbk)

        domain_agg_features['graph_node_min_dist_to_apr'] = dist_to_apr[0]
        domain_agg_features['graph_node_avg_dist_to_apr'] = dist_to_apr[1]
        domain_agg_features['graph_node_max_dist_to_apr'] = dist_to_apr[2]

        domain_agg_features['graph_node_min_dist_to_rej'] = dist_to_rej[0]
        domain_agg_features['graph_node_avg_dist_to_rej'] = dist_to_rej[1]
        domain_agg_features['graph_node_max_dist_to_rej'] = dist_to_rej[2]

        domain_agg_features['graph_node_min_dist_to_cbk'] = dist_to_cbk[0]
        domain_agg_features['graph_node_avg_dist_to_cbk'] = dist_to_cbk[1]
        domain_agg_features['graph_node_max_dist_to_cbk'] = dist_to_cbk[2]

        # Combine general features with domain specifics
        agg_features = graph_agg_features.copy()
        agg_features.update(domain_agg_features)  

        # TODO
        # Put key value to redis
        # redis.set(node_domain, str(agg_features))

        # LOCAL TESTING ONLY         
        graphItem = {
            'key': node_domain,
            'value': str(agg_features),
            'created_date': calc_date.replace('T',' ')[:19]
        }
        df_agg_features = df_agg_features.append(pd.DataFrame([graphItem]), ignore_index=True)
    
    num_sg+=1

print(f'Total Number of sG: {str(len(graphs))}')
print(f'Number of sG updated: {str(num_sg)}')
    
# Save graphs for next calculation and tracking
nx.write_gpickle(G, graph_file)
nx.write_gpickle(G, graph_file_bkp)

# LOCAL TESTING ONLY
# Save stats for audit and tracking
df_agg_features.to_parquet(features_file)
df_agg_features.to_parquet(features_file_bkp)

Successfully LOADED G
Successfully ADDED new trxs
Successfully IDENTIFIED SG
Total Number of sG: 10123
Number of sG updated: 2771
CPU times: user 1min 41s, sys: 338 ms, total: 1min 41s
Wall time: 1min 41s


In [9]:
# Feature rows loaded from the features file plus the ones added by the processing cell:
# `key` is the node id, `value` the feature dict rendered as a string and
# `created_date` the timestamp of the run that produced the row.
df_agg_features

Unnamed: 0,key,value,created_date
0,user_email-5e84a8977a6663a50915393a5f6478938cb...,"{'graph_q_att': 1, 'graph_q_apr': 1, 'graph_q_...",2022-05-12 23:08:26
1,hash-7017577081ed6de8cce6aaf59c486949d9c03a86b...,"{'graph_q_att': 1, 'graph_q_apr': 1, 'graph_q_...",2022-05-12 23:08:26
2,card_holder-70057fda68453cbd95944f3864a2da6747...,"{'graph_q_att': 1, 'graph_q_apr': 1, 'graph_q_...",2022-05-12 23:08:26
3,hash-8a00ecd83f0b13af06106c76d1662cd4fe3cbcd78...,"{'graph_q_att': 1, 'graph_q_apr': 1, 'graph_q_...",2022-05-12 23:08:26
4,user_email-d88ae14b2c2f5edacb3fc60e9615130dd5a...,"{'graph_q_att': 1, 'graph_q_apr': 1, 'graph_q_...",2022-05-12 23:08:26
...,...,...,...
36262,card_holder-b956e037182da51d39b32dbf14762ddbc6...,"{'graph_q_att': 1, 'graph_q_apr': 1, 'graph_q_...",2022-05-12 23:12:06
36263,hash-995ae622bf219e7f0e692677d806984e902845fb8...,"{'graph_q_att': 1, 'graph_q_apr': 1, 'graph_q_...",2022-05-12 23:12:06
36264,user_email-57e8c2fea0822295a12242166cab20f87ac...,"{'graph_q_att': 1, 'graph_q_apr': 1, 'graph_q_...",2022-05-12 23:12:06
36265,hash-d0652778f8976ff29b6f03b6ab2a0cab4015bc3f5...,"{'graph_q_att': 1, 'graph_q_apr': 1, 'graph_q_...",2022-05-12 23:12:06


In [10]:
# Every feature snapshot stored for one e-mail node (one row per run that touched its subgraph)
df_agg_features.loc[df_agg_features['key'] == 'user_email-e95f92d5fd98d7ba95c148ee17f17023551734fd542a0db5cf42bf6eefa6ab8e']

Unnamed: 0,key,value,created_date
566,user_email-e95f92d5fd98d7ba95c148ee17f17023551...,"{'graph_q_att': 16, 'graph_q_apr': 13, 'graph_...",2022-05-12 23:08:26
1462,user_email-e95f92d5fd98d7ba95c148ee17f17023551...,"{'graph_q_att': 22, 'graph_q_apr': 19, 'graph_...",2022-05-12 23:08:48
2842,user_email-e95f92d5fd98d7ba95c148ee17f17023551...,"{'graph_q_att': 49, 'graph_q_apr': 44, 'graph_...",2022-05-12 23:09:11
5593,user_email-e95f92d5fd98d7ba95c148ee17f17023551...,"{'graph_q_att': 67, 'graph_q_apr': 62, 'graph_...",2022-05-12 23:09:41
14350,user_email-e95f92d5fd98d7ba95c148ee17f17023551...,"{'graph_q_att': 111, 'graph_q_apr': 106, 'grap...",2022-05-12 23:10:33
27584,user_email-e95f92d5fd98d7ba95c148ee17f17023551...,"{'graph_q_att': 115, 'graph_q_apr': 110, 'grap...",2022-05-12 23:12:06


In [11]:
# Feature string of the 1st stored snapshot of that e-mail node
df_agg_features.loc[df_agg_features['key'] == 'user_email-e95f92d5fd98d7ba95c148ee17f17023551734fd542a0db5cf42bf6eefa6ab8e', 'value'].values[0:1]

array(["{'graph_q_att': 16, 'graph_q_apr': 13, 'graph_q_rej': 3, 'graph_q_cbk': 0, 'graph_ratio_q_apr': 0.8125, 'graph_ratio_q_rej': 0.1875, 'graph_ratio_q_cbk': 0.0, 'graph_amt_att': 242.51, 'graph_amt_apr': 231.77, 'graph_amt_rej': 10.74, 'graph_amt_cbk': 0, 'graph_ratio_amt_apr': 0.9557, 'graph_ratio_amt_rej': 0.0443, 'graph_ratio_amt_cbk': 0.0, 'graph_q_hash': 2, 'graph_q_bin': 0, 'graph_q_user_email': 3, 'graph_q_user_ip': 0, 'graph_q_user_phone': 0, 'graph_q_user_document': 0, 'graph_q_card_holder': 2, 'graph_q_device_id': 0, 'graph_avg_shortest_path_length': 2.97233, 'graph_degree_pearson_corr_coef': -0.62675, 'graph_node_num_neighbors': 11, 'graph_node_degree_centrality': 0.5, 'graph_node_closeness_centrality': 0.35484, 'graph_node_between_centrality': 0.07937, 'graph_node_min_dist_to_apr': 1, 'graph_node_avg_dist_to_apr': 1.3077, 'graph_node_max_dist_to_apr': 3, 'graph_node_min_dist_to_rej': 5, 'graph_node_avg_dist_to_rej': 5.6667, 'graph_node_max_dist_to_rej': 7, 'graph_node_

In [12]:
# Feature string of the 2nd stored snapshot of that e-mail node
df_agg_features.loc[df_agg_features['key'] == 'user_email-e95f92d5fd98d7ba95c148ee17f17023551734fd542a0db5cf42bf6eefa6ab8e', 'value'].values[1:2]

array(["{'graph_q_att': 22, 'graph_q_apr': 19, 'graph_q_rej': 3, 'graph_q_cbk': 0, 'graph_ratio_q_apr': 0.8636, 'graph_ratio_q_rej': 0.1364, 'graph_ratio_q_cbk': 0.0, 'graph_amt_att': 408.03, 'graph_amt_apr': 397.29, 'graph_amt_rej': 10.74, 'graph_amt_cbk': 0, 'graph_ratio_amt_apr': 0.9737, 'graph_ratio_amt_rej': 0.0263, 'graph_ratio_amt_cbk': 0.0, 'graph_q_hash': 2, 'graph_q_bin': 0, 'graph_q_user_email': 4, 'graph_q_user_ip': 0, 'graph_q_user_phone': 0, 'graph_q_user_document': 0, 'graph_q_card_holder': 2, 'graph_q_device_id': 0, 'graph_avg_shortest_path_length': 2.91034, 'graph_degree_pearson_corr_coef': -0.6817, 'graph_node_num_neighbors': 16, 'graph_node_degree_centrality': 0.55172, 'graph_node_closeness_centrality': 0.39189, 'graph_node_between_centrality': 0.09852, 'graph_node_min_dist_to_apr': 1, 'graph_node_avg_dist_to_apr': 1.3158, 'graph_node_max_dist_to_apr': 3, 'graph_node_min_dist_to_rej': 5, 'graph_node_avg_dist_to_rej': 5.6667, 'graph_node_max_dist_to_rej': 7, 'graph_no

In [13]:
# Feature string of the 3rd stored snapshot of that e-mail node
df_agg_features.loc[df_agg_features['key'] == 'user_email-e95f92d5fd98d7ba95c148ee17f17023551734fd542a0db5cf42bf6eefa6ab8e', 'value'].values[2:3]

array(["{'graph_q_att': 49, 'graph_q_apr': 44, 'graph_q_rej': 5, 'graph_q_cbk': 0, 'graph_ratio_q_apr': 0.898, 'graph_ratio_q_rej': 0.102, 'graph_ratio_q_cbk': 0.0, 'graph_amt_att': 682.66, 'graph_amt_apr': 662.65, 'graph_amt_rej': 20.01, 'graph_amt_cbk': 0, 'graph_ratio_amt_apr': 0.9707, 'graph_ratio_amt_rej': 0.0293, 'graph_ratio_amt_cbk': 0.0, 'graph_q_hash': 2, 'graph_q_bin': 0, 'graph_q_user_email': 4, 'graph_q_user_ip': 0, 'graph_q_user_phone': 0, 'graph_q_user_document': 0, 'graph_q_card_holder': 2, 'graph_q_device_id': 0, 'graph_avg_shortest_path_length': 2.32456, 'graph_degree_pearson_corr_coef': -0.8141, 'graph_node_num_neighbors': 43, 'graph_node_degree_centrality': 0.76786, 'graph_node_closeness_centrality': 0.69136, 'graph_node_between_centrality': 0.38165, 'graph_node_min_dist_to_apr': 1, 'graph_node_avg_dist_to_apr': 1.1364, 'graph_node_max_dist_to_apr': 3, 'graph_node_min_dist_to_rej': 1, 'graph_node_avg_dist_to_rej': 2.2, 'graph_node_max_dist_to_rej': 3, 'graph_node_mi

In [14]:
# Feature string of the 4th stored snapshot of that e-mail node
df_agg_features.loc[df_agg_features['key'] == 'user_email-e95f92d5fd98d7ba95c148ee17f17023551734fd542a0db5cf42bf6eefa6ab8e', 'value'].values[3:4]

array(["{'graph_q_att': 67, 'graph_q_apr': 62, 'graph_q_rej': 5, 'graph_q_cbk': 0, 'graph_ratio_q_apr': 0.9254, 'graph_ratio_q_rej': 0.0746, 'graph_ratio_q_cbk': 0.0, 'graph_amt_att': 1029.61, 'graph_amt_apr': 1009.6, 'graph_amt_rej': 20.01, 'graph_amt_cbk': 0, 'graph_ratio_amt_apr': 0.9806, 'graph_ratio_amt_rej': 0.0194, 'graph_ratio_amt_cbk': 0.0, 'graph_q_hash': 2, 'graph_q_bin': 0, 'graph_q_user_email': 4, 'graph_q_user_ip': 0, 'graph_q_user_phone': 0, 'graph_q_user_document': 0, 'graph_q_card_holder': 2, 'graph_q_device_id': 0, 'graph_avg_shortest_path_length': 2.25153, 'graph_degree_pearson_corr_coef': -0.85815, 'graph_node_num_neighbors': 61, 'graph_node_degree_centrality': 0.82432, 'graph_node_closeness_centrality': 0.74747, 'graph_node_between_centrality': 0.37431, 'graph_node_min_dist_to_apr': 1, 'graph_node_avg_dist_to_apr': 1.0968, 'graph_node_max_dist_to_apr': 3, 'graph_node_min_dist_to_rej': 1, 'graph_node_avg_dist_to_rej': 2.2, 'graph_node_max_dist_to_rej': 3, 'graph_nod