## Импорт необходимых библиотек 

In [None]:
import pandas as pd
import numpy as np
from graphviz import Digraph

import warnings
warnings.filterwarnings("ignore")

## Загружаем данные

In [None]:
# Load the prepared transfers table from a semicolon-separated CSV file.
# NOTE(review): later cells unpack every row positionally as
# (Sum, Payer, Recipient) and access those three column names, so the file is
# presumably expected to contain exactly these columns in this order - confirm.
data = pd.read_csv('prepared_data.csv', sep=';')
# Notebook display: (row count, column count) of the loaded table.
data.shape

In [None]:
# Preview the first 10 rows of the loaded table.
data.head(10)

### Находим связи между людьми

In [None]:
def get_values_for_graph(df):
    """Build an undirected adjacency mapping from a table of transfers.

    Every row of ``df`` is unpacked positionally as
    ``(amount, payer, recipient)``, so the frame must have exactly three
    columns in that order.

    Returns a dict ``{participant: {counterparty: amount}}`` in which each
    transfer is registered in both directions. When the same pair occurs more
    than once the later write replaces the earlier one; reverse links are
    written in a second pass, so for a pair that appears in both directions
    the value written by the reverse pass is the one that remains.
    """
    rows = df.values.tolist()
    adjacency = {}

    # Pass 1: payer -> recipient links.
    for amount, payer, recipient in rows:
        adjacency.setdefault(payer, {})[recipient] = amount

    # Pass 2: recipient -> payer links. Kept as a separate pass so the
    # overwrite order (and the key insertion order) stays the same.
    for amount, payer, recipient in rows:
        adjacency.setdefault(recipient, {})[payer] = amount

    return adjacency

In [None]:
# Adjacency mapping for the whole dataset: participant -> {counterparty: Sum}.
graph = get_values_for_graph(data)

## Находим полносвязные группы

In [None]:
def coherence(vertex, visited, temp_visited, graph):
    """Collect every vertex reachable from ``vertex`` that is not yet visited.

    Performs a depth-first traversal (pre-order, neighbours taken in the
    iteration order of ``graph[v]``) and appends each newly discovered vertex
    to both ``visited`` and ``temp_visited``.

    The traversal is iterative, so the size of a group is not limited by the
    interpreter's recursion limit.

    Args:
        vertex: Start vertex; must be a key of ``graph`` unless it is already
            in ``visited``.
        visited: List of vertices seen by this and previous calls; mutated
            in place.
        temp_visited: List that receives only the vertices discovered by this
            call; mutated in place.
        graph: Mapping ``vertex -> mapping of neighbours``.

    Returns:
        The same ``visited`` list object that was passed in.

    Raises:
        KeyError: If a vertex that has to be expanded is missing from
            ``graph``.
    """
    # Local set gives O(1) membership tests; the caller's list is still the
    # single source of truth and is updated in lock-step with it.
    seen = set(visited)
    if vertex in seen:
        return visited

    seen.add(vertex)
    visited.append(vertex)
    temp_visited.append(vertex)

    # Stack of neighbour iterators: resuming the iterator on top of the stack
    # reproduces exactly the order a recursive DFS would visit vertices in.
    stack = [iter(graph[vertex])]
    while stack:
        for neighbor in stack[-1]:
            if neighbor not in seen:
                seen.add(neighbor)
                visited.append(neighbor)
                temp_visited.append(neighbor)
                stack.append(iter(graph[neighbor]))
                break
        else:
            # Current vertex has no unvisited neighbours left: backtrack.
            stack.pop()

    return visited

In [None]:
# Every distinct participant, whether it occurs as a payer or as a recipient.
payers = data['Payer'].values.tolist()
recipients = data['Recipient'].values.tolist()
nodes = np.unique(payers + recipients)

# `visited` is shared by all traversals, so a vertex that already belongs to
# a group yields an empty list when it is used as a start vertex later on.
visited = []
fully_con = []

for vertex in nodes:
    temp_visited = []
    nodes_temp = coherence(vertex, visited, temp_visited, graph)
    fully_con.append(temp_visited)

# Keep only the traversals that actually discovered something: one list of
# mutually reachable participants per group.
fully_connected_groups = [group for group in fully_con if group]

## Из набора данных отбираем людей, которые входят в многочисленные группы

In [None]:
# Flatten all groups into a single list of participants.
full_list = [member for group in fully_connected_groups for member in group]

# Keep the transfers in which at least one side belongs to a group.
payer_in_groups = data['Payer'].isin(full_list)
recipient_in_groups = data['Recipient'].isin(full_list)
new_df = data[payer_in_groups | recipient_in_groups]

## Формируем столбец для отрисовки данных и подсчитываем сумму переводов

In [None]:
# Textual edge key "payer-->recipient"; the drawing cell splits it back on
# '-->' to obtain the two endpoints.
new_df['payer-buyer'] = new_df['Payer'] + '-->' + new_df['Recipient']

# Number of transfers per edge, ordered as value_counts() returns them.
edge_frequency = new_df['payer-buyer'].value_counts()
df_t_1 = pd.DataFrame(edge_frequency).reset_index()
df_t_1.columns = ['transact', 'frequency']

# Total transferred amount per edge, aligned to the rows of df_t_1.
edge_total = new_df.groupby('payer-buyer')['Sum'].sum()
df_t_1['sum_tr'] = df_t_1['transact'].map(edge_total)

# Parallel lists consumed by the drawing cell.
transact = df_t_1['transact'].values.tolist()
counts = df_t_1['frequency'].values.tolist()
summ_tr = df_t_1['sum_tr'].values.tolist()

### Engines

dot - "hierarchical" or layered drawings of directed graphs. This is the default tool to use if edges have directionality.

neato - "spring model" layouts. This is the default tool to use if the graph is not too large (about 100 nodes) and you don't know anything else about it. Neato attempts to minimize a global energy function, which is equivalent to statistical multi-dimensional scaling.

fdp - "spring model" layouts similar to those of neato, but does this by reducing forces rather than working with energy.

sfdp - multiscale version of fdp for the layout of large graphs.

twopi - radial layouts, after Graham Wills 97. Nodes are placed on concentric circles depending on their distance from a given root node.

circo - circular layout, after Six and Tollis 99, Kauffman and Wiese 02. This is suitable for certain diagrams of multiple cyclic structures, such as certain telecommunications networks.

## Отрисовываем полносвязные группы

In [None]:
# Directed graph of all transfers, laid out with the 'sfdp' engine.
f = Digraph('finite_state_machine', filename='Linked_groups', engine='sfdp')
f.attr(rank='same', size='8,5')
f.attr('node', shape='box', color='lightblue')

# One edge per distinct "payer-->recipient" key, labelled with the number of
# transfers ("f-") and their total amount ("sum-").
for tr, count, total in zip(transact, counts, summ_tr):
    endpoints = tr.split('-->')
    edge_label = 'f-' + str(count) + ' sum-' + str(total)
    f.edge(endpoints[0], endpoints[1], label=edge_label, arrowhead='vee')

# Render the graph to the 'Linked_groups' file and open it in a viewer.
f.view()

## Рисуем граф для самой многочисленной группы

In [None]:
# The group with the most members (the first such group on a tie).
large_gr = max(fully_connected_groups, key=len)

# Transfers in which at least one side belongs to that group.
payer_in_group = data['Payer'].isin(large_gr)
recipient_in_group = data['Recipient'].isin(large_gr)
new_df = data[payer_in_group | recipient_in_group]

In [None]:
# Textual edge key "payer-->recipient" for the transfers of the largest
# group; the drawing cell splits it back on '-->'.
new_df['payer-buyer'] = new_df['Payer'] + '-->' + new_df['Recipient']

# Number of transfers per edge, ordered as value_counts() returns them.
edge_frequency = new_df['payer-buyer'].value_counts()
df_t_1 = pd.DataFrame(edge_frequency).reset_index()
df_t_1.columns = ['transact', 'frequency']

# Total transferred amount per edge, aligned to the rows of df_t_1.
edge_total = new_df.groupby('payer-buyer')['Sum'].sum()
df_t_1['sum_tr'] = df_t_1['transact'].map(edge_total)

# Parallel lists consumed by the drawing cell.
transact = df_t_1['transact'].values.tolist()
counts = df_t_1['frequency'].values.tolist()
summ_tr = df_t_1['sum_tr'].values.tolist()

In [None]:
# Directed graph of the largest group only, laid out with the 'sfdp' engine.
f = Digraph('finite_state_machine', filename='single_group', engine='sfdp')
f.attr(rank='same', size='8,5')
f.attr('node', shape='box', color='lightblue')

# One unlabelled edge per distinct "payer-->recipient" key. Edge labels
# (frequency and total sum) are left out of this drawing; `counts` and
# `summ_tr` hold those values if labels are wanted.
for tr in transact:
    endpoints = tr.split('-->')
    f.edge(endpoints[0], endpoints[1], arrowhead='vee')

# Render the graph to the 'single_group' file and open it in a viewer.
f.view()