In [35]:
import snap
import numpy as np
import pandas as pd
import os
from graphviz import Graph
from graphviz import Source
import matplotlib.pyplot as plt

In [40]:
# Read the extracted summary CSV into a DataFrame via a binary file handle.
summary_path = os.path.realpath('input/dep_and_sen_all_extracted_summary.csv')
with open(summary_path, 'rb') as summary_file:
    summary = pd.read_csv(summary_file)

In [41]:
# Read the members mapping CSV into a DataFrame via a binary file handle.
members_mapping_path = os.path.realpath('input/members_mapping.csv')
with open(members_mapping_path, 'rb') as members_mapping_file:
    deputats_mapping = pd.read_csv(members_mapping_file)

In [42]:
# Read the fractions mapping CSV into a DataFrame via a binary file handle.
fractions_mapping_path = os.path.realpath('input/fractions_mapping.csv')
with open(fractions_mapping_path, 'rb') as fractions_mapping_file:
    fractions_mapping = pd.read_csv(fractions_mapping_file)

In [45]:
# Discard the member name and the faction name/date columns in place,
# leaving id, position, isCurrent and factions.id.
unused_columns = ['name', 'factions.name', 'factions.startDate', 'factions.endDate']
summary.drop(unused_columns, axis=1, inplace=True)

In [46]:
# Print the column names, dtypes and non-null counts of the trimmed summary frame.
summary.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5703 entries, 0 to 5702
Data columns (total 4 columns):
id             5703 non-null int64
position       5703 non-null object
isCurrent      5703 non-null bool
factions.id    5031 non-null float64
dtypes: bool(1), float64(1), int64(1), object(1)
memory usage: 139.4+ KB


### Current members (rows where `isCurrent` is true)

In [7]:
# Work on a copy without the faction id so only id / position / isCurrent remain.
summary_id = summary.drop('factions.id', axis=1)

In [8]:
# Remove fully identical rows in place (the faction id column was dropped from
# this frame, so rows that differed only by faction are now exact duplicates).
summary_id.drop_duplicates(inplace=True)

In [9]:
# Keep rows whose isCurrent flag is set; every other row is replaced by NaN
# in all columns (and removed by the dropna step that follows).
summary_id_current = summary_id.where(summary_id['isCurrent'])

In [10]:
# Drop, in place, the rows where every column is NaN, i.e. the non-current
# rows blanked out by the preceding mask step.
summary_id_current.dropna(how='all', inplace=True)

In [11]:
# The isCurrent flag has served its purpose as a filter; remove the column in place.
del summary_id_current['isCurrent']

In [12]:
# Restore an integer dtype for the id column (masking with NaN upcasts
# integer columns to float).
summary_id_current['id'] = summary_id_current['id'].astype('int64')

In [13]:
# Remove identical (id, position) rows in place now that isCurrent is gone.
summary_id_current.drop_duplicates(inplace=True)

In [14]:
# Print the column names, dtypes and non-null counts of the current-members frame.
summary_id_current.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 565 entries, 12 to 5699
Data columns (total 2 columns):
id          565 non-null int64
position    565 non-null object
dtypes: int64(1), object(1)
memory usage: 13.2+ KB


In [25]:
# Create an empty SNAP TNGraph that will hold one node per current member.
members_graph = snap.TNGraph.New()

In [26]:
# Register every current member id as a node of the graph.
for member_id in summary_id_current['id']:
    members_graph.AddNode(member_id)

In [27]:
# Count the nodes by walking the node iterator (sanity check after insertion).
sum(1 for _node in members_graph.Nodes())

565

In [48]:
# Persist the current-members table (id, position) without the index column.
current_members_path = os.path.realpath('input/summary_id_current.csv')
summary_id_current.to_csv(current_members_path, index=False)

### Split current members by position (deputies vs. senators)

In [18]:
# Split the current members by chamber. mask() replaces the rows where the
# condition is True with NaN, so each result keeps only the *other* position:
#   summary_id_dep -> rows that are not 'Член СФ' (Federation Council member)
#   summary_id_sen -> rows that are not 'Депутат ГД' (State Duma deputy)
# The conditions are built from summary_id_current itself rather than from the
# larger summary_id frame, so the result no longer depends on pandas silently
# aligning two different indexes.
is_senator = summary_id_current['position'] == 'Член СФ'
is_deputy = summary_id_current['position'] == 'Депутат ГД'
summary_id_dep = summary_id_current.mask(is_senator)
summary_id_sen = summary_id_current.mask(is_deputy)

In [19]:
# Remove, in place, the all-NaN rows left behind by the mask step in both frames.
for chamber_frame in (summary_id_dep, summary_id_sen):
    chamber_frame.dropna(how='all', inplace=True)

In [20]:
# Restore an integer dtype for the id column of both frames (masking with NaN
# upcasts integer columns to float).
for chamber_frame in (summary_id_dep, summary_id_sen):
    chamber_frame['id'] = chamber_frame['id'].astype(np.int64)

In [21]:
# Print the column names, dtypes and non-null counts of the deputies frame.
summary_id_dep.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 445 entries, 20 to 5699
Data columns (total 2 columns):
id          445 non-null int64
position    445 non-null object
dtypes: int64(1), object(1)
memory usage: 10.4+ KB


In [22]:
# Print the column names, dtypes and non-null counts of the senators frame.
summary_id_sen.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 120 entries, 12 to 5676
Data columns (total 2 columns):
id          120 non-null int64
position    120 non-null object
dtypes: int64(1), object(1)
memory usage: 2.8+ KB


In [28]:
import itertools as it

def complete_graph(data, graph=None):
    """Add a directed edge for every ordered pair of entries in ``data``.

    Pairs come from ``itertools.permutations(data, 2)``, so both ``(a, b)`` and
    ``(b, a)`` are added. Pairs are formed by position, not by value: an id
    that occurs twice in ``data`` would be paired with itself.

    Parameters
    ----------
    data : iterable
        Node ids to connect with each other; they are passed unchanged to
        ``AddEdge``.
    graph : object with an ``AddEdge(src, dst)`` method, optional
        Graph that receives the edges. When omitted, the notebook-level
        ``members_graph`` is used, which keeps existing one-argument calls
        working exactly as before.

    Returns
    -------
    None
        The graph is modified in place; ``'Edges added'`` is printed when done.
    """
    # Resolve the target lazily so the global is only required when no graph
    # is passed explicitly.
    target_graph = members_graph if graph is None else graph
    for source_id, destination_id in it.permutations(data, 2):
        target_graph.AddEdge(source_id, destination_id)
    print('Edges added')

In [29]:
# Add an edge in both directions between every pair of ids in the deputies frame.
complete_graph(summary_id_dep['id'])

Edges added


In [30]:
# Add an edge in both directions between every pair of ids in the senators frame.
complete_graph(summary_id_sen['id'])

Edges added


In [31]:
# Write the graph's edges to output/members_graph.txt.
# NOTE(review): the last argument is presumably a description written into the
# file header — confirm against the SNAP documentation.
snap.SaveEdgeList(members_graph, "output/members_graph.txt", "Members_graph")

In [33]:
# Render the graph to images/members_graph.png with the gvlNeato layout constant.
# NOTE(review): "members_graph" is presumably the plot title and the trailing
# True presumably turns on node labels — confirm against the SNAP documentation.
snap.DrawGViz(members_graph, snap.gvlNeato, "images/members_graph.png", "members_graph", True)