Este notebook es para testear las metricas a calcular en las redes. Las mismas luego se pasarán a una clase *analizer*. 
Cosas a hacer:
* Comunidades
* Agregar attributos
* Modularidad: por partido y comunidad
* NMI
* Diversity: communities, parties
* Distancia
* Shortest path mean (de toda red)
* Shortest path entre partidos

In [1]:
year = 2019
alpha = 0.17

In [2]:
import matplotlib.pyplot as plt
import networkx as nx

In [3]:
from procesador_votaciones import ProcesadorDeVotaciones
from heavy_graph_maker import HeavyGraphMaker
from new_disparity_filter import NewDisparityFilter

In [4]:
procesador = ProcesadorDeVotaciones(year, year, 'data')
congreso = procesador.procesar()
congreso.validate()

Loading from data/2019/1
Loading from data/2019/2
Loading from data/2019/3
Loading from data/2019/4
Loading from data/2019/5
Loading from data/2019/6
Loading from data/2019/7
Loading from data/2019/8
Loading from data/2019/9
Loading from data/2019/10
Loading from data/2019/11
Loading from data/2019/12
Changing vote 3952 in 2019 to 2020
Changing vote 3958 in 2019 to 2020
Changing vote 3942 in 2019 to 2020
Changing vote 3947 in 2019 to 2020
Changing vote 3953 in 2019 to 2020
Changing vote 3954 in 2019 to 2020
Changing vote 3936 in 2019 to 2020
Changing vote 3935 in 2019 to 2020
Changing vote 3950 in 2019 to 2020
Changing vote 3945 in 2019 to 2020
Changing vote 3948 in 2019 to 2020
Changing vote 3949 in 2019 to 2020
Changing vote 3946 in 2019 to 2020
Changing vote 3943 in 2019 to 2020
Changing vote 3941 in 2019 to 2020
Changing vote 3944 in 2019 to 2020
Changing vote 3956 in 2019 to 2020
Changing vote 3940 in 2019 to 2020
Changing vote 3957 in 2019 to 2020
Changing vote 3933 in 2019 to 20

In [5]:
maker = HeavyGraphMaker(congreso)
grafo = maker.create_year_network(year, positive = True)

print(f' El grafo tiene {len(grafo.get_edges())} enlaces')

plt.hist(grafo.get_sorted_edges_weights().values(), bins = 100)
plt.show()

print(f'Peso max {max(grafo.get_sorted_edges_weights().values())}')
print(f'Peso min {min(grafo.get_sorted_edges_weights().values())}')

In [6]:
grafo_cortado = NewDisparityFilter(grafo).alpha_cut(alpha_t = alpha)

The graph has 27883 edges
Max alpha is 0.9599667723469449, min alpha is 0.007888357372729058
Enlaces sacados: 1000
Enlaces sacados: 2000
Enlaces sacados: 3000
Enlaces sacados: 4000
Enlaces sacados: 5000
Enlaces sacados: 6000
Enlaces sacados: 7000
Enlaces sacados: 8000
Enlaces sacados: 9000
Enlaces sacados: 10000
Enlaces sacados: 11000
Enlaces sacados: 12000
Enlaces sacados: 13000
Enlaces sacados: 14000
Enlaces sacados: 15000
Enlaces sacados: 16000
Enlaces sacados: 17000
Enlaces sacados: 18000
Enlaces sacados: 19000
Enlaces sacados: 20000
Enlaces sacados: 21000
Enlaces sacados: 22000
Enlaces sacados: 23000
23337 edges deleted, 4546 left. 16.304% left.
Gigant component is 0.9227799227799228 of the total


In [7]:
len(grafo_cortado.get_edges())

4546

In [8]:
GC_cut, _ = grafo_cortado.gigant_component()
len(GC_cut.get_nodes())

239

# Comunidades
* Queremos calcular comunidades en la componente gigante del grafo cortado
* Luego a cada comunidad guardarla como un atributo del nodo

* Analizar composición de la comunidad.


### Pesos a tener en cuenta

In [9]:
new_weights = list(GC_cut.get_edge_attr('weight').values()) 

In [10]:
import funciones as f
funciones_dict = {'Fast greedy': f.fast_greedy,'Leiden': f.leiden1}

particiones = {}

for name, metodo in funciones_dict.items():
    particiones[name] = {}
    particion = metodo(GC_cut.graph,new_weights)
    particiones[name] = particion  

# Agregamos atributos al grafo
Queremos que cada nodo tenga tres cosas: comunidad, partido e ideologia

### Parties

In [11]:
if len(grafo.years) == 1:
    graph_year = grafo.years[0]
if len(grafo.years) == 2:
    graph_year = grafo.years

parties_dict = {}
for repre in grafo_cortado.get_nodes():
    parties_dict[repre] = [party.text for party in repre.parties.get(year)][0]

grafo_cortado.set_node_attributes(parties_dict, 'party')

### Comunidades

In [12]:
#Leiden
communities_dict = particiones['Leiden']
GC_cut.set_node_attributes(communities_dict, 'community')

### Ideologies

In [13]:
import json

with open('configs/ideologies.json') as json_file:
    ideologies_dict = json.load(json_file)

ideology_dict_to_add = {}
for repre, party in parties_dict.items():
    for ideology, list_of_parties in ideologies_dict[f'{graph_year}'].items():
        if party in list_of_parties:
            ideology_dict_to_add[repre] = ideology
    if repre not in ideology_dict_to_add:
        ideology_dict_to_add[repre] = 'Sin ideologia'

In [14]:
grafo_cortado.set_node_attributes(ideology_dict_to_add, 'ideology')

# Creamos un dict para guardar las métricas

In [15]:
data = {}
data['alpha'] = alpha

# Diversidad por partidos, comunidad

In [16]:
import numpy as np
def diversity_by_party(year):
    """Este es la unica metrica que no necesita del grafo, pues la saca del congreso.
    Se le pasa anios y devuelve dos listas: con la diversidad de partidos para esos anios y 
    con el total de partidos
    
        Parameters
    ----------
    years : lists of years to calculate

    Returns
    -------
    diversities : lists of diversities for each year
    total: total number of parties per year
    """
    parties = [party for party in congreso.parties if party.representatives.get(year)]
    cant_people = [len(party.representatives[year]) for party in parties]
    total = sum(cant_people)
    probs = [people / total for people in cant_people]
    entropy = -sum(np.log(probs) * probs)
    diversity = np.exp(entropy)
        
        
    return diversity, len(parties)

def diversity_by_param(grafo, param):
    """
    grafo: RepresentativesGraph
    Param: 'community' or 'ideology'. Anything that its a attr of the nodes"""
    params = list(grafo.get_node_attr(param).values())
    unique,count = np.unique(params,return_counts=True)
    total = sum(count)
    probs = count/total
    entropy = -sum(np.log(probs)*probs)
    div = np.exp(entropy)
    total_unique = len(unique)

    return div,total_unique

In [17]:
div_comm = diversity_by_param(GC_cut, 'community')

In [18]:
div_party = diversity_by_party(year)

In [19]:
data['div_comm'] = div_comm
data['div_party'] = div_party

# Info mutua

In [20]:
from sklearn.metrics import normalized_mutual_info_score

def info_mutua(grafo, ideology = False):
    mutual_info = []
    communities = list(grafo.get_node_attr('community').values())
    if not ideology:
        parties = [party for party in grafo.get_node_attr('party').values()]
        mutual_info = normalized_mutual_info_score(communities,parties,'geometric')
        return mutual_info
    else:
        ideologies = [ideo for ideo in grafo.get_node_attr('ideology').values()]
        mutual_info = normalized_mutual_info_score(communities,ideologies,'geometric')
        return mutual_info

In [21]:
informacion_mutua = info_mutua(GC_cut)

In [22]:
data['info_mutua'] = informacion_mutua

# Modularidad

In [23]:
import igraph as ig

def modularity(grafo, parameter):
    param_dict = grafo.get_node_attr(parameter)    
    ig_graph = ig.Graph.TupleList(grafo.get_edges(data = True), directed=False, weights = True)
    for node in ig_graph.vs:
        node[parameter] = param_dict[node['name']]

    param = list(set(ig_graph.vs[parameter]))
    param_int = [param.index(p) for p in ig_graph.vs[parameter]]
    vc = ig.VertexClustering(ig_graph, param_int, params={'weight':ig_graph.es['weight']})
    return vc.modularity

In [24]:
mod_ideo = modularity(GC_cut, 'ideology')

In [25]:
mod_comm = modularity(GC_cut, 'community')

In [26]:
mod_party = modularity(GC_cut, 'party')

In [27]:
data['modularity_ideology'] = mod_ideo
data['modularity_community'] = mod_comm
data['modularity_party'] = mod_party

# Distance

In [28]:
import numpy as np

def calculate_dist(grafo, year):
    weights = grafo.get_edge_attr('weight')
    total_votes = len(congreso.votings[year])
    distance = {k: np.sqrt(2 * (1 - w / total_votes)) for k,w in weights.items()}
    grafo.set_edge_attributes(distance, 'distance')

In [29]:
calculate_dist(GC_cut, year)

# Shortest path mean 
De toda la red

In [30]:
from statistics import mean

def shortest_path_mean(grafo):
        GC, _ = grafo.gigant_component()
        len_nodes = len(GC.get_nodes())
        dists = []
        for node in GC.get_nodes():
            path_lens = GC.shortest_path(node).values()
            for p in path_lens:
                if len(p) >= 2:
                    x = sum([distance_edge(GC, p[i], p[i + 1]) for i in range(0, len(p) - 1 )])
                    dists.append(x)
        m = mean(dists)
        return m
                    
def distance_edge(grafo, node1, node2):
    return grafo.graph.edges()[node1, node2]['distance']                    

In [31]:
data['shortest_path_mean'] = shortest_path_mean(GC_cut)

# Distancia entre partidos
La idea es ver la distancia entre el Justicialismo y Radicalismo (por ejemplo) a lo largo de los años y justicialismo entre sí.

In [32]:
def calculate_shortest_paths(grafo, groups, parties = True):
    if parties == True:
        param = 'party'
    else: 
        param = 'ideology'
        
    summary = {k: {} for k in groups}
    all_paths = []
    for g1 in groups:
        summary[g1] = {}
        sources = []
        for node, ideology in grafo.get_node_attr(param).items():
            if ideology == g1:
                sources.append(node)
        for g2 in groups:
            targets = []
            paths = []
            for node, ideology in grafo.get_node_attr(param).items():
                if ideology == g2:
                    targets.append(node)
            for s in sources:
                for t in targets:
                    path = grafo.shortest_path(initial_node = s, target_node = t, weight = 'distance')
                    if len(path) > 1:
                        x =  sum([distance_edge(grafo, path[i], path[i + 1]) for i in range(0, len(path) - 1 )])
                        if x > 0:
                            paths.append(x)
                            all_paths.append(x)
            
            if len(paths) == 0:
                summary[g1][g2] = (np.nan,np.nan,np.nan)
                summary[g2][g1] = (np.nan,np.nan,np.nan)
            else:
                mean = np.mean(paths)
                std_dev = np.std(paths)
                summary[g1][g2] = (mean,std_dev,len(targets)) 
                summary[g2][g1] = (mean,std_dev,len(sources))

            if g1 == g2:
                break
                
    all_paths_mean = np.mean(all_paths)
    all_paths_std = np.std(all_paths)
    
    return summary,(all_paths_mean,all_paths_std)  

In [33]:
pps = [item for items in ideologies_dict[f'{year}'].values() for item in items]
pps

['Frente para la Victoria - PJ',
 'PRO',
 'Unión Cívica Radical',
 'Coalición Cívica',
 'Justicialista',
 'Federal Unidos por una Nueva Argentina']

In [34]:
data['shortest_paths_pps'] = calculate_shortest_paths(GC_cut, pps, parties = True)

In [35]:
data['shortest_paths_ideology'] = calculate_shortest_paths(GC_cut, ideologies_dict[f'{year}'], parties = False)

In [36]:
data

{'alpha': 0.17,
 'div_comm': (2.8953421410446976, 3),
 'div_party': (13.112993554692878, 42),
 'info_mutua': 0.60257445403763,
 'modularity_ideology': 0.4207203714524781,
 'modularity_community': 0.5560341140057474,
 'modularity_party': 0.30751187112275963,
 'shortest_path_mean': 4.341003483009598,
 'shortest_paths_pps': ({'Frente para la Victoria - PJ': {'Frente para la Victoria - PJ': (1.2438824284773182,
     0.5030165176334321,
     63),
    'PRO': (3.436366551935795, 0.5982972467875696, 52),
    'Unión Cívica Radical': (3.0789807359951373, 0.5771716065992722, 37),
    'Coalición Cívica': (3.5148866185606025, 0.5752518098577609, 9),
    'Justicialista': (1.999025256860849, 0.657199337300349, 13),
    'Federal Unidos por una Nueva Argentina': (2.455001617705034,
     0.49815860283860136,
     11)},
   'PRO': {'Frente para la Victoria - PJ': (3.436366551935795,
     0.5982972467875696,
     63),
    'PRO': (1.279916063658656, 0.5221874497944441, 52),
    'Unión Cívica Radical': (1.28

In [37]:
with open(f'metrics/{year}_metrics.json', 'w') as outfile:
    json.dump(data, outfile)