In [1]:
import pandas as pd
import numpy as np
from scipy.stats import mode
import seaborn as sns

In [4]:
df = pd.read_csv("igo_year_formatv3.csv", delimiter="	")

In [10]:
wanted_years = df.loc[df.year == 2012]

AttributeError: 'DataFrame' object has no attribute 'year'

In [11]:
wanted_years.shape

NameError: name 'wanted_years' is not defined

In [None]:
wanted_years.isnull().sum()

In [None]:
country_columns = wanted_years.columns[3:220]
country_columns

In [None]:
for country in country_columns:
    n_nulls = wanted_years[country].isnull().sum()
    if n_nulls > 0:
        print(f"{country} has {n_nulls} nulls")

In [None]:
with open("country_columns.txt", 'w') as f:
    for country in country_columns:
        f.write(f"{country}\n")
    

In [None]:
# From happines report
with open("happiness_countrys.txt", 'r') as f:
    countrys = f.readlines()

notacao_diff = {
    "unitedstates": "usa",
    "unitedkingdom": "uk",
    "centralafricanrepublic": "car",
    "southkorea": "sokorea",
    "czechrepublic": "czechrep",
    "southafrica": "soafrica",
    "dominicanrepublic": "domrepublic",
    "macedonia": "fyromacedonia",
    "unitedarabemirates": "uae",
}

countrys = set(map(lambda x: x.lower().replace("\n", "").replace(" ", ""), countrys))


set_not_in_hr = set()
for country in country_columns:
    country = country.lower().replace("\n", "").replace(" ", "")
    if not country in countrys and not country in notacao_diff.values():
        print(f"**{country}** nao esta no happiness")
        set_not_in_hr.add(country)
        
set_country_columns = set(country_columns)
set_country_columns = set_country_columns - set_not_in_hr

In [None]:
from tqdm import tqdm

In [None]:
intersecs = {}

for country_1 in tqdm(set_country_columns):
    for country_2 in set_country_columns:
        if country_1 != country_2:
            country_1_present = wanted_years[country_1]
            country_2_present = wanted_years[country_2]
            n_intersecs = len(wanted_years.loc[(country_1_present == 1.0) & (country_2_present == 1.0)])
            intersecs[(country_1, country_2)] = n_intersecs
                

In [None]:
import pickle

with open("intersecs.pickle", 'wb') as f:
    pickle.dump(intersecs, f)
# with open("intersecs.pickle", 'rb') as f:
#     intersecs = pickle.load(f)

In [None]:
import matplotlib.pyplot as plt

intersecs_values = list(intersecs.values())
plt.hist(intersecs_values, color='red', alpha=0.7, edgecolor='black')
plt.xlabel('Frequencias dos numeros IGOS em comum entre países')
plt.show()

In [None]:
threshold = mode(intersecs_values)[0][0]
# threshold = sum(intersecs_values) / len(intersecs_values)
threshold

In [None]:
import netpixi
from netpixi.integration.gt import *
from regression.integration.gt import *

In [None]:
g = Graph(directed=False)

for countrys, n_intersecs in intersecs.items():
    if n_intersecs > threshold:
        if not g.has_vertex(countrys[0]):
            v1 = g.add_vertex(countrys[0])
        if not g.has_vertex(countrys[1]):
            v2 = g.add_vertex(countrys[1])
            
        if not g.has_edge(countrys[0], countrys[1]):
            g.add_edge(countrys[0], countrys[1])

PATH_REDE = 'IGOS.net.gz'
gt_save(g, PATH_REDE)

In [None]:
n = g.num_vertices()
m = g.num_edges()

print(f"Número de arestas: {m}")
print(f"Número de vértices: {n}")

In [None]:
max_edges = n * (n - 1) // 2
print(f"Número máximo de arestas: {max_edges}")
d = m / max_edges
print(f"Densidade: {d}")

In [None]:
data = gt_data(g)

degrees = []
for v in g.all_vertices():
    degrees.append(v.total_degree())
    
data['degree'] = degrees

In [None]:
data['degree'].describe()

In [None]:
sns.histplot(data['degree'])

In [None]:
m = gt_draw.sfdp_layout(g)
gt_move(g, m)
gt_save(g, 'rede_sfdp.net.gz')
r = netpixi.render('rede_sfdp.net.gz', infinite=True);

In [None]:
r.vertex_set_key('id')

In [None]:
import cpnet

from graph_tool import spectral

In [None]:
matrix = spectral.adjacency(g)

# Além de cpnet.Rombach, há outras
# opções de algoritmos contínuos:
# cpnet.MINRES
# cpnet.Rossa

algorithm = cpnet.Rombach()
algorithm.detect(matrix)
c = algorithm.get_coreness()

# Depois do código acima, c é um dicionário
# em que as chaves são índices de vértices e
# valores são os atribuídos pelo algoritmo.

g.add_vp('coreness')
for i, coreness in c.items():
    v = g.get_vertex_by_index(i)
    v['coreness'] = float(coreness)

# Vamos usar coreness como referência para
# o tamanho dos vértices. Estabelecemos 10
# como o menor tamanho possível e 40 como
# o maior tamanho possível. Como o coreness
# está entre 0 e 1, a conta é bem simples.

for v in g.all_vertices():
    r.vertex(v['id'], size=(10 + 40 * v['coreness']))

# Quando a execução desta célula terminar,
# veja como ficou a visualização acima.

In [None]:
data = gt_data(g)

coreness = []
for v in g.all_vertices():
    coreness.append(v['coreness'])
    
data['coreness'] = coreness
data

In [None]:
data.to_csv("coreness_igos.csv")