In [1]:
import pathlib
import os
import pandas as pd
import numpy as np
import networkx as nx
import statistics

In [2]:
def similarita(voti_dep1, voti_dep2):
    """Return the share of jointly attended votes on which two deputies agreed.

    Both arguments are vote vectors in which 0 means "no vote cast"; any other
    value is a vote code. Only positions where both deputies have a non-zero
    code take part in the ratio.

    Returns 0 when the two never cast the same non-zero vote, otherwise
    agreements / (agreements + disagreements).
    """
    dep1_present = voti_dep1 != 0
    dep2_present = voti_dep2 != 0

    # Equal codes with a non-zero first vote imply the second is non-zero too.
    agreements = np.sum((voti_dep1 == voti_dep2) & dep1_present)
    # Different codes count only when neither deputy is missing.
    disagreements = np.sum((voti_dep1 != voti_dep2) & dep1_present & dep2_present)

    if agreements == 0:
        return 0
    return agreements / (agreements + disagreements)

In [3]:
def alignment_parties(df):
    """Compute each party's most common vote on every voting column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Partito' column; every column from the fifth onwards
        is treated as one vote.

    Returns
    -------
    pandas.DataFrame
        One row per party: the voting columns (same names and order as in
        `df`) hold the modal vote of that party, followed by a 'Partito'
        column with the party name. An input without rows yields an empty
        frame with the same columns.
    """
    vote_columns = df.columns[4:].to_list()

    rows = []
    for party_name, party_df in df.groupby('Partito'):
        # Take the mode of the voting columns only. Running the mode over the
        # metadata columns as well let the first of them dictate the dtype of
        # the whole result, so vote codes could be coerced or the call fail.
        votes = party_df.iloc[:, 4:]
        modal_votes = [
            statistics.mode(votes.iloc[:, position].tolist())
            for position in range(votes.shape[1])
        ]
        modal_votes.append(party_name)
        rows.append(modal_votes)

    # Build the frame once instead of concatenating inside the loop. Every row
    # keeps index label 0: fromCsvToGraph in this notebook reads a party's row
    # with `p[col][0]`, i.e. by that label.
    return pd.DataFrame(rows, columns=vote_columns + ['Partito'], index=[0] * len(rows))

In [4]:
def drop_weights(G):
    """Remove the 'weight' attribute from every edge of G, in place.

    The neighbour dictionaries returned by `nx.to_dict_of_dicts` reference the
    graph's own edge-attribute dictionaries, so popping the key there changes
    G itself. Edges without a weight are left as they are.
    """
    adjacency = nx.to_dict_of_dicts(G)
    for neighbours in adjacency.values():
        for edge_attrs in neighbours.values():
            edge_attrs.pop('weight', None)

In [5]:
# Folder with the monthly voting CSVs, located next to the notebook's parent
# directory. The path is assembled from components so the separator is right
# on every operating system (hard-coded backslashes only work on Windows).
dataset_folder = pathlib.Path(
    os.path.dirname(os.getcwd()), 'data_collection', 'data', 'Data_monthly', 'xviii-2022'
)

In [6]:
# Keep only the CSV files of the dataset folder. Filtering into a new list
# avoids popping from the list being iterated, which skipped the entry right
# after each removal. The result is sorted because iterdir() yields entries in
# arbitrary order and later cells select a file by position.
file_list = sorted(path for path in dataset_folder.iterdir() if path.suffix == ".csv")

In [24]:
# Load one monthly voting table (the second entry of file_list) for exploration.
df = pd.read_csv(file_list[1])

In [25]:
# Numeric code for every vote label; the three "did not vote" labels share 0.
dizionario_espressioni = {
    'Assente': np.float64(0),
    'Favorevole': np.float64(1),
    'Non ha votato': np.float64(0),
    'Contrario': np.float64(3),
    'Astensione': np.float64(4),
    'In missione': np.float64(0),
}

# Columns from the fifth onwards hold the votes; labels missing from the
# mapping come out of .map() as NaN.
for column in df.columns[4:]:
    df[column] = df[column].map(dizionario_espressioni)

# Turn NaN into 0, then renumber the rows so that labels equal positions.
df = df.replace(np.nan, 0).reset_index(drop=True)

# Row positions of deputies with fewer than 70% non-zero votes.
min_votes = 0.7 * (df.shape[1] - 4)
indici = [
    row_pos
    for row_pos in range(df.shape[0])
    if np.count_nonzero(df.iloc[row_pos, 4:]) < min_votes
]

df = df.drop(index=indici)

In [27]:
def fromCsvToGraph(df, min_presence=0.7, edge_percentile=50):
    """Build an unweighted vote-similarity graph from one raw voting table.

    Steps: encode the vote labels as numbers, discard deputies who cast too
    few votes, score each remaining deputy's party discipline, compute the
    pairwise `similarita` matrix and keep only the edges whose similarity
    reaches the requested percentile of that matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Table as loaded from the CSV. The first four columns are treated as
        metadata ('Partito' and 'id' are read by name); every column from the
        fifth onwards is one vote holding the labels of
        `dizionario_espressioni`. Labels outside the mapping and missing
        values are encoded as 0. The caller's frame is not modified.
    min_presence : float, default 0.7
        Minimum fraction of votes with a non-zero code a deputy needs in
        order to be kept.
    edge_percentile : float, default 50
        Percentile of the full similarity matrix used as the edge threshold;
        edges strictly below it are removed.

    Returns
    -------
    networkx.Graph
        One node per retained deputy, numbered by row position after
        filtering, without self-loops and without edge weights. Each node
        carries the attributes 'id' (value of the 'id' column) and
        'discipline' (fraction of votes equal to the party's modal vote).
        An empty graph is returned when no deputy passes the presence filter.
    """
    # mapping to categorical values
    dizionario_espressioni = {
        'Assente': np.float64(0),
        'Favorevole': np.float64(1),
        'Non ha votato': np.float64(0),
        'Contrario': np.float64(3),
        'Astensione': np.float64(4),
        'In missione': np.float64(0),
    }

    # Work on a copy: mapping the columns of the caller's frame in place would
    # make a second call on the same frame see only NaN.
    df = df.copy()
    vote_columns = df.columns[4:]
    for column in vote_columns:
        df[column] = df[column].map(dizionario_espressioni)
    df = df.replace(np.nan, 0)

    # Presence filter. A boolean mask selects rows by position, so the result
    # does not depend on the index labels of the incoming frame.
    votes_cast = (df[vote_columns] != 0).sum(axis=1)
    df = df[votes_cast >= min_presence * len(vote_columns)].reset_index(drop=True)
    if df.empty:
        return nx.Graph()

    # Party discipline: share of votes on which a deputy matches the modal
    # vote of their party. The party rows are looked up by party name, not by
    # index label, and coerced to float before comparing.
    alignment = alignment_parties(df)
    party_modes = alignment.set_index('Partito')
    expected = party_modes.reindex(df['Partito'].to_numpy()).to_numpy(dtype=float)
    votes = df[vote_columns].to_numpy(dtype=float)
    discipline = (votes == expected).mean(axis=1)

    # Pairwise similarity. similarita() is symmetric, so only the upper
    # triangle is evaluated and mirrored.
    m = df[vote_columns].to_numpy()
    n = len(m)
    A = np.zeros((n, n))
    for a in range(n):
        for b in range(a, n):
            A[a, b] = A[b, a] = similarita(m[a], m[b])

    # from_numpy_array replaces from_numpy_matrix, which NetworkX 3 removed.
    G = nx.from_numpy_array(A)
    # Materialise the self-loops first so the graph is not edited while it is
    # being iterated.
    G.remove_edges_from(list(nx.selfloop_edges(G)))
    threshold = np.percentile(A, q=edge_percentile)
    weak_edges = [(n1, n2) for n1, n2, w in G.edges(data="weight") if w < threshold]
    G.remove_edges_from(weak_edges)

    drop_weights(G)

    # NOTE(review): the deputy ids and the discipline scores used to be
    # computed and then thrown away; they are kept as node attributes so the
    # nodes can be traced back to deputies. Confirm this matches the intent.
    nx.set_node_attributes(G, values=dict(enumerate(df['id'].tolist())), name='id')
    nx.set_node_attributes(G, values=dict(enumerate(discipline.tolist())), name='discipline')

    return G

In [28]:
# Reload the raw CSV: the df left by the exploratory cell above is already
# numerically encoded, while fromCsvToGraph maps the raw vote labels itself.
df = pd.read_csv(file_list[1])

In [29]:
# Build the vote-similarity graph from the freshly loaded table.
G = fromCsvToGraph(df)

Misure

In [39]:
# Average degree centrality over all nodes of G.
degree_scores = nx.degree_centrality(G)
np.mean(list(degree_scores.values()))

0.4957464553794829

In [42]:
# Average eigenvector centrality over all nodes of G (NumPy-based solver).
eigenvector_scores = nx.eigenvector_centrality_numpy(G)
np.mean(list(eigenvector_scores.values()))

0.0838349293998304

In [43]:
# Average PageRank over all nodes of G. PageRank scores sum to 1, so this
# mean is just 1 / number of nodes and says nothing about the structure.
pagerank_scores = nx.pagerank(G)
np.mean(list(pagerank_scores.values()))

0.00909090909090909

In [45]:
# Average closeness centrality over all nodes of G.
closeness_scores = nx.closeness_centrality(G)
np.mean(list(closeness_scores.values()))

0.5736451257348052

In [46]:
# Average harmonic centrality over all nodes of G.
harmonic_scores = nx.harmonic_centrality(G)
np.mean(list(harmonic_scores.values()))

68.30909090909091

In [47]:
# Average betweenness centrality over all nodes of G.
betweenness_scores = nx.betweenness_centrality(G)
np.mean(list(betweenness_scores.values()))

0.002670435239242579

In [50]:
# Degree assortativity coefficient of G, computed by NetworkX.
nx.degree_assortativity_coefficient(G)

0.023402758177638518