In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import MeanShift
import json

In [5]:
def nclusters_meanshift(data, bw):
    """Effective number of clusters in `data` according to mean-shift clustering.

    The values are ordered (by value, ties broken by original position),
    clustered in one dimension with ``MeanShift(bandwidth=bw)``, and the
    resulting cluster sizes n_k are combined into

        C = (sum_k n_k)**2 / sum_k n_k**2

    which is the effective-number-of-clusters measure referred to in the paper.

    Parameters
    ----------
    data : indexable sequence of numbers (indexed 0 .. len(data) - 1)
    bw : bandwidth forwarded to ``MeanShift``

    Returns
    -------
    float
        The effective number of clusters.
    """
    # Pair every value with its original position, then sort by (value, position).
    indexed = [(pos, data[pos]) for pos in range(len(data))]
    indexed.sort(key=lambda pair: (pair[1], pair[0]))
    positions = [pos for pos, _ in indexed]

    # MeanShift expects a 2-D array: one row per sample, a single feature column.
    column = np.array([value for _, value in indexed]).reshape(-1, 1)
    fitted = MeanShift(bandwidth=bw).fit(column)

    # Put each cluster label back at the position its value had in `data`.
    labels = np.arange(len(indexed))
    labels[positions] = fitted.labels_

    # Count how many values fell into each cluster.
    sizes = {}
    for label in labels:
        sizes[label] = sizes.get(label, 0) + 1

    # Effective number of clusters: squared total over the sum of squared sizes.
    total = sum(sizes.values())
    squared = sum(size ** 2 for size in sizes.values())
    return total ** 2 / squared

def nclusters(data, threshold):
    """Effective number of clusters using a greedy fixed-width grouping.

    The values are converted to float and sorted. A cluster starts at the
    first value not covered by the previous cluster and absorbs every value
    up to (and including) ``start + threshold``. With cluster sizes n_k the
    result is

        C = len(data)**2 / sum_k n_k**2

    Parameters
    ----------
    data : iterable of values convertible to float (must be non-empty)
    threshold : width of each cluster window

    Returns
    -------
    float
        The effective number of clusters.
    """
    values = sorted(float(el) for el in data)

    # Upper bound of the window currently being filled.
    upper = values[0] + threshold
    # Size of every cluster found so far; the last entry is the open cluster.
    sizes = [0]
    for value in values:
        if value <= upper:
            sizes[-1] += 1
        else:
            # Value lies outside the current window: open a new cluster here.
            upper = value + threshold
            sizes.append(1)

    # At this point `sizes` holds the cluster sizes of one run.
    return len(values) ** 2 / sum(size * size for size in sizes)

In [3]:
# Load the aggregated results table, using the first CSV column as the row index,
# and preview the first rows.
results = pd.read_csv("aggregate/aggregate_results_final_final.csv", index_col=[0])
results.head()

Unnamed: 0,n,density,eps,gam,gam_media,p_media,max_it,media_op,avg_ncluster,std_ncluster,avg_pwdist,std_pwdist,avg_niter,std_niter,new_avg_ncluster,new_std_ncluster
0,100,1.0,0.1,0.0,0.0,0.1,1000000,0.05;0.5;0.95,4.335848,0.550327,0.168295,0.252318,282.56,361.727123,4.335848,0.547568
1,100,1.0,0.1,0.5,0.5,0.1,1000000,0.05;0.5;0.95,5.111224,0.609606,0.170443,0.251217,113.63,20.596486,5.980435,1.116844
2,100,1.0,0.1,0.75,0.75,0.1,1000000,0.05;0.5;0.95,5.284581,0.596961,0.170446,0.250276,99.1,20.3045,6.246396,1.241999
3,100,1.0,0.1,1.0,1.0,0.1,1000000,0.05;0.5;0.95,5.532513,0.601132,0.169319,0.247976,89.49,15.683418,6.591072,1.225625
4,100,1.0,0.1,1.25,1.25,0.1,1000000,0.05;0.5;0.95,5.636583,0.718927,0.169129,0.247322,84.12,14.55972,6.802312,1.388331


In [7]:
# Accumulators for the per-row mean and standard deviation of the effective
# number of clusters. The loop cell below appends to these lists, so this cell
# must be re-run before re-running that loop.
new_values_avg = []
new_values_std = []

In [8]:
# For every row of `results`, load the final opinions of all runs of that
# parameter combination and compute the mean-shift effective number of clusters
# (bandwidth 0.01) per run; keep the mean and standard deviation across runs.
for index, row in results.iterrows():
    # "0.05;0.5;0.95" -> "[0.05, 0.5, 0.95]", the form used in the file names.
    media_op = ', '.join(str(el) for el in row['media_op'].split(';'))
    mo = f'[{media_op}]'
    eps = row['eps']
    gam = row['gam']
    p_media = row['p_media']
    # NOTE(review): the `gm` part of the name reuses `gam`, not row['gam_media'];
    # the two columns are equal in the rows shown here — confirm that holds for all rows.
    name = f'media mo{mo} p{p_media} e{eps} g{gam} gm{gam} mi1000000'
    print(f'doing {name}')
    # Context manager so the file handle is closed after each row
    # (previously every opened file was left open).
    with open(f'aggregate/final_opinions {name}.json') as jsonfile:
        data = json.load(jsonfile)
    ncarray = []
    for finalops in data.values():
        nc = nclusters_meanshift(list(finalops), bw=0.01)
        ncarray.append(nc)
    ncarray = np.array(ncarray)
    new_values_avg.append(np.average(ncarray))
    new_values_std.append(np.std(ncarray))

doing media mo[0.05, 0.5, 0.95] p0.1 e0.1 g0.0 gm0.0 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.1 g0.5 gm0.5 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.1 g0.75 gm0.75 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.1 g1.0 gm1.0 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.1 g1.25 gm1.25 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.1 g1.5 gm1.5 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.2 g0.0 gm0.0 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.2 g0.5 gm0.5 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.2 g0.75 gm0.75 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.2 g1.0 gm1.0 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.2 g1.25 gm1.25 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.2 g1.5 gm1.5 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.3 g0.0 gm0.0 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.3 g0.5 gm0.5 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.3 g0.75 gm0.75 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.3 g1.

In [9]:
# Attach the statistics collected in new_values_avg / new_values_std as new
# columns; the lists must hold exactly one entry per row of `results`.
results["0.01MS_avg_ncluster"] = new_values_avg
results["0.01MS_std_ncluster"] = new_values_std

In [12]:
# Same computation as for the 0.01 bandwidth, repeated with bandwidth 0.00001:
# per row of `results`, the mean and standard deviation (across runs) of the
# mean-shift effective number of clusters.
new_values_avg = []
new_values_std = []

for index, row in results.iterrows():
    # "0.05;0.5;0.95" -> "[0.05, 0.5, 0.95]", the form used in the file names.
    media_op = ', '.join(str(el) for el in row['media_op'].split(';'))
    mo = f'[{media_op}]'
    eps = row['eps']
    gam = row['gam']
    p_media = row['p_media']
    # NOTE(review): the `gm` part of the name reuses `gam`, not row['gam_media'];
    # the two columns are equal in the rows shown here — confirm that holds for all rows.
    name = f'media mo{mo} p{p_media} e{eps} g{gam} gm{gam} mi1000000'
    print(f'doing {name}')
    # Context manager so the file handle is closed after each row
    # (previously every opened file was left open).
    with open(f'aggregate/final_opinions {name}.json') as jsonfile:
        data = json.load(jsonfile)
    ncarray = []
    for finalops in data.values():
        nc = nclusters_meanshift(list(finalops), bw=0.00001)
        ncarray.append(nc)
    ncarray = np.array(ncarray)
    new_values_avg.append(np.average(ncarray))
    new_values_std.append(np.std(ncarray))

# One entry per row was appended above, so the lists align with `results`.
results["0.00001MS_avg_ncluster"] = new_values_avg
results["0.00001MS_std_ncluster"] = new_values_std

doing media mo[0.05, 0.5, 0.95] p0.1 e0.1 g0.0 gm0.0 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.1 g0.5 gm0.5 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.1 g0.75 gm0.75 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.1 g1.0 gm1.0 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.1 g1.25 gm1.25 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.1 g1.5 gm1.5 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.2 g0.0 gm0.0 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.2 g0.5 gm0.5 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.2 g0.75 gm0.75 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.2 g1.0 gm1.0 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.2 g1.25 gm1.25 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.2 g1.5 gm1.5 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.3 g0.0 gm0.0 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.3 g0.5 gm0.5 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.3 g0.75 gm0.75 mi1000000
doing media mo[0.05, 0.5, 0.95] p0.1 e0.3 g1.

In [14]:
# Display the table with the added mean-shift columns.
# NOTE(review): the captured output also shows 0.0001MS_* columns that no cell
# visible here creates — presumably from an edited/removed cell or a previously
# saved CSV; confirm the notebook reproduces from a fresh kernel.
results

Unnamed: 0,n,density,eps,gam,gam_media,p_media,max_it,media_op,avg_ncluster,std_ncluster,...,avg_niter,std_niter,new_avg_ncluster,new_std_ncluster,0.01MS_avg_ncluster,0.01MS_std_ncluster,0.0001MS_avg_ncluster,0.0001MS_std_ncluster,0.00001MS_avg_ncluster,0.00001MS_std_ncluster
0,100,1.0,0.1,0.00,0.00,0.1,1000000,0.05;0.5;0.95,4.335848,0.550327,...,282.56,361.727123,4.335848,0.547568,4.335848,0.547568,6.099631,1.419080,6.703897,1.791913
1,100,1.0,0.1,0.50,0.50,0.1,1000000,0.05;0.5;0.95,5.111224,0.609606,...,113.63,20.596486,5.980435,1.116844,5.856033,1.033912,12.326625,3.415332,14.566689,4.574566
2,100,1.0,0.1,0.75,0.75,0.1,1000000,0.05;0.5;0.95,5.284581,0.596961,...,99.10,20.304500,6.246396,1.241999,6.162874,1.165316,13.134670,4.009980,15.548212,5.028616
3,100,1.0,0.1,1.00,1.00,0.1,1000000,0.05;0.5;0.95,5.532513,0.601132,...,89.49,15.683418,6.591072,1.225625,6.518114,1.198646,14.541335,4.283023,16.662048,5.287665
4,100,1.0,0.1,1.25,1.25,0.1,1000000,0.05;0.5;0.95,5.636583,0.718927,...,84.12,14.559720,6.802312,1.388331,6.717476,1.315266,15.805813,4.203050,17.950692,5.605783
5,100,1.0,0.1,1.50,1.50,0.1,1000000,0.05;0.5;0.95,5.798788,0.655022,...,85.23,13.137851,7.216668,1.451438,7.118939,1.368060,16.130333,5.077107,18.044100,5.721081
6,100,1.0,0.2,0.00,0.00,0.1,1000000,0.05;0.5;0.95,2.328089,0.432604,...,1596.55,2236.129780,2.328089,0.430435,2.328089,0.430435,3.383845,1.124842,4.020060,2.077444
7,100,1.0,0.2,0.50,0.50,0.1,1000000,0.05;0.5;0.95,2.509680,0.402311,...,146.28,18.787531,2.509680,0.400294,2.509680,0.400294,5.043151,1.706128,6.113020,2.573614
8,100,1.0,0.2,0.75,0.75,0.1,1000000,0.05;0.5;0.95,2.585506,0.383074,...,128.11,16.480104,2.585506,0.381154,2.585506,0.381154,5.638664,2.143505,6.791191,2.769683
9,100,1.0,0.2,1.00,1.00,0.1,1000000,0.05;0.5;0.95,2.616180,0.426700,...,124.24,18.818173,2.617268,0.424992,2.617268,0.424992,5.634959,2.191986,6.964524,3.326361


In [15]:
# Persist the augmented table.
# NOTE(review): this overwrites the same file the notebook reads as its input,
# so the source CSV changes on every run.
results.to_csv("aggregate/aggregate_results_final_final.csv")

In [16]:
# Reload the file that was just written (first column as row index) and display it.
results = pd.read_csv("aggregate/aggregate_results_final_final.csv", index_col=[0])
results

Unnamed: 0,n,density,eps,gam,gam_media,p_media,max_it,media_op,avg_ncluster,std_ncluster,...,avg_niter,std_niter,new_avg_ncluster,new_std_ncluster,0.01MS_avg_ncluster,0.01MS_std_ncluster,0.0001MS_avg_ncluster,0.0001MS_std_ncluster,0.00001MS_avg_ncluster,0.00001MS_std_ncluster
0,100,1.0,0.1,0.00,0.00,0.1,1000000,0.05;0.5;0.95,4.335848,0.550327,...,282.56,361.727123,4.335848,0.547568,4.335848,0.547568,6.099631,1.419080,6.703897,1.791913
1,100,1.0,0.1,0.50,0.50,0.1,1000000,0.05;0.5;0.95,5.111224,0.609606,...,113.63,20.596486,5.980435,1.116844,5.856033,1.033912,12.326625,3.415332,14.566689,4.574566
2,100,1.0,0.1,0.75,0.75,0.1,1000000,0.05;0.5;0.95,5.284581,0.596961,...,99.10,20.304500,6.246396,1.241999,6.162874,1.165316,13.134670,4.009980,15.548212,5.028616
3,100,1.0,0.1,1.00,1.00,0.1,1000000,0.05;0.5;0.95,5.532513,0.601132,...,89.49,15.683418,6.591072,1.225625,6.518114,1.198646,14.541335,4.283023,16.662048,5.287665
4,100,1.0,0.1,1.25,1.25,0.1,1000000,0.05;0.5;0.95,5.636583,0.718927,...,84.12,14.559720,6.802312,1.388331,6.717476,1.315266,15.805813,4.203050,17.950692,5.605783
5,100,1.0,0.1,1.50,1.50,0.1,1000000,0.05;0.5;0.95,5.798788,0.655022,...,85.23,13.137851,7.216668,1.451438,7.118939,1.368060,16.130333,5.077107,18.044100,5.721081
6,100,1.0,0.2,0.00,0.00,0.1,1000000,0.05;0.5;0.95,2.328089,0.432604,...,1596.55,2236.129780,2.328089,0.430435,2.328089,0.430435,3.383845,1.124842,4.020060,2.077444
7,100,1.0,0.2,0.50,0.50,0.1,1000000,0.05;0.5;0.95,2.509680,0.402311,...,146.28,18.787531,2.509680,0.400294,2.509680,0.400294,5.043151,1.706128,6.113020,2.573614
8,100,1.0,0.2,0.75,0.75,0.1,1000000,0.05;0.5;0.95,2.585506,0.383074,...,128.11,16.480104,2.585506,0.381154,2.585506,0.381154,5.638664,2.143505,6.791191,2.769683
9,100,1.0,0.2,1.00,1.00,0.1,1000000,0.05;0.5;0.95,2.616180,0.426700,...,124.24,18.818173,2.617268,0.424992,2.617268,0.424992,5.634959,2.191986,6.964524,3.326361
