In [None]:
import csv
import json

import numpy as np
import pandas as pd
from sklearn.cluster import MeanShift

In [None]:
def nclusters_meanshift(data, bw):
    """Return the effective number of clusters found by MeanShift.

    The values are clustered in one dimension with
    ``MeanShift(bandwidth=bw)``. With cluster sizes ``n_k`` the result is
    ``(sum_k n_k)**2 / sum_k n_k**2``.

    Parameters
    ----------
    data : indexable sequence
        Read as ``data[0] .. data[len(data) - 1]``. Every item is converted
        with ``float()``, so numeric strings are accepted, matching what
        ``nclusters`` does with its input.
    bw : float
        Bandwidth forwarded to ``MeanShift``.

    Returns
    -------
    float
        Effective number of clusters: 1 when every point falls in the same
        cluster, and the plain cluster count when all clusters have equal
        size.

    Raises
    ------
    ValueError
        If ``data`` is empty, or an item cannot be converted to float.
    """
    n_points = len(data)
    if n_points == 0:
        # Fail early with a clear message instead of inside the clustering.
        raise ValueError("nclusters_meanshift() requires at least one value")

    # Values are handed to MeanShift in ascending order, as before. Mapping
    # the labels back to the original positions is unnecessary because only
    # the cluster sizes enter the result.
    values = np.sort(np.array([float(data[i]) for i in range(n_points)]))
    cluster_labels = MeanShift(bandwidth=bw).fit(values.reshape(-1, 1)).labels_

    # Effective number of clusters (the formula the original comment refers
    # to as "explained in the paper").
    _, sizes = np.unique(cluster_labels, return_counts=True)
    return sizes.sum() ** 2 / np.sum(sizes ** 2)

def nclusters(data, threshold):
    """Return the effective number of clusters from a greedy threshold sweep.

    The values are converted to float and sorted. A cluster opens at its
    first value ``s`` and absorbs every following value that is
    ``<= s + threshold``; the first value above that bound opens the next
    cluster. With cluster sizes ``n_k`` the result is
    ``len(data)**2 / sum_k n_k**2``.
    """
    values = sorted(float(v) for v in data)
    upper = values[0] + threshold
    key = (values[0], upper)
    sizes = {}
    for value in values:
        if value <= upper:
            # Still inside the current cluster's window.
            sizes[key] = sizes.get(key, 0) + 1
        else:
            # Value lies beyond the window: open a new cluster starting here.
            upper = value + threshold
            key = (value, upper)
            sizes[key] = 1
    # At this point `sizes` holds the clusters of one run.
    squared_sizes = sum(count * count for count in sizes.values())
    return len(values) ** 2 / squared_sizes

In [None]:
# Load the aggregated results table; the cells below read one row of it per
# parameter combination and append new columns to it.
results = pd.read_csv("aggregate/results.csv")
# Preview the first rows of the table.
results.head()

In [None]:
# Effective number of clusters per row of `results`, MeanShift bandwidth 0.01.
# For every row, the final opinions of each run are read from
# 'res/final_opinions <name> nr<nr>.csv', clustered with nclusters_meanshift,
# and the mean / standard deviation over the runs are stored as new columns.
# NOTE(review): `data` is not defined in any cell visible here; its keys are
# used as the run identifiers (nr) - confirm it is loaded before this cell runs.
new_values_avg = []
new_values_std = []
for _index, row in results.iterrows():
    # 'a;b;c' -> '[a, b, c]', the form embedded in the result file names
    media_op = ', '.join(row['media_op'].split(';'))
    mo = f'[{media_op}]'
    eps = row['eps']
    gam = row['gam']
    p_media = row['p_media']
    # gam fills both the g and the gm field of the file name
    name = f'media mo{mo} p{p_media} e{eps} g{gam} gm{gam} mi1000000'
    print(f'doing {name}')
    try:
        ncarray = []
        for nr in data.keys():
            # `with` closes the file even when parsing or clustering fails
            with open(f'res/final_opinions {name} nr{nr}.csv') as opinions_file:
                # read() returns a single string (readlines() returns a list,
                # which has no .strip()); the tokens are converted to float
                # before clustering.
                # assumes whitespace-separated numeric values - TODO confirm
                finalops = [float(tok) for tok in opinions_file.read().split()]
            nc = nclusters_meanshift(finalops, bw=0.01)
            ncarray.append(nc)
        ncarray = np.array(ncarray)
        new_values_avg.append(np.average(ncarray))
        new_values_std.append(np.std(ncarray))
    except FileNotFoundError:
        # a single missing run file leaves the whole row without a value
        new_values_avg.append(None)
        new_values_std.append(None)
results["0.01MS_avg_ncluster"] = new_values_avg
results["0.01MS_std_ncluster"] = new_values_std

In [None]:
# Effective number of clusters per row of `results`, MeanShift bandwidth 0.00001.
# For every row, the final opinions of each run are read from
# 'res/final_opinions <name> nr<nr>.csv', clustered with nclusters_meanshift,
# and the mean / standard deviation over the runs are stored as new columns.
# NOTE(review): `data` is not defined in any cell visible here; its keys are
# used as the run identifiers (nr) - confirm it is loaded before this cell runs.
new_values_avg = []
new_values_std = []
for _index, row in results.iterrows():
    # 'a;b;c' -> '[a, b, c]', the form embedded in the result file names
    media_op = ', '.join(row['media_op'].split(';'))
    mo = f'[{media_op}]'
    eps = row['eps']
    gam = row['gam']
    p_media = row['p_media']
    # gam fills both the g and the gm field of the file name
    name = f'media mo{mo} p{p_media} e{eps} g{gam} gm{gam} mi1000000'
    print(f'doing {name}')
    try:
        ncarray = []
        for nr in data.keys():
            # `with` closes the file even when parsing or clustering fails
            with open(f'res/final_opinions {name} nr{nr}.csv') as opinions_file:
                # read() returns a single string (readlines() returns a list,
                # which has no .strip()); the tokens are converted to float
                # before clustering.
                # assumes whitespace-separated numeric values - TODO confirm
                finalops = [float(tok) for tok in opinions_file.read().split()]
            nc = nclusters_meanshift(finalops, bw=0.00001)
            ncarray.append(nc)
        ncarray = np.array(ncarray)
        new_values_avg.append(np.average(ncarray))
        new_values_std.append(np.std(ncarray))
    except FileNotFoundError:
        # a single missing run file leaves the whole row without a value
        new_values_avg.append(None)
        new_values_std.append(None)
results["0.00001MS_avg_ncluster"] = new_values_avg
results["0.00001MS_std_ncluster"] = new_values_std

In [None]:
# Number of CSV records per run file, averaged over the runs of each row of
# `results` and stored as avg_niter / std_niter.
# presumably one record per iteration of the run - verify against the writer
# NOTE(review): `data` is not defined in any cell visible here; its keys are
# used as the run identifiers (nr) - confirm it is loaded before this cell runs.
new_values_avg = []
new_values_std = []
for _index, row in results.iterrows():
    # 'a;b;c' -> '[a, b, c]', the form embedded in the result file names
    media_op = ', '.join(row['media_op'].split(';'))
    mo = f'[{media_op}]'
    eps = row['eps']
    gam = row['gam']
    p_media = row['p_media']
    # gam fills both the g and the gm field of the file name
    name = f'media mo{mo} p{p_media} e{eps} g{gam} gm{gam} mi1000000'
    print(f'doing {name}')
    try:
        nitarray = []
        for nr in data.keys():
            csvfile = f'res/{name} nr{nr}.csv'
            # newline='' is the mode the csv module documents for reader input
            with open(csvfile, newline='') as csv_file:
                csv_reader = csv.reader(csv_file, delimiter=',')
                # Counts every record in the file; a header row, if present,
                # is counted as well. The loop variable no longer shadows the
                # outer `row`.
                line_count = sum(1 for _record in csv_reader)
            nitarray.append(line_count)
        nitarray = np.array(nitarray)
        new_values_avg.append(np.average(nitarray))
        new_values_std.append(np.std(nitarray))
    except FileNotFoundError:
        # a single missing run file leaves the whole row without a value
        new_values_avg.append(None)
        new_values_std.append(None)
results["avg_niter"] = new_values_avg
results["std_niter"] = new_values_std

In [None]:
# Preview the table after the new columns were assigned by the cells above.
results.head()

In [None]:
# Persist the extended table; to_csv also writes the DataFrame index as the
# first column of the file.
results.to_csv("aggregate/results2.csv")

In [None]:
# Reload the saved table, using the first CSV column as the index.
results = pd.read_csv("aggregate/results2.csv", index_col=[0])
# Preview the first rows of the reloaded table.
results.head()