# Import librerie

In [1]:
import json
import numpy as np

import networkx as nx
from networkx.readwrite import json_graph

# Classi ausiliarie

In [2]:
class DataAnalyzer():

    def __init__(self,path):
        '''
        Args:
            path (str): path al file contenente i dati
        '''
        self.data = self._read_from_json_(path)

    def _read_from_json_(self,paths):
        '''
        Estrai i grafi contenuti in una lista di file.
        Args:
            paths (list[str]): Paths ai file contenenti i grafi
        Returns:
            GS (list[nx.Graph]): Grafi contenuti nei files
        '''
        GS = []
        for path in paths:
            json_gs= None
            with open(path,'r') as file:
                json_gs = json.load(file)
                file.close()
            for g in json_gs:
                GS.append(json_graph.node_link_graph(g))
        return GS

    def get_data(self):
        '''
        Restituisce i grafi.
        Returns:
            _ (nx.Graph): grafo
        '''
        return self.data
    
    def _avg_nodes_(self):
        '''
        Restituisce la media del numero di nodi
        Returns:
            (float): media del numero di nodi nella collezione di grafi
        '''
        GS = self.get_data()
        nodes = [len(g.nodes) for g in GS]
        return np.mean(nodes)
    
    def _avg_mis_fraction_(self):
        '''
        Restituisce la frazione di nodi misinformativi media sui grafi.
        Returns:
            _ (float): media della frazione di nodi misinformativi.
        '''
        GS = self.get_data()
        return np.mean([float(np.sum([1 for i in g.nodes if g.nodes[i]['Misinformative']==1]) / len(g.nodes)) for g in GS])
    
    def get_info(self,info):
        '''
        Restituisce le informazioni sui dati richieste
        Args:
            info (str): informazione desiderata
        Returns:
            _ (float): valore dell'informazione
        '''
        GS = self.get_data()
        match(info):
            case "average degree":
                return np.mean([float(np.mean([d for _,d in g.degree()])) for g in GS])
            case "average degree centrality":
                return [float(np.mean([c for c in nx.degree_centrality(g).values()])) for g in GS]
            case "average clossness centrality":
                return [float(np.mean([c for c in nx.closeness_centrality(g).values()])) for g in GS]
            case "average betweenness centrality":
                return [float(np.mean([c for c in nx.betweenness_centrality(g,len(g.nodes) // 2).values()])) for g in GS]
            case "average path lenght":
                return np.mean([nx.average_shortest_path_length(g) for g in GS])
            case "average clustering coefficent":
                return np.mean([nx.average_clustering(g) for g in GS])
            case "average nodes":
                return self._avg_nodes_()
            case "average fraction misinformative":
                return self._avg_mis_fraction_()
            case "nodes":
                return  [len(g.nodes) for g in GS]
            case "degrees":
                return [float(np.mean([d for _,d in g.degree()])) for g in GS]
    


# Analisi Train

In [3]:
# Load the training graphs.
train_files = ["advanced mixed train.json"]
d = DataAnalyzer(train_files)

The default value will be changed to `edges="edges" in NetworkX 3.6.


  nx.node_link_graph(data, edges="links") to preserve current behavior, or
  nx.node_link_graph(data, edges="edges") for forward compatibility.


In [4]:
# Range of graph sizes (node counts) across the training graphs.
nodes = d.get_info("nodes")
nodes_range = f'min {np.min(nodes)}, max {np.max(nodes)}'
print(nodes_range)

min 1040, max 2054


In [5]:
# Range of the per-graph mean degree across the training graphs.
deg = d.get_info("degrees")
deg_range = f'min {np.min(deg)}, max {np.max(deg)}'
print(deg_range)

min 8.717307692307692, max 19.983805668016196


In [6]:
# Mean degree, averaged over the training graphs.
avg_degree = d.get_info("average degree")
print(avg_degree)

13.746563732841953


In [7]:
# Mean number of nodes per training graph.
avg_nodes = d.get_info("average nodes")
print(avg_nodes)

1555.3672727272726


In [8]:
# Mean fraction of misinformative nodes per training graph.
avg_mis_fraction = d.get_info("average fraction misinformative")
print(avg_mis_fraction)

0.30625418935562687


In [9]:
# Mean shortest-path length, averaged over the training graphs.
avg_path_length = d.get_info("average path lenght")
print(avg_path_length)

5.010220188290828


In [10]:
# Mean clustering coefficient, averaged over the training graphs.
avg_clustering = d.get_info("average clustering coefficent")
print(avg_clustering)

0.318246260814112


In [11]:
# get_info returns one mean degree centrality per graph; average those.
degree_centrality_per_graph = d.get_info("average degree centrality")
print(float(np.mean(degree_centrality_per_graph)))

0.008860721565789896


In [12]:
# get_info returns one mean closeness centrality per graph; average those.
closeness_centrality_per_graph = d.get_info("average clossness centrality")
print(float(np.mean(closeness_centrality_per_graph)))

0.20165574738100248


In [13]:
# get_info returns one mean betweenness centrality per graph; average those.
betweenness_centrality_per_graph = d.get_info("average betweenness centrality")
print(float(np.mean(betweenness_centrality_per_graph)))

0.002713552872963123


# Analisi Validation

In [14]:
# Load the validation graphs; note that this rebinds `d`.
validation_files = ["advanced mixed validation.json"]
d = DataAnalyzer(validation_files)

In [15]:
# Range of graph sizes (node counts) across the validation graphs.
nodes = d.get_info("nodes")
nodes_range = f'min {np.min(nodes)}, max {np.max(nodes)}'
print(nodes_range)

min 2050, max 4059


In [16]:
# Range of the per-graph mean degree across the validation graphs.
deg = d.get_info("degrees")
deg_range = f'min {np.min(deg)}, max {np.max(deg)}'
print(deg_range)

min 10.698536585365854, max 23.296747967479675


In [17]:
# Mean degree, averaged over the validation graphs.
avg_degree = d.get_info("average degree")
print(avg_degree)

17.198488382479713


In [18]:
# Mean number of nodes per validation graph.
avg_nodes = d.get_info("average nodes")
print(avg_nodes)

3125.84


In [19]:
# Mean fraction of misinformative nodes per validation graph.
avg_mis_fraction = d.get_info("average fraction misinformative")
print(avg_mis_fraction)

0.3336608383290788


In [20]:
# Mean shortest-path length, averaged over the validation graphs.
avg_path_length = d.get_info("average path lenght")
print(avg_path_length)

5.17744860086611


In [21]:
# Mean clustering coefficient, averaged over the validation graphs.
avg_clustering = d.get_info("average clustering coefficent")
print(avg_clustering)

0.31618526545786724


In [22]:
# get_info returns one mean degree centrality per graph; average those.
degree_centrality_per_graph = d.get_info("average degree centrality")
print(float(np.mean(degree_centrality_per_graph)))

0.005488836098690822


In [23]:
# get_info returns one mean closeness centrality per graph; average those.
closeness_centrality_per_graph = d.get_info("average clossness centrality")
print(float(np.mean(closeness_centrality_per_graph)))

0.194785456030119


In [24]:
# get_info returns one mean betweenness centrality per graph; average those.
betweenness_centrality_per_graph = d.get_info("average betweenness centrality")
print(float(np.mean(betweenness_centrality_per_graph)))

0.0014013056192987577
