In [None]:
# import libraries
import networkx as nx
import pandas as pd
import os
import json

from networkx.algorithms.community.quality import coverage, modularity

### Helper functions

In [None]:
def filter_graph(graph, thresh):
    """Collect the nodes whose degree is strictly below ``thresh``.

    The graph is only inspected, never modified; the caller decides what to
    do with the returned nodes.

    Args:
        graph: Object whose ``degree()`` call yields ``(node, degree)`` pairs
            (for example a NetworkX graph).
        thresh: Minimum degree a node must have to be left out of the result.

    Returns:
        list: Nodes with ``degree < thresh``, in the order of the degree mapping.
    """
    degree_by_node = dict(graph.degree())
    low_degree_nodes = []
    for node in degree_by_node:
        if degree_by_node[node] < thresh:
            low_degree_nodes.append(node)
    return low_degree_nodes

In [None]:
def to_undirected(graph):
    """Return an undirected version of ``graph``.

    Args:
        graph: NetworkX graph, directed or undirected.

    Returns:
        The result of ``nx.to_undirected(graph)`` when ``graph`` is directed;
        otherwise ``graph`` itself, unchanged.
    """
    if nx.is_directed(graph):
        return nx.to_undirected(graph)
    # An already-undirected graph used to fall off the end of the function and
    # yield None; hand the graph back so callers always receive a graph.
    return graph

In [None]:
def is_connected(graph):
    """Print whether ``graph`` is connected, ignoring edge direction.

    A directed graph is first converted with ``nx.to_undirected``; an
    undirected graph is checked as-is.

    Args:
        graph: NetworkX graph, directed or undirected.

    Returns:
        None. The result is printed as ``Is connected: <bool>``.
    """
    # The predicate must be *called*: the bare function object
    # `nx.is_directed` is always truthy, which made the undirected branch
    # unreachable and converted every graph.
    undirected = nx.to_undirected(graph) if nx.is_directed(graph) else graph
    print('Is connected:', nx.is_connected(undirected))

In [None]:
def get_info(graph):
    """Print a short report about ``graph``.

    Emits, in order: the ``nx.info`` summary, whether the graph is weighted,
    whether it is directed, and the connectivity line printed by
    ``is_connected``.

    Args:
        graph: NetworkX graph to describe.
    """
    # NOTE(review): nx.info is not available in NetworkX 3.x; this helper
    # assumes a 2.x install — confirm the pinned version.
    print(nx.info(graph))
    flag_checks = (
        ('Is weighted:', nx.is_weighted),
        ('Is directed:', nx.is_directed),
    )
    for label, check in flag_checks:
        print(label, check(graph))
    is_connected(graph)

In [None]:
def get_modularity_and_coverage(graph, communities):
    """Compute coverage and modularity of a community assignment.

    Args:
        graph: NetworkX graph the communities refer to.
        communities: Iterable of node collections, one per community. Any
            iterable is accepted, including a one-shot iterator or a
            ``dict.values()`` view.

    Returns:
        dict: ``{'coverage': <value>, 'modularity': <value>}`` as returned by
        the ``coverage`` and ``modularity`` functions.
    """
    # Both metrics walk over the communities, so materialise them once;
    # otherwise a generator would already be exhausted when the second
    # metric is computed.
    communities = list(communities)
    return {
        'coverage': coverage(graph, communities),
        'modularity': modularity(graph, communities),
    }

In [None]:
def write_gexf(graph, name, path):
    """Save ``graph`` in GEXF format and print a confirmation.

    Args:
        graph: NetworkX graph to export.
        name: File name of the export (the caller supplies the extension).
        path: Directory the file is written to.
    """
    destination = os.path.join(path, name)
    nx.write_gexf(graph, destination)
    print(f'{name} written')

In [None]:
def read_json_to_dict(filename: str, path: str):
    """Load a JSON file from ``path`` and return its parsed content.

    Args:
        filename: Name of the file; ``.json`` is appended when it is missing.
        path: Directory that contains the file.

    Returns:
        Whatever ``json.load`` produces for the file (a dict when the
        top-level JSON value is an object).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    if not filename.endswith('.json'):
        filename = filename + '.json'
    # Decode explicitly as UTF-8 so non-ASCII content is read the same way on
    # every platform instead of depending on the locale's default encoding.
    with open(os.path.join(path, filename), encoding='utf-8') as json_file:
        return json.load(json_file)

## Read and analyse graph

In [None]:
# Load the Covid graph from its GEXF export.
# Disabled alternative that reads a GML file instead:
# graph = nx.read_gml("../dataset_covid/Final_Graph_Covid.gml")
# `path` is also the output directory used when the filtered graph is written later on.
path = "../dataset_covid"
name = "Covid.gexf"
graph_path = os.path.join(path, name)
graph = nx.read_gexf(graph_path)

In [None]:
# Print a summary of the loaded graph: nx.info output, weighted/directed flags and connectivity
get_info(graph)

In [None]:
# (node, degree) pairs ordered from highest to lowest degree
degrees = sorted(graph.degree, key=lambda x: x[1], reverse=True)

In [None]:
# degrees

In [None]:
# Nodes with degree below 2 (i.e. degree 0 or 1) are the candidates for removal
node_to_remove = filter_graph(graph,2)

In [None]:
# Number of nodes that fall below the degree threshold
len(node_to_remove)

In [None]:
# Filter a copy so the loaded `graph` stays untouched, then summarise the reduced graph
copied_graph = graph.copy()
copied_graph.remove_nodes_from(node_to_remove)
get_info(copied_graph)

In [None]:
# Export the filtered graph only when it is still a single connected piece.
# nx.is_connected is not implemented for directed graphs, so the check runs on
# an undirected view when needed; the graph that gets written is unchanged.
connectivity_view = nx.to_undirected(copied_graph) if nx.is_directed(copied_graph) else copied_graph
if nx.is_connected(connectivity_view):
    write_gexf(copied_graph, 'small_covid.gexf', path)
else:
    print('Graph not connected')

### Read through pandas

In [None]:
# Load the raw edge list and tag every row with a unit weight
edges_unweightened = pd.read_csv(
    "../dataset_covid/user_edgelist.csv",
    delimiter=";",
).assign(w=1)
# Collapse duplicate (source, target) rows: every remaining column, including
# "w", becomes the count of non-null entries for that pair
edges_weightened = (
    edges_unweightened
    .groupby(["source", "target"])
    .count()
    .reset_index()
)

In [None]:
# Build an undirected graph from the weighted edge list (edge attribute "w"),
# keep only its largest connected component and print a summary of it
graph_edgelist = nx.from_pandas_edgelist(edges_weightened, edge_attr="w", create_using=nx.Graph)
# The component with the most nodes wins
largest_cc = max(nx.connected_components(graph_edgelist), key=len)
ccn_graph = graph_edgelist.subgraph(largest_cc)
get_info(ccn_graph)

In [None]:
# Write the largest connected component next to the other dataset files
if nx.is_connected(ccn_graph):
    # write_gexf needs the destination directory as its third argument;
    # calling it with only two arguments raises a TypeError.
    write_gexf(ccn_graph, "Connected_covid.gexf", path)
else:
    print('Graph not connected')

### Get modularity and coverage

In [None]:
# Directory with the community assignments, read below as "<graph name>_fluidc.json"
communities_path =  "../community/"
# Base names of the graphs to evaluate (the ".gexf" extension is added when reading)
graphs_to_evaluate = ["Covid",
                      "gunsense_followers_network_part_largest_CC",
                      "netanyahu_followers_network_part_largest_CC",
                      "russia_march_followers_network_part_largest_CC"
                     ]
# Directory with the "<graph name>.gexf" files
graphs_path = "../analysis_paper/"

In [None]:
# Score the stored community assignment of every graph; results are keyed by graph name
cov_mod = {}
for g in graphs_to_evaluate:
    # "<name>.gexf" holds the graph, "<name>_fluidc.json" its communities
    read_graph = nx.read_gexf(os.path.join(graphs_path, f"{g}.gexf"))
    community = read_json_to_dict(f"{g}_fluidc.json", communities_path)
    # Only the values of the loaded mapping are passed on as the communities
    result = get_modularity_and_coverage(read_graph, community.values())
    cov_mod[g] = result
    print(f'{g} done')

In [None]:
# One column per graph, one row per metric; the index is labelled "Metric"
coverage_modularity_result = pd.DataFrame.from_dict(cov_mod).rename_axis('Metric')
coverage_modularity_result

In [None]:
# Save the metric table as CSV; the relative file name resolves against the
# current working directory (the notebook folder when run from there)
coverage_modularity_result.to_csv('coverage_modularity.csv')