In [5]:
import networkx as nx
import numpy as np
import json
import matplotlib as plt

In [6]:
# Edge list to read; the loading cell parses every line as "node1 node2 community"
input_path = './inputRealGraph/communities.txt'
# Destination of the nodes/links JSON written for the ForceGraph visualisation
output_json_path = './jsonForceGraph/communities.json'

In [7]:
''' STEP 1: Read the input file (graph) and create a progressive ID for the nodes.

Every non-blank line of `input_path` must hold exactly three whitespace-separated
integers: `node1 node2 community`. Nodes are renumbered 1, 2, 3, ... in order of
first appearance. The mapping is stored in `original_to_new_id` and the
renumbered edges in `edges` as `(new_node1, new_node2, community)` tuples. '''

original_to_new_id = {}
new_id_counter = 1

edges = []

# Read the file
with open(input_path, 'r') as file:
    for line in file:
        fields = line.split()

        # A blank or whitespace-only line (typically a trailing one at the end
        # of the file) splits into zero fields and would make the 3-way
        # unpacking below raise ValueError, so it is skipped.
        if not fields:
            continue

        # Still strict: a line with a field count other than 3, or with a
        # non-integer field, raises ValueError exactly as before.
        node1, node2, community = (int(field) for field in fields)

        # Storing the nodes mapping in 'original_to_new_id' dict
        for original_id in (node1, node2):
            if original_id not in original_to_new_id:
                original_to_new_id[original_id] = new_id_counter
                new_id_counter += 1

        # Add the edge, expressed with the new IDs, to the list
        edges.append((original_to_new_id[node1], original_to_new_id[node2], community))



In [8]:
# Sanity check: number of distinct nodes, then the full original-ID -> new-ID mapping
print(len(original_to_new_id), original_to_new_id, sep="\n")

1725
{159: 1, 40104: 2, 28406: 3, 40095: 4, 40088: 5, 40083: 6, 13391: 7, 40091: 8, 40110: 9, 40101: 10, 40097: 11, 40085: 12, 40087: 13, 40081: 14, 40109: 15, 40099: 16, 40100: 17, 40106: 18, 13850: 19, 13397: 20, 40082: 21, 15560: 22, 21062: 23, 40084: 24, 18852: 25, 40103: 26, 40105: 27, 13423: 28, 40089: 29, 35201: 30, 5228: 31, 14976: 32, 40090: 33, 40094: 34, 40108: 35, 40080: 36, 782: 37, 96976: 38, 70329: 39, 96972: 40, 73060: 41, 96968: 42, 96963: 43, 7984: 44, 96957: 45, 44836: 46, 23915: 47, 96948: 48, 96929: 49, 96945: 50, 96940: 51, 85678: 52, 784: 53, 44793: 54, 96960: 55, 96924: 56, 96921: 57, 44828: 58, 96942: 59, 96925: 60, 7987: 61, 96931: 62, 96922: 63, 96954: 64, 96918: 65, 96923: 66, 73073: 67, 19432: 68, 96947: 69, 96981: 70, 96937: 71, 96934: 72, 96974: 73, 96938: 74, 92438: 75, 44814: 76, 96926: 77, 96944: 78, 71933: 79, 7986: 80, 96977: 81, 96950: 82, 7971: 83, 96919: 84, 96959: 85, 96979: 86, 22194: 87, 44838: 88, 96932: 89, 96939: 90, 96936: 91, 96958: 92, 96

In [9]:
''' Step 2: Create the graph using NetworkX, with a different color for each community,
and export its nodes and links as JSON for ForceGraph.

Reads `edges` (tuples of `(new_node1, new_node2, community)`) and writes
`output_json_path`. `edges` is left untouched so the cell can be re-run. '''

G = nx.Graph()

# Add edges to the graph, keeping the community ID as an edge attribute
for new_node1, new_node2, community in edges:
    G.add_edge(new_node1, new_node2, community=community)

# Assign colors to each community.
# NOTE: `plt` is the top-level matplotlib package here (see the import cell),
# not pyplot; `plt.cm` is matplotlib's colormap module.
communities = set(nx.get_edge_attributes(G, 'community').values())
colors = plt.cm.rainbow(np.linspace(0, 1, len(communities)))

# Pair colors with communities in sorted order so the mapping is deterministic
# instead of depending on set iteration order.
community_to_color = {community: f"rgb({int(color[0]*255)}, {int(color[1]*255)}, {int(color[2]*255)})"
                      for community, color in zip(sorted(communities), colors)}

# The community is only stored on edges, so G.nodes[node] never has a
# 'community' key and every node used to be exported with group 0.
# Each node now gets the community that occurs most often among its incident
# edges (ties resolve to the community encountered first).
community_counts_by_node = {}
for u, v, d in G.edges(data=True):
    for node in (u, v):
        counts = community_counts_by_node.setdefault(node, {})
        counts[d['community']] = counts.get(d['community'], 0) + 1

node_group = {node: max(counts, key=counts.get)
              for node, counts in community_counts_by_node.items()}

# Prepare data for ForceGraph.
# The link list gets its own name: overwriting `edges` with dicts made a
# second run of this cell build the graph from the dict keys.
nodes = [{"id": str(node), "group": node_group.get(node, 0)} for node in G.nodes()]
links = [{"source": str(u), "target": str(v), "community": d['community']} for u, v, d in G.edges(data=True)]

graph_data = {
    "nodes": nodes,
    "links": links
}

# Save to JSON format
with open(output_json_path, 'w') as f:
    json.dump(graph_data, f, indent=4)

print("Graph data saved to json file")


Graph data saved to json file


In [10]:
# List every community with its color, one quoted "'id': 'rgb(...)'," entry per line
print("Community to Color Mapping:")
for community_id, rgb in community_to_color.items():
    print("'{}': '{}',".format(community_id, rgb))

Community to Color Mapping:
'1': 'rgb(127, 0, 255)',
'2': 'rgb(109, 28, 254)',
'3': 'rgb(91, 56, 253)',
'4': 'rgb(73, 83, 251)',
'5': 'rgb(55, 109, 248)',
'6': 'rgb(37, 134, 245)',
'7': 'rgb(19, 157, 241)',
'8': 'rgb(0, 180, 235)',
'9': 'rgb(18, 199, 229)',
'10': 'rgb(36, 215, 223)',
'11': 'rgb(54, 229, 215)',
'12': 'rgb(72, 240, 208)',
'13': 'rgb(90, 248, 199)',
'14': 'rgb(108, 253, 190)',
'15': 'rgb(128, 254, 179)',
'16': 'rgb(146, 253, 169)',
'17': 'rgb(164, 248, 158)',
'18': 'rgb(182, 240, 147)',
'19': 'rgb(200, 229, 135)',
'20': 'rgb(218, 215, 123)',
'21': 'rgb(236, 199, 110)',
'22': 'rgb(255, 178, 96)',
'23': 'rgb(255, 157, 83)',
'24': 'rgb(255, 134, 69)',
'25': 'rgb(255, 109, 56)',
'26': 'rgb(255, 83, 42)',
'27': 'rgb(255, 56, 28)',
'28': 'rgb(255, 28, 14)',
'29': 'rgb(255, 0, 0)',


#### ADDITIONAL CODE (SMALL PYTHON SCRIPTS TO CALCULATE THE DENSITY OF COMMUNITIES AND THE DENSEST COMMUNITY) 

In [21]:
''' STEP 1: Store the number of edges for each community in one dict
and the nodes belonging to each community in another dict.

Every non-blank line of the input must hold three whitespace-separated
integers: `node1 node2 community`. '''

community_nodes = {}   # community ID -> set of node IDs seen on its edges
community_edges = {}   # community ID -> number of edge lines with that ID

# NOTE(review): this path differs from './inputRealGraph/communities.txt' used
# by the other cells -- confirm both refer to the same edge list.
with open('./dati/communities.txt', 'r') as file:
    for line in file:
        fields = line.split()

        # A blank or whitespace-only line splits into zero fields and would
        # make the 3-way unpacking below raise ValueError, so it is skipped.
        if not fields:
            continue

        node1, node2, community = (int(field) for field in fields)

        # One more edge for this community
        community_edges[community] = community_edges.get(community, 0) + 1

        # Both endpoints belong to this community
        community_nodes.setdefault(community, set()).update((node1, node2))

In [22]:
# Display the edge count recorded for each community ID
community_edges

{15: 16788,
 14: 496,
 10: 465,
 16: 741,
 17: 1361,
 5: 528,
 8: 946,
 7: 3088,
 1: 496,
 4: 1,
 19: 703,
 18: 465,
 20: 561,
 3: 1326,
 2: 1,
 21: 561,
 22: 465,
 6: 1,
 9: 528,
 13: 699,
 12: 1,
 23: 561,
 24: 703,
 25: 465,
 26: 465,
 27: 496,
 11: 630,
 28: 528,
 29: 528}

In [23]:
# Display the set of node IDs recorded for each community ID (the output can be very long)
community_nodes

{15: {159,
  782,
  784,
  1062,
  1096,
  1097,
  1100,
  1101,
  1102,
  1103,
  1104,
  1105,
  1106,
  1107,
  1108,
  1109,
  1110,
  1111,
  1112,
  1113,
  1115,
  1116,
  1117,
  1118,
  1120,
  1121,
  1122,
  1123,
  1124,
  1125,
  1126,
  1127,
  1129,
  1130,
  1131,
  1132,
  1133,
  1134,
  1135,
  1136,
  1137,
  1138,
  1139,
  1141,
  1142,
  1875,
  1876,
  1877,
  1878,
  1879,
  1880,
  1881,
  1882,
  1883,
  1884,
  1885,
  1886,
  1887,
  1888,
  1889,
  1890,
  1891,
  1892,
  1893,
  1894,
  1895,
  1896,
  1897,
  1898,
  1899,
  1900,
  1901,
  1902,
  1903,
  1904,
  1905,
  1906,
  1907,
  1908,
  1996,
  2440,
  3918,
  3963,
  4613,
  4859,
  5228,
  5860,
  5961,
  7971,
  7984,
  7986,
  7987,
  7989,
  7992,
  8695,
  9819,
  11638,
  11715,
  12126,
  12624,
  12801,
  12802,
  12803,
  12804,
  12805,
  13391,
  13397,
  13423,
  13527,
  13634,
  13850,
  13852,
  13875,
  13886,
  14976,
  15560,
  17534,
  17546,
  17919,
  18852,
  19432,
  1975

In [24]:
'''STEP 2: Compute, for every community, its density, defined here as
(number of edges) / (number of nodes), and keep the values in a dict
keyed by community ID. '''

# Only communities present in both dicts can be given a density
shared_ids = community_nodes.keys() & community_edges.keys()

community_density = {
    cid: community_edges[cid] / len(community_nodes[cid])
    for cid in shared_ids
}

community_density

{1: 15.5,
 2: 0.5,
 3: 25.5,
 4: 0.5,
 5: 16.0,
 6: 0.5,
 7: 25.311475409836067,
 8: 21.5,
 9: 16.0,
 10: 15.0,
 11: 17.5,
 12: 0.5,
 13: 17.923076923076923,
 14: 15.5,
 15: 21.746113989637305,
 16: 19.0,
 17: 20.313432835820894,
 18: 15.0,
 19: 18.5,
 20: 16.5,
 21: 16.5,
 22: 15.0,
 23: 16.5,
 24: 18.5,
 25: 15.0,
 26: 15.0,
 27: 15.5,
 28: 16.0,
 29: 16.0}

In [25]:
# Select the (community ID, density) pair with the highest density;
# on a tie the first one in dict order wins
max_key, max_value = max(community_density.items(), key=lambda item: item[1])

result_dict = {max_key: max_value}

print(result_dict)  # {ID densest community: density value}
print(max_key)  # ID densest community

{3: 25.5}
3


In [26]:
''' STEP 3: Create a text file containing only the lines (nodes and edges)
that belong to the densest community.

Reads `max_key` (ID of the densest community) and copies every matching line
of the input file unchanged to the output file. '''

input_filename = './inputRealGraph/communities.txt'
output_filename = './inputRealGraph/densest_community.txt'

with open(input_filename, 'r') as infile, open(output_filename, 'w') as outfile:
    for line in infile:
        fields = line.split()

        # A blank or whitespace-only line splits into zero fields and would
        # make the 3-way unpacking below raise ValueError, so it is skipped.
        if not fields:
            continue

        node1, node2, community = fields
        community = int(community)

        # Write the line in the output file only if the value of the third
        # column (community id) is the same as that of the densest community
        if community == max_key:
            outfile.write(line)