### Installing the dependencies

In [None]:
pip install networkx



In [None]:
import re
import matplotlib.pyplot as plt
import networkx as nx
from collections import defaultdict

def parse_input(input_str):
    """Parse a textual edge list into an adjacency mapping.

    Every line of *input_str* is searched for a ``<node> <marker> <node>``
    triple, where the marker is one of ``<->``, ``o->``, ``-->`` or ``o-o``.
    Lines that contain no such triple are silently ignored.

    Args:
        input_str: Multi-line string holding one edge per line.

    Returns:
        A ``defaultdict(set)`` mapping each node name to a set of
        ``(neighbour, edge_type)`` tuples. Each edge is registered under
        both of its endpoints with the same ``edge_type``.

    NOTE(review): because both endpoints store the identical marker, the
    result does not record which node was written on the left-hand side of
    the marker. Confirm this is intended for ``-->`` and ``o->``.
    """
    # Matches "Node1 edge_type Node2"; names may contain word chars and whitespace.
    edge_re = re.compile(r'(\b[\w\s]+\b)\s+((?:<->)|(?:o->)|(?:-->)|(?:o-o))\s+(\b[\w\s]+\b)')
    adjacency = defaultdict(set)

    for raw_line in input_str.strip().split('\n'):
        found = edge_re.search(raw_line)
        if found is None:
            continue

        # Trim the captured names so stray surrounding whitespace does not
        # create distinct nodes.
        left = found.group(1).strip()
        marker = found.group(2)
        right = found.group(3).strip()

        # The marker is part of the edge identity; store it on both endpoints.
        adjacency[left].add((right, marker))
        adjacency[right].add((left, marker))

    return adjacency


def jaccard_coefficient(graph1, graph2):
    """Return the Jaccard similarity of the edge sets of two graphs.

    Args:
        graph1: Mapping of node -> iterable of ``(neighbour, edge_type)``
            tuples.
        graph2: Mapping with the same layout as *graph1*.

    Returns:
        ``len(intersection) / len(union)`` of the two edge sets as a float,
        or the integer ``1`` when both graphs have no edges.

    Each edge is reduced to a ``frozenset`` of its node, neighbour and edge
    type, so the two mirrored adjacency entries of one edge collapse into a
    single key. The key therefore ignores which endpoint is listed first.
    """
    def _edge_keys(graph):
        # Flatten the adjacency mapping into hashable, order-free edge keys.
        keys = set()
        for node, neighbours in graph.items():
            for neighbour in neighbours:
                keys.add(frozenset((node, *neighbour)))
        return keys

    first = _edge_keys(graph1)
    second = _edge_keys(graph2)

    combined = first | second
    if not combined:
        # Two empty graphs are treated as identical.
        return 1

    shared = first & second
    return len(shared) / len(combined)

# Input strings for the two graphs.
# Edge list of the first graph: one edge per line, written as
# "<node> <marker> <node>" with tab-separated fields. The markers appearing
# in this list are "-->", "<->" and "o->".
input_str1 = """
ca	-->	cbo
	cam	-->	cbm
	cam	<->	ic
	dam	o->	cam
	dit	<->	ic
	ic	-->	cbm
	max_cc	<->	avg_cc
	max_cc	-->	cbo
	mfa	<->	avg_cc
	mfa	-->	ca
	mfa	-->	cbm
	mfa	<->	ic
	npm	<->	bug
	npm	-->	ce
	rfc	-->	wmc
	wmc	-->	cbo
	wmc	-->	moa
	dit	-->	mfa
	dit	<->	moa
	loc	<->	max_cc
	noc	o->	bug
	rfc	<->	loc
	wmc	<->	loc
	wmc	-->	npm
	amc	o->	avg_cc
	amc	-->	bug
	avg_cc	-->	cam
	cam	-->	rfc
	dam	o->	lcom3
	lcom	<->	npm
	lcom3	<->	cam
	max_cc	-->	rfc
	rfc	<->	cbm
	rfc	<->	ce
	lcom	-->	bug
	lcom	-->	cbo
	lcom	o->	loc
"""

# Edge list of the second graph, in the same one-edge-per-line,
# tab-separated "<node> <marker> <node>" layout. The markers appearing in
# this list are "-->", "<->", "o->" and "o-o".
input_str2 = """
amc	<->	avg_cc
	bug	-->	lcom
	ca	<->	amc
	ca	<->	mfa
	cam	<->	amc
	cam	o->	dam
	cam	-->	lcom3
	cam	-->	noc
	cbo	<->	ca
	cbo	<->	ce
	cbo	<->	rfc
	cbo	-->	wmc
	ce	<->	mfa
	ce	-->	rfc
	ce	<->	wmc
	dam	<->	amc
	dam	-->	lcom3
	dit	-->	cbm
	ic	-->	cbm
	ic	-->	dam
	ic	o-o	dit
	ic	-->	npm
	loc	-->	bug
	max_cc	-->	amc
	max_cc	-->	avg_cc
	max_cc	o->	loc
	mfa	-->	dit
	mfa	-->	ic
	moa	-->	cam
	moa	-->	dam
	rfc	-->	bug
	rfc	-->	loc
	rfc	-->	max_cc
	rfc	<->	wmc
	wmc	-->	bug
	wmc	-->	lcom
	wmc	-->	loc
	wmc	-->	moa
	wmc	-->	npm
"""

# Turn both edge lists into adjacency mappings up front.
graph1 = parse_input(input_str1)
graph2 = parse_input(input_str2)

# Show each parsed graph, separated by one blank line.
print("Edges in graph 1: ", graph1, end="\n\n")
print("Edges in graph 2: ", graph2)

# Compare the two edge sets and report their Jaccard similarity.
jc = jaccard_coefficient(graph1, graph2)
print(f'Jaccard Coefficient: {jc}')


Edges in graph 1:  defaultdict(<class 'set'>, {'ca': {('cbo', '-->'), ('mfa', '-->')}, 'cbo': {('wmc', '-->'), ('max_cc', '-->'), ('ca', '-->'), ('lcom', '-->')}, 'cam': {('rfc', '-->'), ('cbm', '-->'), ('avg_cc', '-->'), ('dam', 'o->'), ('ic', '<->'), ('lcom3', '<->')}, 'cbm': {('rfc', '<->'), ('cam', '-->'), ('ic', '-->'), ('mfa', '-->')}, 'ic': {('mfa', '<->'), ('dit', '<->'), ('cbm', '-->'), ('cam', '<->')}, 'dam': {('cam', 'o->'), ('lcom3', 'o->')}, 'dit': {('ic', '<->'), ('moa', '<->'), ('mfa', '-->')}, 'max_cc': {('cbo', '-->'), ('avg_cc', '<->'), ('loc', '<->'), ('rfc', '-->')}, 'avg_cc': {('mfa', '<->'), ('cam', '-->'), ('max_cc', '<->'), ('amc', 'o->')}, 'mfa': {('dit', '-->'), ('cbm', '-->'), ('ic', '<->'), ('ca', '-->'), ('avg_cc', '<->')}, 'npm': {('ce', '-->'), ('lcom', '<->'), ('bug', '<->'), ('wmc', '-->')}, 'bug': {('noc', 'o->'), ('npm', '<->'), ('lcom', '-->'), ('amc', '-->')}, 'ce': {('rfc', '<->'), ('npm', '-->')}, 'rfc': {('loc', '<->'), ('cam', '-->'), ('max_cc',