In [6]:
import itertools
import pickle

import networkx as nx
import numpy as np

In [2]:

# Unpickle the first source graph. pickle.load can execute arbitrary code, so
# only point this at files you produced yourself.
with open('margento_jerry_ai_dialogs_and_wildcard.gpickle', 'rb') as ai_graph_file:
    jerry_ai_graph = pickle.load(ai_graph_file)

In [3]:

# Unpickle the second source graph (trusted, locally produced file).
with open('margento_jerry_web_searches_plus_wildcard_graph.gpickle', 'rb') as web_graph_file:
    jerry_glasgow_web = pickle.load(web_graph_file)

In [4]:

# Unpickle the third source graph (trusted, locally produced file).
with open('margento_jerry_mistici_hoti_nebuni_plus_wildcard_graph.gpickle', 'rb') as ro_graph_file:
    jerry_ro = pickle.load(ro_graph_file)

In [7]:

# Start from an empty directed multigraph and fold the three source graphs
# into it, in this order.
# NOTE(review): nodes sharing an ID across the sources are merged into one
# node by update() - confirm the ID spaces are meant to overlap (or not).
combined_jerry_graph = nx.MultiDiGraph()
for source_graph in (jerry_ai_graph, jerry_glasgow_web, jerry_ro):
    combined_jerry_graph.update(source_graph)

In [8]:

# Report the size of the combined graph.
node_count = combined_jerry_graph.number_of_nodes()
edge_count = combined_jerry_graph.number_of_edges()
print(f"Number of nodes: {node_count}")
print(f"Number of edges: {edge_count}")


Number of nodes: 751
Number of edges: 3199


In [None]:

# Rebuild the combined graph from scratch instead of calling update() again on
# the already-populated MultiDiGraph. update() passes the source edges to
# add_edges_from() without edge keys, and a multigraph assigns a fresh key to
# every such edge, so a second update() with the same sources would add a
# parallel copy of every edge. Starting from an empty graph keeps this cell
# safe to run any number of times.
combined_jerry_graph = nx.MultiDiGraph()
for source_graph in (jerry_ai_graph, jerry_glasgow_web, jerry_ro):
    combined_jerry_graph.update(source_graph)

In [11]:

def print_node_attribute_keys(graph, name="Graph"):
    """Print the union of attribute names used by the nodes of ``graph``.

    Args:
        graph: Object whose ``nodes(data=True)`` yields ``(node, attr_dict)``
            pairs.
        name: Label printed in front of the key set.
    """
    seen_keys = set()
    for node_and_attrs in graph.nodes(data=True):
        # The second item of each pair is that node's attribute dictionary.
        seen_keys.update(node_and_attrs[1].keys())
    label = f"{name} - Node attribute keys:"
    print(label, seen_keys)

# Show which node attributes each source graph carries, in load order.
for graph_label, labelled_graph in (
    ("jerry_ai_graph", jerry_ai_graph),
    ("jerry_glasgow_web", jerry_glasgow_web),
    ("jerry_ro", jerry_ro),
):
    print_node_attribute_keys(labelled_graph, graph_label)


jerry_ai_graph - Node attribute keys: {'embedding', 'dialogue', 'text', 'speaker', 'theme'}
jerry_glasgow_web - Node attribute keys: {'text', 'name'}
jerry_ro - Node attribute keys: {'embedding', 'text', 'wildcard_sim'}


In [24]:

from sentence_transformers import SentenceTransformer, util

In [13]:

# Sentence-embedding checkpoint shared by every encode() call in this notebook.
EMBEDDING_MODEL_NAME = "paraphrase-multilingual-MiniLM-L12-v2"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

In [14]:

# Wildcard context passage (updated version). The text is embedded as-is, so
# any edit to it - including whitespace - changes the resulting embedding.
context = """
Diane Rothenberg [NOT] feeling left out of a Paradise of Poets. 
In the news: 2025. 
Crimes against humanity committed by Russia in Ukraine. 
Separatists backed by Russia also committed such crimes against Romanian-speaking Moldovans in Transnistria during the frozen conflict of 1992. 
The greatest war against children in history perpetrated by Israel in Gaza. 
President Trump and ‘vice Vance’ crave Greenland’s ice sheet while denying climate change.
"""

In [15]:

# Embed the context passage and keep the result as a NumPy array.
context_embedding = model.encode(
    context,
    convert_to_numpy=True,
)

In [36]:

#import torch
#import torch.nn.functional as F

#def cosine_similarity(a, b):
    #return F.cosine_similarity(a, b, dim=0).item()

def add_context_similarity_edges(
    graph,
    context_embedding,
    similarity_threshold=0.8,
    encoder=None,
    embeddings_path='jerry_combo_embeddings_margento_manifest_o_1.pkl',
    edge_weight=2.7,
):
    """Link every pair of nodes whose text is close enough to a context.

    Every node that has a ``"text"`` attribute is embedded with ``encoder`` and
    scored by cosine similarity against ``context_embedding``. All nodes whose
    score is at least ``similarity_threshold`` are then connected pairwise
    (one new edge per pair, skipped if the pair is already connected in either
    direction). Each new edge points from the node with the higher context
    similarity to the one with the lower; on a tie it points from the later
    node to the earlier one in iteration order.

    The graph is modified in place. Note that k qualifying nodes can produce
    up to k * (k - 1) / 2 new edges.

    Args:
        graph: Graph-like object providing ``nodes(data=True)``, ``has_edge``
            and ``add_edge``.
        context_embedding: Embedding vector of the context passage; flattened
            to one dimension before use.
        similarity_threshold: Minimum cosine similarity to the context for a
            node to take part in the pairwise linking.
        encoder: Object with ``encode(texts, convert_to_numpy=True)``. When
            ``None``, the notebook-level ``model`` is used.
        embeddings_path: File to which the freshly computed node embeddings
            are pickled (array only, rows in node iteration order restricted
            to nodes that have text). Pass ``None`` to skip writing.
        edge_weight: ``weight`` attribute stored on every edge that is added.

    Returns:
        None.
    """
    if encoder is None:
        # Fall back to the SentenceTransformer defined at notebook level.
        encoder = model

    # Step 1: collect the nodes that carry text, in graph iteration order.
    node_ids = []
    texts = []
    for node_id, data in graph.nodes(data=True):
        if "text" in data:
            node_ids.append(node_id)
            texts.append(data["text"])

    if not texts:
        # Nothing to embed, hence nothing to link.
        return

    embeddings = encoder.encode(texts, convert_to_numpy=True)

    if embeddings_path is not None:
        with open(embeddings_path, 'wb') as fp:
            pickle.dump(embeddings, fp)

    # Step 2: cosine similarity of every node embedding to the context,
    # computed in one vectorised pass. Norms are floored at a tiny epsilon so
    # an all-zero vector scores 0 instead of dividing by zero.
    embedding_matrix = np.asarray(embeddings, dtype=float)
    context_vector = np.asarray(context_embedding, dtype=float).ravel()
    row_norms = np.maximum(np.linalg.norm(embedding_matrix, axis=1), 1e-12)
    context_norm = max(float(np.linalg.norm(context_vector)), 1e-12)
    scores = (embedding_matrix @ context_vector) / (row_norms * context_norm)
    node_similarities = {
        node_id: float(score) for node_id, score in zip(node_ids, scores)
    }

    # Step 3: keep only the nodes that are similar enough to the context.
    nodes_above_thresh = [
        node_id
        for node_id, sim in node_similarities.items()
        if sim >= similarity_threshold
    ]

    # Step 4: connect every unordered pair of qualifying nodes. No pairwise
    # node-to-node similarity is computed; only the context scores are used,
    # and only to pick the edge direction.
    for n1, n2 in itertools.combinations(nodes_above_thresh, 2):
        if node_similarities[n1] > node_similarities[n2]:
            source, target = n1, n2
        else:
            source, target = n2, n1

        # Skip pairs that are already connected in either direction.
        if graph.has_edge(source, target) or graph.has_edge(target, source):
            continue

        graph.add_edge(
            source,
            target,
            weight=edge_weight,
            relation="context_similar",
            # Look the scores up by endpoint so the attribute names always
            # match the direction the edge was actually given.
            context_similarity_source=node_similarities[source],
            context_similarity_target=node_similarities[target],
        )


In [39]:

# Link nodes whose similarity to the context is at least 0.40; the combined
# graph is modified in place.
add_context_similarity_edges(
    graph=combined_jerry_graph,
    context_embedding=context_embedding,
    similarity_threshold=0.40,
)

In [40]:

# Report the graph size again now that context-similarity edges were added.
node_count = combined_jerry_graph.number_of_nodes()
edge_count = combined_jerry_graph.number_of_edges()
print(f"Number of nodes: {node_count}")
print(f"Number of edges: {edge_count}")

Number of nodes: 751
Number of edges: 3665


In [42]:

# Persist the combined graph using the newest pickle protocol available.
with open('margento_jerry_combo_graph.gpickle', 'wb') as combo_graph_file:
    pickle.dump(
        combined_jerry_graph,
        combo_graph_file,
        protocol=pickle.HIGHEST_PROTOCOL,
    )