In [1]:
import duckduckgo_search

In [2]:
from duckduckgo_search import DDGS

In [3]:
from sentence_transformers import SentenceTransformer, util

  from .autonotebook import tqdm as notebook_tqdm


In [4]:

# People to look up. Each entry is used as one web-search query, and every
# snippet returned for it is stored as a node labelled with that name.
participants = [
    "Peter Manson",
    "Matthew Rothenberg",
    "Jeffrey Robinson",
    "Peter Middleton",
    "Allen Fisher",
    "Maggie O’Sullivan",
    "Phil Davenport",
    "Nia Davies",
    "Rachel Robinson",
    "Cecilia Vicuña",
    "Nicky Melville",
    "Calum Roger",
    "Maria Sledmere",
    "Jane Goldman",
    "Gerrie Fellows",
    "Jeff Hilson",
    "Rebecca Kosick",
    "Margento",
    "Tom Pickard",
    "Jèssica Pujol Duran",
    "Will Rowe",
    "Zoe Skoulding",
    "Rhys Trimble",
    "Carol Watts",
    "Javier Taboada",
    "Scott Thurston",
    "John Coyle",
    "Robert Sheppard",
    "MontenegroFisher",
    "Martín Gubbins",
    "Juan Carlos Villavicencio",
    "Tamara de Inés Antón",
    "Chris Tanasescu",
    "Felipe Cussen",
    "Colin Still",
    "Diane Rothenberg", # added by me
]

In [39]:
# Free-text "wild card" sentence. It is embedded with the same model as the
# snippets, and each node's similarity to it decides which direction an edge
# between two similar snippets is allowed to point (see the edge-building cell).
wild_card = "Diane Rothenberg [NOT] feeling left out of a Paradise of Poets"

In [5]:

# Names actually queried; currently the full participant list.
names = participants

NUM_RESULTS = 16  # passed as max_results to every text search
# NOTE(review): SIMILARITY_THRESHOLD_0 is not referenced by any cell visible
# in this notebook -- confirm whether it is still needed.
SIMILARITY_THRESHOLD_0 = 0.3
# Earlier candidate value; SIMILARITY_THRESHOLD_1 is assigned in its own cell.
#SIMILARITY_THRESHOLD_1 = 0.6

# --- Models ---
# Sentence-embedding model used to encode every snippet and the wild card.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [51]:
# Minimum cosine similarity two snippets must reach before the edge-building
# cell links them.
SIMILARITY_THRESHOLD_1 = 0.5

In [40]:

# Embed the wild-card sentence once so it can be compared against every
# snippet embedding when edges are built.
wild_card_embedding = model.encode(wild_card, convert_to_tensor=True)

In [None]:

import time  # not used in this cell; the scheduling cell further down calls time.sleep
nodes = []
edges = []

# --- Search and Node Creation ---
# Each search-result snippet becomes one node: a running integer id, the
# snippet text, the name that was searched for, and the snippet's embedding.
node_id = 0

# Use a single search client for the whole pass and let the context manager
# close it, instead of building a fresh DDGS() for every name.
with DDGS() as ddgs:
    for name in names:
        # Names containing a space are wrapped in double quotes
        # (presumably so the engine matches the exact phrase).
        query = f'"{name}"' if ' ' in name else name
        results = ddgs.text(query, max_results=NUM_RESULTS)
        # `or []` keeps the loop going if a search yields nothing / None.
        for res in results or []:
            snippet = res.get("body")
            if snippet:
                embedding = model.encode(snippet, convert_to_tensor=True)
                nodes.append({
                    "id": node_id,
                    "text": snippet,
                    "name": name,
                    "embedding": embedding
                })
                node_id += 1

In [None]:

# DYNAMICALLY UPDATING SEARCH RESULTS

def collect_data():
    """Run one search pass over ``names`` and append the results to ``nodes``.

    For every name a text search is issued (at most ``NUM_RESULTS`` hits).
    Each hit with a non-empty ``"body"`` is embedded with ``model`` and
    appended to the module-level ``nodes`` list as a dict with the keys
    ``id``, ``text``, ``name`` and ``embedding``. Ids continue from the
    current length of ``nodes``, so repeated calls keep extending the list.
    """
    global nodes
    next_id = len(nodes)  # continue numbering after the nodes already stored
    with DDGS() as client:
        for person in names:
            # Names containing a space are wrapped in double quotes
            # (presumably so the engine matches the exact phrase).
            search_term = person if ' ' not in person else f'"{person}"'
            for hit in client.text(search_term, max_results=NUM_RESULTS):
                body = hit.get("body")
                if not body:
                    # Skip hits that carry no snippet text.
                    continue
                vector = model.encode(body, convert_to_tensor=True)
                nodes.append({
                    "id": next_id,
                    "text": body,
                    "name": person,
                    "embedding": vector,
                })
                next_id += 1


In [None]:
!pip install schedule

In [None]:

import schedule
# import time

def monthly_job():
    """Trigger ``collect_data()`` only when today is the 1st of the month.

    Intended to be scheduled once per day; on every other day of the month
    the call is a no-op.
    """
    # No visible cell imports datetime, so import it here; otherwise the job
    # raises NameError the first time the scheduler fires it.
    import datetime

    today = datetime.date.today()
    if today.day == 1:  # Run only on the 1st of each month
        collect_data()

# The cell's own `import time` is commented out; import it here so the loop
# does not depend on an earlier cell having been run.
import time

# Re-running this cell would otherwise register the job a second time on the
# default scheduler (and collect twice on the 1st). Tag the job and drop any
# previously registered copy first.
schedule.clear("monthly_job")
schedule.every().day.at("09:00").do(monthly_job).tag("monthly_job")

# Poll the scheduler every 5 seconds. This loop never returns: the cell keeps
# the kernel busy until it is interrupted manually.
while True:
    schedule.run_pending()
    time.sleep(5)


In [7]:

# Number of snippet nodes collected so far.
len(nodes)

435

In [9]:

# Spot-check: print every snippet that was collected for "Diane Rothenberg".
for node in filter(lambda entry: entry['name'] == "Diane Rothenberg", nodes):
    print(node['text'])

Official website of East Coast-based actor and filmmaker Dianne Rothenberg. SAG • AFTRA; AEA.DIANNE ROTHENBERG OFFICIALACTOR | …I am very excited to have …DIANNE ROTHENBERG ACTOR | …See Dianne in her tv and film …
Diane Rothenberg's own book, Mothers of the Nation, in which this essay also appeared, was published by Pierre Joris's Ta’wil Books in the same year, & a newly revised & expanded edition …
-- · Education: Fordham University · Location: 10128. View Diane Rothenberg’s profile on LinkedIn, a professional community of 1 billion members.
Diane Rothenberg is on Facebook. Join Facebook to connect with Diane Rothenberg and others you may know. Facebook gives people the power to share and...
Dec 9, 2011 · Diane Brodatz Rothenberg Diane Brodatz Rothenberg b. NYC, 1932. B.A. Queens College, 1953; Ph.D. in Anthropology, The Graduate Center of the University of the City of New …
Jul 28, 2009 · Diane Rothenberg: On the Insanity of Cornplanter (Part One) [A professional anthropolgist & an a

In [49]:

import networkx as nx


In [52]:

import pickle

In [55]:

edges = []

# A node's similarity to the wild card depends only on that node, so compute
# it once per node instead of once per (source, target) pair.
wild_sims = [
    float(util.cos_sim(entry["embedding"], wild_card_embedding))
    for entry in nodes
]

# Consider every ordered pair of distinct nodes. A directed edge
# source -> target is created when the two snippets are similar enough AND
# the source is at least as close to the wild card as the target.
for i, source in enumerate(nodes):
    for j, target in enumerate(nodes):
        if i == j:
            continue

        sim = float(util.cos_sim(source["embedding"], target["embedding"]))

        if sim >= SIMILARITY_THRESHOLD_1 and wild_sims[i] >= wild_sims[j]:
            # Links between snippets of different people weigh 2,
            # links between snippets of the same person weigh 1.
            weight = 1 if source["name"] == target["name"] else 2
            edges.append((source["id"], target["id"], {
                "similarity": sim,
                "weight": weight
            }))

# Persist the finished edge list once, after both loops. Previously the dump
# sat inside the inner loop and rewrote the whole file for every pair.
with open('jerry_web_searches_edges_margento_manifest_o_1.pkl', 'wb') as fp:
    pickle.dump(edges, fp)

In [56]:

# Assemble the directed multigraph from the collected nodes and edges.
G = nx.MultiDiGraph()

# Nodes keep their snippet text and the searched name as attributes;
# the embedding is not copied onto the graph.
G.add_nodes_from(
    (record["id"], {"text": record["text"], "name": record["name"]})
    for record in nodes
)

# Each edge is a (source_id, target_id, attribute_dict) triple.
G.add_edges_from(edges)

node_total = G.number_of_nodes()
edge_total = G.number_of_edges()
print(f"Graph has {node_total} nodes and {edge_total} edges.")

Graph has 435 nodes and 1212 edges.


In [57]:

# Serialize the graph to disk with the highest available pickle protocol.
graph_path = 'margento_jerry_web_searches_plus_wildcard_graph.gpickle'
with open(graph_path, 'wb') as graph_file:
    pickle.dump(G, graph_file, protocol=pickle.HIGHEST_PROTOCOL)