In [140]:
# Install the missing package
#%pip install sentence-transformers

import pandas as pd 
from sentence_transformers import SentenceTransformer

from sklearn.cluster import AgglomerativeClustering
import numpy as np 

In [141]:
# Load every synthetic entity-mention CSV and stack them into one corpus, `df`.
#
# Only the first four frames are given an explicit `entity` label here. The
# remaining files are concatenated exactly as read -- presumably they already
# carry their own `entity` column; TODO confirm, otherwise those rows end up
# with a NaN label after the concat.
ar_dj = pd.read_csv('Synthetic_Aaron_Rodgers_Entity_Mentions.csv')
ar_dj['entity'] = 'Aaron Rodgers'
mj_df = pd.read_csv('Synthetic_Michael_Jordan_Entity_Mentions.csv')
mj_df['entity'] = 'Michael Jordan'
sj_df = pd.read_csv('Synthetic_Steve_Jobs_Entity_Mentions.csv')
sj_df['entity'] = 'Steve Jobs'
vp_pd = pd.read_csv('Synthetic_Vladimir_Putin_Entity_Mentions.csv')
vp_pd['entity'] = 'Vladimir Putin'
cp_df = pd.read_csv('Synthetic_Chinese_Business_Enterprise_Mentions.csv')
ol_df = pd.read_csv('Synthetic_Osama_Bin_Laden_Entity_Mentions.csv')
ad_df = pd.read_csv('Synthetic_Arms_Dealer_Entity_Mentions.csv')
# `cb_df` used to be a second read of the very same Chinese-business file as
# `cp_df`, and both were concatenated, so each of those mentions appeared twice
# in `df` (and therefore twice in anything built from it). The name is kept as
# an independent copy for any later cell that refers to it, but it is no
# longer part of the concat.
cb_df = cp_df.copy()
tw_df = pd.read_csv('Synthetic_Tiger_Woods_Entity_Mentions.csv')
re_df = pd.read_csv('Synthetic_Random_Entity_Mentions.csv')
ts_df = pd.read_csv('Synthetic_Taylor_Swift_Entity_Mentions.csv')

# Each source file contributes its rows exactly once; the index is rebuilt so
# row labels are unique across the combined frame.
df = pd.concat(
    [ar_dj, mj_df, sj_df, vp_pd, cp_df, ol_df, ad_df, tw_df, re_df, ts_df],
    ignore_index=True,
)

Simple step-by-step pipeline

In [142]:
# Wrap every row of `df` in a LangChain Document: the row's 'text' value becomes
# the page content, and the complete row (as a plain dict) is stored in the
# metadata under the 'entity' key.
from langchain.schema import Document

document = [
    Document(page_content=fields['text'], metadata={'entity': fields})
    for fields in (row.to_dict() for _, row in df.iterrows())
]

In [143]:
# Sanity-check the metadata attached to the documents. Only the first few are
# shown: printing one line per document for the whole corpus floods the
# notebook output without adding information.
for doc in document[:10]:
    print(doc.metadata['entity'])

{'text': 'Mr. Rodgers was featured in a sports segment on Lambeau Field.', 'entity': 'Aaron Rodgers'}
{'text': 'Aaron Rodgers participated in offseason training at Lambeau Field.', 'entity': 'Aaron Rodgers'}
{'text': 'The Quarterback was featured in a sports segment on the Pat McAfee Show.', 'entity': 'Aaron Rodgers'}
{'text': 'Green Bay QB hosted a charity event at ESPN.', 'entity': 'Aaron Rodgers'}
{'text': 'Rodgers led a fourth-quarter comeback in a quarterback camp.', 'entity': 'Aaron Rodgers'}
{'text': 'The Quarterback hosted a charity event at NFL Honors.', 'entity': 'Aaron Rodgers'}
{'text': 'A. Rodgers was featured in a sports segment on the Pat McAfee Show.', 'entity': 'Aaron Rodgers'}
{'text': 'AR12 gave an interview to NBC Sports.', 'entity': 'Aaron Rodgers'}
{'text': 'Aaron Rodgers gave an interview to ESPN.', 'entity': 'Aaron Rodgers'}
{'text': 'AR12 signed a new contract with ESPN.', 'entity': 'Aaron Rodgers'}
{'text': 'Mr. Rodgers signed a new contract with ESPN.', 'enti

Text Splitter

In [144]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the documents into chunks using a size limit of 100 and an overlap of
# 20 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=100,
)
chunks = text_splitter.split_documents(document)

Embed and store in FAISS

In [145]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

In [146]:
# Embedding model used to vectorise the chunks (identified by its Hugging Face
# model name).
embed_model = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-mpnet-base-v2',
)
# Build the FAISS vector store from the chunked documents.
vector_db = FAISS.from_documents(documents=chunks, embedding=embed_model)

Create Retrieval

In [147]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOllama


In [148]:
# Chat model served through Ollama that will generate the answers.
llm = ChatOllama(model="llama3")
# Retriever view over the FAISS store, created with its default settings.
retriever = vector_db.as_retriever()



In [149]:
# Question-answering chain that combines the retriever with the chat model.
# `return_source_documents=True` asks the chain to return the retrieved
# documents alongside the generated answer.
qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    return_source_documents=True,
    chain_type='stuff',
)

In [150]:
# Query 1: identify an entity from an indirect description (team and city).
query = "What is the name of the person who played football in Green Bay?"
response = qa(query)
print(response['result'])

Based on the provided context, it seems that Aaron Rodgers is the Green Bay QB who was featured in various sports segments and was traded to a new team during NFL Honors.


In [151]:
# Query 2: ask about an activity rather than a name (the question text is kept
# verbatim, misspellings included).
query = "Who is coducting illict trafficking of weaposns?"
response = qa(query)
print(response['result'])

Based on the given context, it appears that Black Market Liaison and Shadow Broker are both involved in illicit trafficking of weapons. Specifically:

* Black Market Liaison used a front company to ship cargo through the Tri-Border Area, suggesting they may be conducting arms trafficking.
* Shadow Broker was connected to arms trafficking routes near a remote airstrip in Sudan.

It seems that both Black Market Liaison and Shadow Broker are involved in illegal weapon trading activities.


In [152]:
# Query 3: describe a person under a placeholder name and ask the chain to
# resolve who it might be.
query = (
    "Jane Doe has released multiple surprise albums during the pandemic, "
    "often writing and producing them herself. "
    "She avoids public political statements but once criticized a senator. "
    "Can you tell me who she might actually be?"
)
response = qa(query)
print(response['result'])

Based on the context provided, it's possible that Jane Doe is actually Taylor Swift. Taylor Swift was featured in a documentary about Spotify, which matches one of the given pieces of context. Additionally, Taylor Swift has released surprise albums during the pandemic and has been known to write and produce her own music. She also tends to avoid public political statements but has occasionally spoken out against certain politicians or policies.


In [153]:
# Query 4: ask directly whether a specific entity is present in the corpus.
query = "Does this database have Steve Jobs?"
response = qa(query)
print(response['result'])

Yes, based on the provided context, it appears that Steve Jobs is mentioned several times as being featured or appearing on covers related to Apple events and products, such as Macworld, iPhone launch, WWDC, and Apple Event.


In [154]:
# Query 5: same placeholder-name description as before, prefixed with an
# instruction to focus on the person's job and similar activities.
query = (
    "Based on similar activities, focus on job: "
    "Jane Doe has released multiple surprise albums during the pandemic, "
    "often writing and producing them herself. "
    "She avoids public political statements but once criticized a senator. "
    "Can you tell me who she might actually be?"
)
response = qa(query)
print(response['result'])

A fun challenge!

Given Jane Doe's activities of releasing surprise albums, writing and producing them herself, and avoiding public political statements with one notable exception (criticizing a senator), I can try to make an educated guess.

Considering the context provided earlier, it seems that some celebrities have been involved in cybersecurity-related incidents or have taken stands on certain issues. With Jane Doe's behavior mirroring Taylor Swift's in terms of surprise album releases and self-production, I'm going to take a wild guess and say that Jane Doe might actually be Taylor Swift!



Next step: add a political spectrum to simulate entities ranging from dictatorship to resistance against dictatorship.