In [1]:
# Web-based loader: fetch one article and keep only its body text.
import os

# The loader import warns "USER_AGENT environment variable not set" when this
# variable is missing, so give it a default *before* the import below.
# A USER_AGENT already exported in the environment is left untouched.
os.environ.setdefault("USER_AGENT", "langchain-rag-notebook")

from langchain_community.document_loaders import WebBaseLoader
import bs4

ARTICLE_URL = "https://www.vulture.com/article/diddy-lawsuit-allegations-explainer.html"

# Restrict HTML parsing to elements carrying the article's sub-header and
# paragraph CSS classes.
article_strainer = bs4.SoupStrainer(class_=("clay-subheader", "clay-paragraph"))

loader = WebBaseLoader(
    web_path=(ARTICLE_URL,),
    bs_kwargs={"parse_only": article_strainer},
)

# Download and parse the page into LangChain documents.
text_docs = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking parameters passed to the splitter.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split the loaded page into overlapping chunks.
documents = text_splitter.split_documents(text_docs)

# Preview the first five chunks as the cell output.
documents[:5]

[Document(metadata={'source': 'https://www.vulture.com/article/diddy-lawsuit-allegations-explainer.html'}, page_content='Self-proclaimed “bad boy for life” Sean “Diddy” Combs is facing serious legal issues following numerous allegations of sexual assault, harassment, and sex trafficking throughout his career. On March 25, simultaneous bicoastal raids of his Los Angeles and Miami homes played out live on CNN while the hip-hop mogul was on his private jet amid what could turn out to be a very public reckoning. Then, on September 16, the indictment against Diddy was revealed, and he was charged on three counts, including sex trafficking by force. He pleaded not guilty before he was denied bail. He is currently being held in jail awaiting a trial. In addition to an investigation from Homeland Security and the federal government, he is also facing four federal lawsuits. A fifth, brought in late 2023 by his ex-girlfriend Cassandra “Cassie” Ventura, which included allegations of rape and phys

In [4]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

# Only the first 30 chunks are embedded and indexed.
indexed_chunks = documents[:30]
embedding_model = OllamaEmbeddings(model="llama2-uncensored")

# Build the FAISS vector store from the selected chunks.
db = FAISS.from_documents(indexed_chunks, embedding_model)

In [5]:
# Sanity-check the vector store with a direct similarity search.
query = "who is diddy"
retrieved_results = db.similarity_search(query)

# Print the text of the first returned chunk.
first_match = retrieved_results[0]
print(first_match.page_content)

Sean Combs’s 26-year-old son Christian “King” Combs is being sued for sexual assault, battery, and intentional infliction of emotional distress, among other allegations.The 31-page lawsuit brought by Grace O’Marcaigh claims that she was sexually assaulted while working as a yacht deckhand for the family during Christmas break in 2022 in the Caribbean. She is also asking that Sean “Diddy” Combs be held liable for his son’s actions for charting the yacht assuming responsibility for his guests throughout the Caribbean trip.O’Marcaigh alleges that there was “a constant rotation of suspected sex workers and other A-List celebrities such as French Montana and actor Cuba Gooding Jr.” on the yacht. O’Marcaigh alleges “sex workers were sprawled out unconscious about that yacht and it was difficult to distinguish which bottles of alcohol were laced with drugs and which bottles were not.” In the early morning of December 28, 2022, a “heavily intoxicated” Christian Combs physically assaulted her,


In [6]:
from langchain_community.llms import Ollama

## Load the Llama 2 LLM through Ollama
LLM_MODEL_NAME = "llama2-uncensored"
llm = Ollama(model=LLM_MODEL_NAME)

# Display the configured LLM as the cell output.
llm

  llm=Ollama(model="llama2-uncensored")


Ollama(model='llama2-uncensored')

In [10]:
## Design the chat prompt template
from langchain_core.prompts import ChatPromptTemplate

# Prompt text with two placeholders: {context} and {input}.
RAG_PROMPT_TEMPLATE = """
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
I will tip you $1000 if the user finds the answer helpful. 
<context>
{context}
</context>
Question: {input}"""

prompt = ChatPromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

In [11]:
## Chain introduction
from langchain.chains.combine_documents import create_stuff_documents_chain

# Combine the LLM and the prompt into a documents chain.
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
)

In [12]:
# Wrap the FAISS vector store in a retriever; no search options are passed.
retriever = db.as_retriever()
retriever  # show the retriever's repr as the cell output

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000012F251B7A90>, search_kwargs={})

In [13]:
from langchain.chains import create_retrieval_chain

# Join the retriever with the documents chain.
# NOTE: the variable keeps its original spelling because a later cell calls it by this name.
retrevial_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)


In [14]:
# Run the retrieval chain; the question is passed under the "input" key.
question = "who is diddy, and what did he do"
response = retrevial_chain.invoke({"input": question})

In [15]:
# Display the generated answer stored under the "answer" key.
response["answer"]

'Diddy is Sean Combs\' stage name. He is an American rapper, singer, record executive, entrepreneur, and actor. In the 1990s, he rose to fame as a member of the hip-hop group Puff Daddy & The Family, which was later renamed "Diddy". After leaving his group in 2007, Combs has released several albums under his own name, including "Last Train to Paris" (2010), "M.A.D. City" (2011) and "No Way Out 2" (2013). He also founded Bad Boy Records in 1993, which has since become one of the most successful labels in hip-hop music history. Aside from his music career, Combs is a frequent media personality and has appeared on several reality television shows, including "Making the Band", "I Want to Work for Diddy" and "The Four: Battle for Stardom".'