In [1]:
import dotenv
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain import HuggingFaceHub
import os

In [2]:
# Load environment variables
dotenv.load_dotenv()

True

In [3]:
# Fetch the API key from environment variables
llm_key = os.getenv("llm_key")

In [4]:
# Ensure the API key is present
if not llm_key:
    raise ValueError("Missing Hugging Face API Key in environment variables.")

In [12]:
# Initialize the Hugging Face Hub LLM
llm = HuggingFaceHub(
    huggingfacehub_api_token=llm_key,
    repo_id="google/flan-t5-large",
    model_kwargs={"temperature": 0.7, "max_length": 200}
)

In [13]:
prompt1 = PromptTemplate(
    input_variables=['word'],
    template="What is the antonym of {word} in english?"
)

In [14]:
# Create the LLM chain
chain1 = LLMChain(llm=llm, prompt=prompt1, output_key="antonym")

In [15]:
# Run the chain
result = chain1.run("happy")
print("Antonym:", result)

Antonym: sad


In [16]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [17]:
def load_pdf(data):
    loader = DirectoryLoader(data,
                    glob="*.pdf",
                    loader_cls=PyPDFLoader)
    
    documents = loader.load()

    return documents

In [18]:
extracted_data = load_pdf("pdfs")

In [20]:
#Create text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [21]:
splits = text_split(extracted_data)

In [24]:
print(splits[0])

page_content='PROFESSIONAL ETHICS (HUL-304) 
ASSIGNMENT 
 
 
 
Name: Parth Singh 
Branch: ECE-IOT 
Enrollment Number: BT22ECI026 
 
 
 
Submitted to- Dr. Vikrant Dhenge' metadata={'source': 'pdfs\\BT22ECI026(PARTH SINGH)(PE ASSIGNMENT).pdf', 'page': 0}


In [25]:
#download embedding model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [27]:
embeddings = download_hugging_face_embeddings()

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [28]:
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

  attn_output = torch.nn.functional.scaled_dot_product_attention(


Length 384


In [29]:
from langchain.vectorstores import Chroma


In [30]:
persist_directory = 'db'

In [32]:
vectordb = Chroma.from_documents(documents=splits,embedding=embeddings,persist_directory=persist_directory)

In [33]:
vectordb.persist()

  vectordb.persist()


In [34]:
vectordb = None

In [36]:
vectordb = Chroma(persist_directory=persist_directory,embedding_function=embeddings)

  vectordb = Chroma(persist_directory=persist_directory,embedding_function=embeddings)


In [37]:
vectordb

<langchain_community.vectorstores.chroma.Chroma at 0x21017868fd0>

In [52]:
retriever = vectordb.as_retriever(search_kwargs={"k": 2})

In [53]:
docs = retriever.get_relevant_documents("corporate")

In [54]:
print(docs)

[Document(metadata={'page': 4, 'source': 'pdfs\\BT22ECI026(PARTH SINGH)(PE ASSIGNMENT).pdf'}, page_content='Case:    A university laboratory discovers a new method for cleaning up oil \nspills with minimal environmental damage. However, commercializing the \ntechnology requires partnerships with oil companies, raising concerns about \npotential exploitation.   \n \n   Questions and Answers:      \n1.    How should research teams navigate ethical concerns when collaborating \nwith industry partners?'), Document(metadata={'page': 1, 'source': 'pdfs\\BT22ECI026(PARTH SINGH)(PE ASSIGNMENT).pdf'}, page_content='Answer:  The company should involve local communities in the planning \nprocess, offer fair compensation, provide alternate livelihoods, and ensure the \npreservation of cultural practices.   \n \n2.  What ethical responsibilities do companies have toward affected \ncommunities during such projects?    \n    Answer:  Companies must prioritize community engagement, ensure \ntransparen

In [55]:
from langchain.chains import RetrievalQA

In [56]:
qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                  chain_type="stuff",
                                  retriever=retriever,
                                  return_source_documents=True)

In [57]:
def process_llm_response(llm_response):
    print(llm_response['result'])
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])

In [67]:
query = "what are some professinal ethics"

In [68]:
llm_response = qa_chain(query)

In [69]:
llm_response

{'query': 'what are some professinal ethics',
 'result': 'Companies should adopt flexible licensing models, collaborate with governments and NGOs, and prioritize global health over exclusive profits.',
 'source_documents': [Document(metadata={'page': 0, 'source': 'pdfs\\BT22ECI026(PARTH SINGH)(PE ASSIGNMENT).pdf'}, page_content='PROFESSIONAL ETHICS (HUL-304) \nASSIGNMENT \n \n \n \nName: Parth Singh \nBranch: ECE-IOT \nEnrollment Number: BT22ECI026 \n \n \n \nSubmitted to- Dr. Vikrant Dhenge'),
  Document(metadata={'page': 7, 'source': 'pdfs\\BT22ECI026(PARTH SINGH)(PE ASSIGNMENT).pdf'}, page_content='Answer: Companies should adopt flexible licensing models, collaborate \nwith governments and NGOs, and prioritize global health over exclusive profits.   \n \n2.       What ethical responsibilities do corporations have in global healthcare \ncrises?         \n         Answer: Corporations must act with urgency, ensure equitable \ndistribution, and balance profitability with moral obligati

In [70]:
process_llm_response(llm_response)

Companies should adopt flexible licensing models, collaborate with governments and NGOs, and prioritize global health over exclusive profits.


Sources:
pdfs\BT22ECI026(PARTH SINGH)(PE ASSIGNMENT).pdf
pdfs\BT22ECI026(PARTH SINGH)(PE ASSIGNMENT).pdf
