In [1]:

import os
from dotenv import load_dotenv, find_dotenv
# Locate a .env file with find_dotenv() and load its variables into the
# process environment; the return value is deliberately discarded via `_`.
_ = load_dotenv(find_dotenv())
# Read the key with [] rather than .get() so a missing variable fails right
# here with a KeyError instead of later inside an API call.
openai_api_key = os.environ["OPENAI_API_KEY"]

In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.vectorstores import VectorStore
# Corrected import paths. `DocumentLoader`, `Retriever` and the module
# `langchain_core.chains` do not exist, so the original three lines raised
# ImportError on a fresh kernel. The loader/retriever base classes are
# `BaseLoader` and `BaseRetriever`; `RetrievalQA` ships with the `langchain`
# package (the same package a later cell imports its chain helpers from).
from langchain_core.document_loaders import BaseLoader
from langchain_core.retrievers import BaseRetriever
from langchain.chains import RetrievalQA

In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.vectorstores import VectorStore
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader
# from langchain.retrievers import Retriever
# from langchain_core.chains import RetrievalQA

In [11]:
# Point a PDF loader at the policy draft stored under the local data/ folder.
loaders = PyPDFLoader("data/National AI Policy Consultation Draft V1.pdf")

# Read the file; the recorded run produced 41 documents
# (presumably one per PDF page — the metadata carries a 'page' number).
pages = loaders.load()

In [12]:
len(pages)

41

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter for cutting the loaded pages into overlapping pieces: each chunk is
# capped at 1000 units and neighbouring chunks share 100 units
# (units are presumably characters — confirm against the splitter's length function).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=1000)

In [17]:
# Break every loaded page into chunks with the configured splitter
# (150 chunks in the recorded run).
splitted_text_chunks = text_splitter.split_documents(pages)

In [19]:
len(splitted_text_chunks)

150

In [21]:
# Embedding model used to vectorise the chunks, created with library defaults.
# Presumably it picks up OPENAI_API_KEY from the environment loaded in the
# first cell — confirm.
embeddings = OpenAIEmbeddings()

In [None]:
from langchain.vectorstores import FAISS

# Embed the chunks and build a FAISS vector store over them.
# NOTE(review): this presumably calls the embeddings API for all chunks, so
# re-running the cell has a cost — consider caching the index.
vector_db = FAISS.from_documents(splitted_text_chunks, embeddings)

In [27]:
vector_db

<langchain_community.vectorstores.faiss.FAISS at 0x2297aac85d0>

In [28]:
# Expose the vector store as a retriever with default search settings
# (the recorded run returned 4 documents per query).
retriever = vector_db.as_retriever()

In [29]:
# Fetch the chunks the retriever ranks as most relevant to a sample question.
response = retriever.invoke("What is the national AI policy?")

In [30]:
response

[Document(metadata={'source': 'data/National AI Policy Consultation Draft V1.pdf', 'page': 5}, page_content='of AI in the country . For that, the policy document is put in place to reap long -term and sustainable benefits \nfor its people.  \nThe policy document offers a wide range of developmental initiatives necessary for awareness and \nadoption, reimagining the transparent and fair use of personal data using AI and stimulating innovation \nthrough industry -academia collaborations and investments in AI-led initiatives. The National AI Policy is \ncrafted to focus on the equitable distribution o f opportunity and its responsible use , having the following \ndefining attributes.  \n• Evidence -Based and Target Oriented  \n• User -Centric and Forward -Looking  \n• Objective and Overarching  \nThe AI policy further aims to augment AI and allied technologies through balanced demand and supply -\nside interventions , as briefly described below.   \n• Market Enablement - Establishment  of

In [31]:
len(response)

4

In [47]:
# Goal: get a single hit, and look only at its text rather than the whole Document.
# - Single hit: `search_kwargs` is forwarded to the vector store's search call,
#   whose result-count argument is named `k` (default 4). The key used before,
#   "top_k", is not a recognised argument, so the default of four results
#   stayed in effect.
# - Text only: read `.page_content` from the returned Document.
# NOTE: later cells reuse `retriever`, so they will also receive one chunk;
# raise `k` here if the downstream chain needs more context.

retriever = vector_db.as_retriever(search_kwargs={"k": 1})

In [48]:
# Re-run the same sample question against the reconfigured retriever.
response = retriever.invoke("What is the national AI policy?")

In [52]:
# Show just the text of the first hit; `.page_content` leaves the metadata out.
response[0].page_content

'of AI in the country . For that, the policy document is put in place to reap long -term and sustainable benefits \nfor its people.  \nThe policy document offers a wide range of developmental initiatives necessary for awareness and \nadoption, reimagining the transparent and fair use of personal data using AI and stimulating innovation \nthrough industry -academia collaborations and investments in AI-led initiatives. The National AI Policy is \ncrafted to focus on the equitable distribution o f opportunity and its responsible use , having the following \ndefining attributes.  \n• Evidence -Based and Target Oriented  \n• User -Centric and Forward -Looking  \n• Objective and Overarching  \nThe AI policy further aims to augment AI and allied technologies through balanced demand and supply -\nside interventions , as briefly described below.   \n• Market Enablement - Establishment  of research & innovation cent ers in AI for developing, test -'

**Using Simple LCEL**

In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [61]:
# RAG prompt: {context} receives the retrieved text, {question} the user's query.
template = """Answer the question based on the following context:

{context}

Question {question}
"""

# Not used: the parser is instantiated inline where the chain is assembled.
# output_parser = StrOutputParser()

# Build a chat prompt from the template string above.
prompt = ChatPromptTemplate.from_template(template)

# Chat model with library defaults (no model name or temperature is set here).
model = ChatOpenAI()

In [62]:
def format_docs(docs):
    """Merge retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line; an empty string when ``docs`` is empty.
    """
    contents = (doc.page_content for doc in docs)
    return "\n\n".join(contents)

In [None]:
# LCEL pipeline. The leading dict fans the input question out to two keys:
#   "context"  - the question goes through the retriever and the hits are
#                merged into a single string by format_docs;
#   "question" - the original input, passed through unchanged.
# That mapping fills the prompt, the model answers, and StrOutputParser
# turns the model's reply into a plain string.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [65]:
# Run the full retrieval + generation chain on a sample question;
# the result is the answer as a plain string.
response = chain.invoke("what is the national AI policy?")

In [66]:
response

'The national AI policy is a comprehensive document put in place by the government of Pakistan to create awareness, promote the responsible use of AI, stimulate innovation through collaborations, and address challenges and risks associated with AI adoption in the country. It focuses on investment in research and development infrastructure, ethical and responsible use of AI, job displacement concerns, and collaboration with other countries to share best practices and expertise in AI.'

In [71]:
from operator import itemgetter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [72]:
# Conversation memory for the chat cell that follows; starts empty.
# Re-running this cell discards any turns collected so far.
history = []

In [78]:
# One chat turn with conversation memory (this chain does no retrieval).
# NOTE(review): the first line of the prompt still mentions a "context" although
# the template has no {context} placeholder — consider rewording it.
template = """Answer the question based on the following context:

Question {question}

conversation_history: {chat_history}
"""

prompt = ChatPromptTemplate.from_template(template)

# Pull the two prompt inputs out of the dict handed to chain.invoke().
question_fetcher = itemgetter("question")
chat_history_fetcher = itemgetter("chat_history")
model = ChatOpenAI()
setup = {"question": question_fetcher, "chat_history": chat_history_fetcher}
chain = setup | prompt | model | StrOutputParser()

query = "tell me about Karachi"

# `history` stores one (user_line, ai_line) tuple per turn, but str.join()
# accepts only strings, so joining the list directly raised TypeError from the
# second turn onwards. Flatten the entries into individual lines first; bare
# strings left over from earlier runs of this cell are tolerated as well.
history_lines = []
for turn in history:
    history_lines.extend([turn] if isinstance(turn, str) else turn)

response = chain.invoke(
    {"question": query, "chat_history": "\n".join(history_lines)})
print(response)

# Record the turn with speaker prefixes so later prompts can tell the sides apart.
query = "user_question:"+query
response = "ai_response:"+response
history.append((query, response))

Karachi is the largest city in Pakistan and serves as its financial and economic hub. It is located on the coast of the Arabian Sea and is known for its diverse population, vibrant culture, and bustling markets. Karachi is home to various industries, including textiles, manufacturing, and finance. The city also has a rich history, with landmarks such as the Quaid-e-Azam's Mausoleum, Karachi's Port Grand, and the Karachi Safari Park. Overall, Karachi is a dynamic and lively city with a lot to offer to residents and visitors alike.


In [79]:
history

['user_question:tell me about Karachi',
 ('user_question:tell me about Karachi',
  "ai_response:Karachi is the largest city in Pakistan and serves as its financial and economic hub. It is located on the coast of the Arabian Sea and is known for its diverse population, vibrant culture, and bustling markets. Karachi is home to various industries, including textiles, manufacturing, and finance. The city also has a rich history, with landmarks such as the Quaid-e-Azam's Mausoleum, Karachi's Port Grand, and the Karachi Safari Park. Overall, Karachi is a dynamic and lively city with a lot to offer to residents and visitors alike.")]

In [None]:
# Shell commands for publishing this project to a remote Git repository.
# They are not Python: run them in a terminal rather than in a notebook code cell.

# Initialize a new Git repository if you haven't already
git init

# Add all the files to the staging area
git add .

# Commit the changes with a message
git commit -m "Initial commit"

# Add the remote repository URL (replace <REMOTE_URL> with your actual repository URL).
# The placeholder is quoted so the shell does not treat < and > as redirections.
git remote add origin "<REMOTE_URL>"

# Push the changes to the remote repository
git push -u origin master

In [None]:
```markdown
# Solution to Push Rejected Due to Repository Rule Violations

When you encounter the error `push declined due to repository rule violations`, it typically means that the repository has some rules or protections in place that prevent certain types of changes from being pushed directly to the `master` branch. Here are some steps to resolve this issue:

1. **Check Branch Protection Rules**:
    - Go to the repository on GitHub.
    - Navigate to `Settings` > `Branches`.
    - Check if there are any branch protection rules set for the `master` branch. These rules might require pull requests, code reviews, or status checks before changes can be merged.

2. **Create a New Branch**:
    - Instead of pushing directly to the `master` branch, create a new branch for your changes.
    - Use the following commands to create a new branch and push your changes:
      ```bash
      git checkout -b new-branch-name
      git add .
      git commit -m "Your commit message"
      git push origin new-branch-name
      ```

3. **Open a Pull Request**:
    - After pushing your changes to the new branch, go to the repository on GitHub.
    - Open a pull request from your new branch to the `master` branch.
    - Follow any required steps such as code reviews or status checks to get your pull request merged.

By following these steps, you can comply with the repository rules and successfully contribute your changes.
```

In [None]:
```markdown
# Project Title

This repository contains a series of Jupyter Notebooks demonstrating various aspects of working with Language Models and Retrieval-Augmented Generation (RAG) using LangChain and OpenAI.

## Notebooks

1. **01_talk_with_llm.ipynb**: Introduction to interacting with Language Models.
2. **02_Talk_with_llm_part2.ipynb**: Continuation of interacting with Language Models.
3. **03_Data_Loders_RAG.ipynb**: Data loading techniques for RAG.
4. **04_RAG_components.ipynb**: Components involved in RAG.
5. **05_Basic_RAG_implementation.ipynb**: Basic implementation of RAG.
6. **06_LCEL.ipynb**: Using Simple LCEL (LangChain Expression Language).

## Data

- The `data` directory contains the necessary data files for the notebooks.

## Requirements

To install the required dependencies, run:

~~~bash
pip install -r requirements.txt
~~~

## Author

- **Haseebullah Kehar**

## License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
```