In [1]:
# Name of the Ollama model. The same value is passed to both the LLM and the
# embeddings constructors in the next cell.
MODEL = "llama2"
print(MODEL)

llama2


In [2]:
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings

# LLM and embedding objects, both configured with the model named in MODEL.
model = Ollama(model=MODEL)
embeddings = OllamaEmbeddings(model=MODEL)

# Quick smoke test: send a raw string prompt straight to the model and show
# the reply as the cell output.
model.invoke("Tell me a joke")

"Sure! Here's one:\n\nWhy don't scientists trust atoms?\nBecause they make up everything!\n\nI hope that brought a smile to your face!"

In [3]:
from langchain_core.output_parsers import StrOutputParser

# Output parser placed at the end of every chain in this notebook.
# NOTE(review): presumably reduces the model result to a plain string —
# confirm against the StrOutputParser documentation.
parser = StrOutputParser()

# Minimal chain: model output is piped into the parser. `chain` is rebound in
# later cells once a prompt and a retriever are available.
chain = model | parser 
# Disabled smoke test for the minimal chain:
#chain.invoke("Tell me a joke")

In [4]:
from langchain.prompts import PromptTemplate

# Prompt text with two placeholders, {context} and {question}. It tells the
# model to answer only from the supplied context and to reply "I don't know"
# otherwise.
template = """
Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)
# Render the template with dummy values to eyeball the final prompt string.
prompt.format(context="Here is some context", question="Here is a question")


'\nAnswer the question based on the context below. If you can\'t \nanswer the question, reply "I don\'t know".\n\nContext: Here is some context\n\nQuestion: Here is a question\n'

In [5]:
# Rebind `chain` so that input flows prompt -> model -> parser. Because the
# prompt has {context} and {question} placeholders, the chain is invoked with
# a dict carrying those two keys.
chain = prompt | model | parser

# Disabled manual check with a hand-written context:
#chain.invoke({"context": "My parents named me Santiago", "question": "What's your name'?"})

In [6]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF and split it into Document objects.
# The path is relative, so it is resolved against the kernel's working directory.
loader = PyPDFLoader("iitg_dataset.pdf")
pages = loader.load_and_split()
# Bare expression: displays the whole list of Documents as the cell output.
pages

[Document(page_content="Established in 1994, the Indian Institute of Technology Guwahati (IIT Guwahati or IITG) stands as one of \nIndia's premier engineering institutions. Nestled along the picturesque northern banks of the Brahmaputra\nRiver in Guwahati, Assam, the institute offers a wide array of undergraduate, postgraduate, and doctoral \nprograms across engineering, science, and humanities disciplines. Its sprawling 700-acre campus boasts \nstate-of-the-art infrastructure, including academic buildings, research centers, hostels, sports facilities, \nand student activity centers, providing an enriching environment for learning and innovation.  \n  \nBeyond academics, IIT Guwahati fosters a vibrant student life with numerous clubs, societies, and cultural\nevents that cater to a diverse range of interests, from music and dance to entrepreneurship initiatives. \nThe institute's emphasis on research and innovation has led to significant contributions in fields such as \nnanotechnology

In [8]:
from langchain_community.vectorstores import DocArrayInMemorySearch
    
# Build an in-memory vector store from the PDF chunks in `pages`, using
# `embeddings` to vectorize them.
vectorstore = DocArrayInMemorySearch.from_documents(pages, embedding=embeddings)

In [9]:
# Expose the vector store through the retriever interface, then probe it with
# a sample query; the matching Documents are shown as the cell output.
retriever = vectorstore.as_retriever()
retriever.invoke("lohit")

[Document(page_content="Established in 1994, the Indian Institute of Technology Guwahati (IIT Guwahati or IITG) stands as one of \nIndia's premier engineering institutions. Nestled along the picturesque northern banks of the Brahmaputra\nRiver in Guwahati, Assam, the institute offers a wide array of undergraduate, postgraduate, and doctoral \nprograms across engineering, science, and humanities disciplines. Its sprawling 700-acre campus boasts \nstate-of-the-art infrastructure, including academic buildings, research centers, hostels, sports facilities, \nand student activity centers, providing an enriching environment for learning and innovation.  \n  \nBeyond academics, IIT Guwahati fosters a vibrant student life with numerous clubs, societies, and cultural\nevents that cater to a diverse range of interests, from music and dance to entrepreneurship initiatives. \nThe institute's emphasis on research and innovation has led to significant contributions in fields such as \nnanotechnology

In [10]:
from operator import itemgetter

# Both entries of the input mapping read the same "question" key from the
# dict passed to chain.invoke(), so a single getter is shared between them.
pick_question = itemgetter("question")

# Retrieval-augmented pipeline:
#   "context"  <- the question, piped through the retriever
#   "question" <- the question, passed through unchanged
# The mapping feeds the prompt, whose output goes to the model and then the parser.
chain = (
    {"context": pick_question | retriever, "question": pick_question}
    | prompt
    | model
    | parser
)

In [12]:
# Questions to run through the retrieval chain.
questions = ["What is Lohit?"]

for question in questions:
    print(f"Question: {question}")
    # The chain is invoked after the question is echoed, as in the original
    # ordering, so the question is visible while the model is still working.
    answer = chain.invoke({"question": question})
    # end="\n\n" emits the answer line followed by one blank separator line.
    print(f"Answer: {answer}", end="\n\n")

Question: What is Lohit?
Answer: Based on the provided context, Lohit appears to be a hostel at the Indian Institute of Technology Guwahati (IIT Guwahati or IITG).



In [13]:
from langchain_community.document_loaders import JSONLoader

In [14]:
import json
from pathlib import Path
from pprint import pprint


# Location of the NER annotation export.
# NOTE(review): absolute, machine-specific path — the cell only runs where
# this file exists; consider a configurable data directory.
file_path = '/home/raone/LLM Project/NER/iitg_annotations.json'

# Decode explicitly as UTF-8 instead of relying on the locale's default
# encoding. Per the printed `data`, the result is a list of
# [text, {"entities": [[start, end, label], ...]}] pairs.
data = json.loads(Path(file_path).read_text(encoding="utf-8"))

In [15]:
# Pretty-print the parsed annotations in full to inspect their structure.
pprint(data)

[['Established in 1994, the Indian Institute of Technology Guwahati (IIT '
  "Guwahati or IITG) stands as one of India's premier engineering "
  'institutions. Nestled along the picturesque northern banks of the '
  'Brahmaputra River in Guwahati, Assam, the institute offers a wide array of '
  'undergraduate, postgraduate, and doctoral programs across engineering, '
  'science, and humanities disciplines. Its sprawling 700-acre campus boasts '
  'state-of-the-art infrastructure, including academic buildings, research '
  'centers, hostels, sports facilities, and student activity centers, '
  'providing an enriching environment for learning and innovation.\r',
  {'entities': [[25, 87, 'INSTITUTION'], [220, 235, 'PLACES']]}],
 ['\r', {'entities': []}],
 ['Beyond academics, IIT Guwahati fosters a vibrant student life with numerous '
  'clubs, societies, and cultural events that cater to a diverse range of '
  'interests, from music and dance to entrepreneurship initiatives. The '
  "inst

In [16]:
# Same annotation file as above, shown here as raw (unparsed) JSON text.
file_path = '/home/raone/LLM Project/NER/iitg_annotations.json'
# Read with an explicit UTF-8 encoding so the preview does not depend on the
# locale's default encoding.
pprint(Path(file_path).read_text(encoding="utf-8"))

('[["Established in 1994, the Indian Institute of Technology Guwahati (IIT '
 "Guwahati or IITG) stands as one of India's premier engineering institutions. "
 'Nestled along the picturesque northern banks of the Brahmaputra River in '
 'Guwahati, Assam, the institute offers a wide array of undergraduate, '
 'postgraduate, and doctoral programs across engineering, science, and '
 'humanities disciplines. Its sprawling 700-acre campus boasts '
 'state-of-the-art infrastructure, including academic buildings, research '
 'centers, hostels, sports facilities, and student activity centers, providing '
 'an enriching environment for learning and '
 'innovation.\\r",{"entities":[[25,87,"INSTITUTION"],[220,235,"PLACES"]]}],["\\r",{"entities":[]}],["Beyond '
 'academics, IIT Guwahati fosters a vibrant student life with numerous clubs, '
 'societies, and cultural events that cater to a diverse range of interests, '
 "from music and dance to entrepreneurship initiatives. The institute's "
 'emphas

In [18]:
# Load the annotation file through JSONLoader. This rebinds `loader`, which
# previously held the PDF loader.
#
# jq_schema="." is the jq identity filter, i.e. the whole JSON value is
# selected; the printed `documents` below accordingly starts with the full
# top-level array inside one Document.
# NOTE(review): text_content=False presumably allows non-string content to be
# serialized into page_content — confirm against the JSONLoader docs.
# NOTE(review): for retrieval, one Document per passage (a narrower jq filter)
# would likely work better than a single large Document — confirm before changing.
loader = JSONLoader(
    file_path="/home/raone/LLM Project/NER/iitg_annotations.json",
    jq_schema=".",
    text_content=False,
)

documents = loader.load()


In [19]:
# Pretty-print the loaded Document list to check what JSONLoader produced.
pprint(documents)

[Document(page_content='[["Established in 1994, the Indian Institute of Technology Guwahati (IIT Guwahati or IITG) stands as one of India\'s premier engineering institutions. Nestled along the picturesque northern banks of the Brahmaputra River in Guwahati, Assam, the institute offers a wide array of undergraduate, postgraduate, and doctoral programs across engineering, science, and humanities disciplines. Its sprawling 700-acre campus boasts state-of-the-art infrastructure, including academic buildings, research centers, hostels, sports facilities, and student activity centers, providing an enriching environment for learning and innovation.\\r", {\'entities\': [[25, 87, \'INSTITUTION\'], [220, 235, \'PLACES\']]}], [\'\\r\', {\'entities\': []}], ["Beyond academics, IIT Guwahati fosters a vibrant student life with numerous clubs, societies, and cultural events that cater to a diverse range of interests, from music and dance to entrepreneurship initiatives. The institute\'s emphasis on r

In [None]:
# Rebind `vectorstore` to a new in-memory store built from the JSON-derived
# `documents`; the earlier store built from the PDF `pages` is no longer
# reachable through this name.
vectorstore = DocArrayInMemorySearch.from_documents(documents, embedding=embeddings)

In [None]:
# Display the vector store object's repr to confirm it was created.
vectorstore

<langchain_community.vectorstores.docarray.in_memory.DocArrayInMemorySearch at 0x7fe13087e0d0>

In [None]:
# Rebind `retriever` to the JSON-backed vector store and probe it with a
# sample query.
# NOTE(review): the output saved under this cell shows a "Students" JSON,
# which does not match the annotation file loaded above — it looks like it
# came from an earlier run with a different file; re-run to confirm.
retriever = vectorstore.as_retriever()
retriever.invoke("marks")

[Document(page_content='{"Students": [{"Name": "John", "ID": "S001", "Marks": 85}, {"Name": "Emma", "ID": "S002", "Marks": 92}, {"Name": "Michael", "ID": "S003", "Marks": 78}, {"Name": "Sophia", "ID": "S004", "Marks": 94}, {"Name": "William", "ID": "S005", "Marks": 87}, {"Name": "Olivia", "ID": "S006", "Marks": 91}, {"Name": "James", "ID": "S007", "Marks": 80}, {"Name": "Amelia", "ID": "S008", "Marks": 89}, {"Name": "Benjamin", "ID": "S009", "Marks": 83}, {"Name": "Isabella", "ID": "S010", "Marks": 95}, {"Name": "Alexander", "ID": "S011", "Marks": 76}, {"Name": "Mia", "ID": "S012", "Marks": 90}, {"Name": "Daniel", "ID": "S013", "Marks": 88}, {"Name": "Elijah", "ID": "S014", "Marks": 82}, {"Name": "Charlotte", "ID": "S015", "Marks": 93}, {"Name": "Ava", "ID": "S016", "Marks": 84}, {"Name": "William", "ID": "S017", "Marks": 79}, {"Name": "Evelyn", "ID": "S018", "Marks": 96}, {"Name": "Matthew", "ID": "S019", "Marks": 75}, {"Name": "Harper", "ID": "S020", "Marks": 97}, {"Name": "Andrew", 

In [None]:
# Display the retriever's repr, which shows the vector store object it wraps.
retriever

VectorStoreRetriever(tags=['DocArrayInMemorySearch'], vectorstore=<langchain_community.vectorstores.docarray.in_memory.DocArrayInMemorySearch object at 0x7fe13087e0d0>)

In [None]:
from operator import itemgetter

# The chain has to be rebuilt here: the earlier `chain` holds a reference to
# the retriever object that existed when it was constructed, not to whatever
# the name `retriever` is bound to now.
pick_question = itemgetter("question")

# Same pipeline shape as before:
#   "context"  <- the question, piped through the current retriever
#   "question" <- the question, passed through unchanged
# followed by prompt -> model -> parser.
chain = (
    {"context": pick_question | retriever, "question": pick_question}
    | prompt
    | model
    | parser
)

In [None]:
# Questions to run through the chain that was rebuilt over the JSON documents.
questions = ["how many marks Jack got?"]

for question in questions:
    print(f"Question: {question}")
    # Invoke only after echoing the question, matching the original ordering.
    answer = chain.invoke({"question": question})
    # end="\n\n" emits the answer line followed by one blank separator line.
    print(f"Answer: {answer}", end="\n\n")
   

Question: how many marks Jack got?
Answer:  Based on the provided JSON data, Jack has a mark of 98.

