In [42]:
import os
from dotenv import load_dotenv

# Read key/value pairs from a local .env file into the process environment so
# the OpenAI credential never has to be written into the notebook itself.
load_dotenv()

# os.getenv returns None when the variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Pick exactly ONE model by leaving a single assignment uncommented.
# Names starting with "gpt" are routed to OpenAI further down; any other name
# is handed to Ollama. The previous version assigned MODEL twice in a row, so
# the first value ("mixtral:8x7b") was dead code that was always overwritten.
# MODEL = "gpt-3.5-turbo"
# MODEL = "mixtral:8x7b"
MODEL = "llama2"

In [43]:
from langchain_community.llms import Ollama
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.embeddings import OllamaEmbeddings
from langchain_openai.embeddings import OpenAIEmbeddings

# Choose the chat model and the matching embedding model from the MODEL name:
# "gpt*" names use the OpenAI clients, everything else goes through Ollama.
if MODEL.startswith("gpt"):
    model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model=MODEL)
    # Pass the key explicitly here as well, so the chat model and the
    # embeddings client are configured the same way instead of one of them
    # silently depending on the process environment.
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
else:
    model = Ollama(model=MODEL)
    embeddings = OllamaEmbeddings(model=MODEL)

# Smoke test of the selected model. On the Ollama branch this needs a running
# Ollama server; the recorded ConnectionError (localhost:11434 refused) is what
# this call produces when the server is not reachable.
model.invoke("Tell me a joke")

ConnectionError: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/generate (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000204767A9A10>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))

In [32]:
from langchain_core.output_parsers import StrOutputParser

# Converts the model's output into a plain Python string.
parser = StrOutputParser()

# Two-step pipeline: model first, then the string parser.
chain = model.pipe(parser)
chain.invoke("Tell me a joke")

"Why don't scientists trust atoms?\n\nBecause they make up everything!"

In [33]:
from langchain.prompts import PromptTemplate

# Prompt text with two placeholders, {context} and {question}.
# It is spelled out line by line so that the leading newline and the trailing
# space after "can't" (both part of the prompt text) are visible in the source.
template = (
    "\n"
    "Answer the question based on the context below. If you can't \n"
    'answer the question, reply "I don\'t know".\n'
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
)

# Turn the raw template text into a prompt object that the chains below reuse.
prompt = PromptTemplate.from_template(template)

# Render it once with placeholder values to inspect the final prompt text.
prompt.format(
    question="Here is a question",
    context="Here is some context",
)

'\nAnswer the question based on the context below. If you can\'t \nanswer the question, reply "I don\'t know".\n\nContext: Here is some context\n\nQuestion: Here is a question\n'

In [34]:
# Pipeline: fill the prompt, send it to the model, parse the reply to a string.
chain = prompt.pipe(model, parser)

# Sanity check with a hand-written context instead of retrieved documents.
chain.invoke(
    {
        "context": "My parents named me Santiago",
        "question": "What's your name'?",
    }
)

'My name is Santiago.'

In [35]:
from langchain_community.document_loaders import PyPDFLoader

# Raw string literal: the Windows path contains backslashes, and in a normal
# string "\R" and "\V" are invalid escape sequences (a warning today, slated to
# become an error in a future Python). The r-prefix keeps the exact same path
# value without relying on that leniency.
loader = PyPDFLoader(r"D:\RAG_for_multiple_pdf\VisionTransformer.pdf")

# Load the PDF and split it into the documents that get indexed below.
pages = loader.load_and_split()
len(pages)

26

In [36]:
from langchain_community.vectorstores import DocArrayInMemorySearch

# Embed every loaded page with the selected embedding model and keep the
# vectors in an in-memory store.
vectorstore = DocArrayInMemorySearch.from_documents(
    pages,
    embedding=embeddings,
)

In [37]:
# Expose the vector store through the retriever interface used by the chain.
retriever = vectorstore.as_retriever()

# Quick look at which documents come back for a sample query.
retriever.invoke(
    "how could do fine tuning in vision transformer"
)

[Document(page_content='Published as a conference paper at ICLR 2021\ninherent to CNNs, such as translation equivariance and locality, and therefore do not generalize well\nwhen trained on insufﬁcient amounts of data.\nHowever, the picture changes if the models are trained on larger datasets (14M-300M images). We\nﬁnd that large scale training trumps inductive bias. Our Vision Transformer (ViT) attains excellent\nresults when pre-trained at sufﬁcient scale and transferred to tasks with fewer datapoints. When\npre-trained on the public ImageNet-21k dataset or the in-house JFT-300M dataset, ViT approaches\nor beats state of the art on multiple image recognition benchmarks. In particular, the best model\nreaches the accuracy of 88.55% on ImageNet, 90.72% on ImageNet-ReaL, 94.55% on CIFAR-100,\nand77.63% on the VTAB suite of 19 tasks.\n2 R ELATED WORK\nTransformers were proposed by Vaswani et al. (2017) for machine translation, and have since be-\ncome the state of the art method in many N

In [38]:
from operator import itemgetter

# Retrieval-augmented chain. The input is a dict with a "question" key:
#   - "context"  : the question is extracted and passed to the retriever
#   - "question" : the question is extracted and forwarded as-is
# The resulting mapping feeds the prompt, then the model, then the parser.
chain = (
    dict(
        context=itemgetter("question") | retriever,
        question=itemgetter("question"),
    )
    | prompt | model | parser
)

In [39]:
# Questions to ask about the loaded PDF, one string per list entry.
# Every entry ends with a comma: without it, adding another string on the next
# line would be silently concatenated into the previous one instead of becoming
# a new question. Stale commented-out questions that referred to a different
# document were removed.
questions = [
    "what is positional embedding   ",
]

# Ask each question through the retrieval chain and print the Q/A pair,
# separated by a blank line.
for question in questions:
    print(f"Question: {question}")
    answer = chain.invoke({"question": question})
    print(f"Answer: {answer}")
    print()

Question: what is positional embedding   
Answer: Positional embedding is a technique used to encode spatial information in the context of image recognition tasks. It helps the model understand the positional relationships between different patches or elements in an image.



In [40]:
# Run all questions through the chain in a single batch call; each question is
# wrapped in the {"question": ...} dict the chain expects as input.
chain.batch(
    [dict(question=text) for text in questions]
)

['Positional embedding is a technique used to encode spatial information in neural networks, particularly in the context of processing images or sequences. It helps the model understand the position or order of elements in the input data.']

In [41]:
# Stream the answer and print each piece as soon as it arrives, without adding
# newlines between pieces and flushing so the text appears immediately.
stream_input = {"question": "What is the vision transfromer"}
for chunk in chain.stream(stream_input):
    print(chunk, end="", flush=True)

The Vision Transformer (ViT) is a model that applies the Transformer architecture directly to sequences of image patches for image recognition tasks. It has been shown to perform well on image classification tasks when pre-trained on large datasets and transferred to multiple image recognition benchmarks.