# ***LLM Model based on OpenAI***

## Defining API key

Importing the OpenAI API key from a local Python module (`secret_keys`)

In [1]:
from secret_keys import openai_key

In [2]:
import os

# Publish the key as an environment variable; the OpenAI-backed objects
# created later in this notebook are constructed without an explicit key.
os.environ.update({"OPENAI_API_KEY": openai_key})

## Creating LLM model object

Defining model name

In [3]:
# Identifier of the OpenAI chat model handed to ChatOpenAI below.
model_name: str = "gpt-3.5-turbo"

Importing libraries from LangChain

In [4]:
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage

Creating the LLM object

In [5]:
# Chat model client; temperature is pinned to 0 for this notebook.
llm = ChatOpenAI(
    temperature=0,
    model_name=model_name,
)

## Asking questions to the LLM

In [6]:
# NOTE(review): llm.generate treats every inner list as its own conversation,
# so index 0 carries only the system prompt and index 1 only the user
# question - the system prompt is not applied to the question.
system_prompt = SystemMessage(
    content="You are a helpful assistant that give good answers to specific questions."
)
user_question = HumanMessage(content="Who is Shoaib Sikander?")

messages = [[system_prompt], [user_question]]

Producing an answer

In [7]:
# Send every conversation in `messages` to the model in a single call.
output = llm.generate(messages=messages)

Extracting response from the answer

In [8]:
# Show the user question (second conversation, first message) ...
question_text = messages[1][0].content
print('QUESTION: ' + question_text)

# ... and the first generation produced for that same conversation.
answer_text = output.generations[1][0].message.content
print('ANSWER: ' + answer_text + '\n')

QUESTION: Who is Shoaib Sikander?
ANSWER: Shoaib Akhtar, also known as the "Rawalpindi Express," is a former Pakistani cricketer who is considered one of the fastest bowlers in the history of the game. He was known for his raw pace and aggressive bowling style.



Printing token usage and model name

In [9]:
# llm_output holds the provider metadata, e.g. token usage and model name.
usage_info = output.llm_output
print(usage_info)

{'token_usage': {'completion_tokens': 61, 'prompt_tokens': 35, 'total_tokens': 96}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_c2295e73ad'}


## Updating model's knowledge base with our own data

#### Importing libraries

Importing libraries for loading a PDF file

In [10]:
from langchain_community.document_loaders import UnstructuredFileLoader
from langchain_community.document_loaders import UnstructuredPDFLoader

#### Loading the PDF file containing the knowledge and pre-processing it

Loading PDF file

In [11]:
# Path is relative to the notebook's working directory.
pdf_path = 'File.pdf'

loader = UnstructuredPDFLoader(pdf_path)
documents = loader.load()

Splitting the text loaded from the document into chunks

In [12]:
from langchain.text_splitter import CharacterTextSplitter

# Chunk size (in characters) requested from the splitter; chunks do not overlap.
SIZE = 1000

text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=SIZE,
)
texts = text_splitter.split_documents(documents)
print(texts)
#print(len(texts))

[Document(page_content='Muhammad Shoaib Sikander is a 32-year-old man. He belongs to Pakistan and currently living in Germany. He completed his bachelor’s in electrical engineering from University of The Punjab in Lahore, Pakistan and Masters in Control, Microsystem, Microelectronics from University of Bremen, Germany. Currently he is working as a Software Engineer for AI Solutions in LS telcom AG, Germany.', metadata={'source': 'File.pdf'})]


#### Embeddings

Loading the embedding model

In [13]:
#from langchain.embeddings import OpenAIEmbeddings
from langchain_openai import OpenAIEmbeddings

In [14]:
# Embedding client built with the library defaults; no model name or API key
# is passed explicitly here.
embeddings = OpenAIEmbeddings()

#### VectorDB

Storing the embedded chunks in a vector database (Chroma)

In [15]:
from langchain_community.vectorstores import Chroma

In [16]:
# Embed every chunk and index it in a Chroma vector store.
db = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
)

#### Updating the model

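Wrapping the LLM in a question-answering chain that retrieves the most relevant chunk from the vector database and passes it to the model as context (the model's own weights are not changed)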

In [17]:
from langchain.chains import VectorDBQA
#from langchain.chains import RetrievalQA
from langchain_community.chat_models import ChatOpenAI

In [18]:
from langchain.chains import RetrievalQA

# VectorDBQA is deprecated in LangChain and warns on construction; RetrievalQA
# is the replacement named by that warning.  It takes a retriever rather than
# the raw vector store, so the `vectorstore=` / `k=` arguments become
# `db.as_retriever(search_kwargs={"k": 1})`, which still fetches only the
# single closest chunk.  chain_type="stuff" places that chunk directly into
# the prompt.  The chain still returns a dict with 'query' and 'result' keys.
llm_updated = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 1}),
)



## Asking the same question to the updated LLM model

In [19]:
question = "Who is Shoaib Sikander?"

# The chain returns a dict: 'query' echoes the input, 'result' is the answer.
output = llm_updated.invoke(question)

asked = output.get('query')
print('QUESTION: ' + asked)

answered = output.get('result')
print('ANSWER: ' + answered)

QUESTION: Who is Shoaib Sikander?
ANSWER: Shoaib Sikander is a 32-year-old man from Pakistan who is currently living in Germany. He has a background in electrical engineering and works as a Software Engineer for AI Solutions in LS telcom AG, Germany.
