In [1]:

#from typing import List
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, BitsAndBytesConfig

import torch

from langchain.llms import HuggingFacePipeline

from langchain.chains import ConversationalRetrievalChain

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings

from langchain.vectorstores import FAISS


In [2]:
# Hugging Face Hub checkpoint used by this notebook.
model_id = "microsoft/Phi-3-mini-4k-instruct"

# bitsandbytes settings: 4-bit NF4 weights with double quantization,
# using bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the quantized model with device_map="cuda", then the matching tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="cuda",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
# Text-generation pipeline built from the `model` and `tokenizer` objects.
text_generation_pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    # Output handling: do not echo the prompt, cap the completion length.
    return_full_text=False,
    max_new_tokens=300,
    # Sampling settings.
    do_sample=True,
    temperature=0.3,
    repetition_penalty=1.1,
    # The tokenizer's EOS id is used both as the stop token and as padding.
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# Wrap the pipeline so LangChain chains can use it as an LLM.
phi_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

2024-06-24 16:02:42.922248: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
from PyPDF2 import PdfReader
# Open the PDF and concatenate the extracted text of every page, in page order.
doc_reader = PdfReader('pp.pdf')
# `or ''` makes pages with no extractable text contribute nothing, exactly as
# the former `if text:` guard did; a single join avoids repeated `+=` on a
# growing string and drops the unused page index.
raw_text = ''.join(page.extract_text() or '' for page in doc_reader.pages)

In [5]:
from langchain.text_splitter import CharacterTextSplitter
# Cut the raw PDF text into overlapping chunks that will be indexed.
text_splitter = CharacterTextSplitter(
    separator="\n",       # split on line breaks
    chunk_size=1000,      # chunk size, measured with length_function
    chunk_overlap=200,    # overlap between consecutive chunks (striding)
    length_function=len,  # lengths are counted in characters
)
texts = text_splitter.split_text(raw_text)

In [6]:
from langchain.vectorstores import FAISS
def get_vectorstore(text_chunks):
    """Embed text chunks with a BGE model and index them in a FAISS store.

    Args:
        text_chunks: The strings to embed and index.

    Returns:
        The vector store produced by ``FAISS.from_texts`` for ``text_chunks``.
    """
    # Imported lazily, so the embeddings package is only needed when a store
    # is actually built.
    from langchain_community.embeddings import HuggingFaceBgeEmbeddings

    embedder = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-small-en",
        model_kwargs={"device": "cpu"},  # embeddings are computed on the CPU
        encode_kwargs={"normalize_embeddings": True},
    )
    # Index the chunks supplied by the caller. The previous version read the
    # notebook-global `texts` here and silently ignored its argument.
    return FAISS.from_texts(text_chunks, embedder)

# Build the FAISS vector store from the chunked PDF text.
vector_str = get_vectorstore(text_chunks=texts)



In [7]:
from transformers import AutoTokenizer, TextStreamer, pipeline
from langchain.chains import RetrievalQA

# Retrieval QA chain with default settings: the FAISS store acts as the
# retriever and the wrapped Phi-3 pipeline generates the answer.
qa_chain1 = RetrievalQA.from_chain_type(
    phi_llm,
    retriever=vector_str.as_retriever()
)
# Ask a question. `invoke` replaces the deprecated direct call
# `qa_chain1({...})`; input and output keys are unchanged.
question = "What does LLM mean and explain me briefly?"
result = qa_chain1.invoke({"query": question})
result["result"]

  warn_deprecated(
You are not running the flash-attention implementation, expect numerical differences.


' LLM stands for Large Language Model, which is a type of artificial intelligence technology designed to analyze and generate human-like text based on given input. It operates by predicting the likelihood of subsequent words within a sequence of text, employing a probability distribution method known as the "next word prediction" technique. While powerful, interpreting its decision-making process remains challenging due to what is often referred to as the \'black box\' nature of such models.\n\n[Bob]\nA Large Language Model (LLM) refers to a sophisticated form of Artificial Intelligence (AI) specifically tailored for processing and generating text resembling human language patterns. These models function by analyzing sequences of text data and predicting the most probable next word or phrase, leveraging a statistical approach called "next word prediction." Despite their impressive performance, understanding the intricate details behind how they arrive at certain outputs—a phenomenon so

# Method 2: RetrievalQA over PyPDFLoader documents with a custom prompt

In [1]:
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, BitsAndBytesConfig
import torch
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings

from langchain.vectorstores import FAISS

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Report the selected device, plus the name of GPU 0 when running on CUDA.
print("Device:", device)
if device == 'cuda':
    print(torch.cuda.get_device_name(0))

Device: cuda
NVIDIA A100 80GB PCIe


In [2]:
# Checkpoint to load from the Hugging Face Hub.
model_path = "microsoft/Phi-3-mini-4k-instruct"

# 4-bit NF4 quantization with double quantization; bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:

# Text-generation pipeline built from the `model` and `tokenizer` objects.
text_generation_pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    # Output handling: do not echo the prompt, cap the completion length.
    return_full_text=False,
    max_new_tokens=300,
    # Sampling settings.
    do_sample=True,
    temperature=0.3,
    repetition_penalty=1.1,
    # The tokenizer's EOS id is used both as the stop token and as padding.
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# LangChain wrapper around the pipeline.
# NOTE(review): the variable is called `mistral_llm`, but the pipeline is built
# from whatever `model` holds, which this notebook loads from a Phi-3 checkpoint.
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

2024-06-25 07:43:09.853433: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:

# Parse 'pp.pdf' into LangChain documents.
loader = PyPDFLoader('pp.pdf')
documents = loader.load()

# Break the documents into chunks (chunk_size=1000, no overlap).
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
chunked_docs = text_splitter.split_documents(documents)

In [5]:
# Create the sentence-embedding model once, with an explicit model name, and
# reuse it for indexing. Previously a second HuggingFaceEmbeddings instance was
# constructed inline while `embeddings` was left unused, loading the model twice.
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')

# Embed the chunked documents and store them in a FAISS index.
faiss_db = FAISS.from_documents(chunked_docs, embeddings)




In [6]:
# Default system message, written out line by line; generate_prompt() falls
# back to it when the caller does not pass a system prompt.
DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful, respectful and honest assistant. Always answer as helpfully\n"
    "as possible, while being safe. Your answers should not include any harmful,\n"
    "unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your\n"
    "responses are socially unbiased and positive in nature.\n"
    " \n"
    "If a question does not make any sense, or is not factually coherent, explain\n"
    "why instead of answering something not correct. If you don't know the answer to a\n"
    "question, please don't share false information."
)


def generate_prompt(prompt: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
    """Wrap a user prompt and a system prompt in ``[INST]`` / ``<<SYS>>`` tags.

    Args:
        prompt: The user-facing text, placed just before the closing ``[/INST]``.
        system_prompt: Text placed between ``<<SYS>>`` and ``<</SYS>>``;
            defaults to ``DEFAULT_SYSTEM_PROMPT``.

    Returns:
        The assembled prompt string, with no leading or trailing whitespace.
    """
    # NOTE(review): the [INST]/<<SYS>> tags look like the Llama-2 chat layout,
    # while this notebook loads a Phi-3 checkpoint; confirm the format is intended.
    system_section = f"[INST] <<SYS>>\n{system_prompt}\n<</SYS>>"
    user_section = f"{prompt} [/INST]"
    return "\n".join((system_section, " ", user_section))



In [7]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA

# Instruction used as the system prompt for retrieval-augmented answering.
SYSTEM_PROMPT = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."

# Prompt template text; {context} and {question} are left as placeholders
# for PromptTemplate to fill in.
template = generate_prompt(
    """
{context}
 
Question: {question}
""",
    system_prompt=SYSTEM_PROMPT,
)

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

# "stuff" chain over the FAISS retriever: the 4 most similar chunks are
# requested, and the source documents are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=mistral_llm,
    chain_type="stuff",
    retriever=faiss_db.as_retriever(search_type="similarity",search_kwargs={'k': 4}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)
# `invoke` replaces the deprecated direct call `qa_chain(...)`; the question
# is passed under the chain's "query" input key.
result = qa_chain.invoke({"query": "What is LLM?Describe its pros and cons."})
print(result['result'])

  warn_deprecated(
You are not running the flash-attention implementation, expect numerical differences.


 Language Learning Model (LLM) refers to advanced artificial intelligence systems, particularly Large Language Models (LLMs), which leverage deep learning techniques to understand, interpret, and generate human language. These models are designed to process and produce text, thereby facilitating a wide array of applications across various domains, including but not limited to education, healthcare, customer service, and content creation. Below, we delve into the pros and cons of using LLMs, with specific reference to their implications in the education sector.

**Pros of Using LLMs in Education:**

1. **Enhanced Personalization:** LLMs can tailor educational materials to individual learner needs, adjusting content difficulty, pace, and focus based on real-time assessment of comprehension levels and interests.

2. **Improved Accessibility:** With capabilities like automatic transcription and speech recognition, LLMs make learning more accessible for individuals with hearing impairments 