In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma, FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain

import openai
import os
# openai.api_key = ""
os.environ['OPENAI_API_KEY'] = ""

In [2]:
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader

loader = DirectoryLoader('data/TextBooks', glob="./dsc250fall2023*", show_progress=True) ### many doc loaders
docs = loader.load()

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(docs)

100%|██████████| 12/12 [01:16<00:00,  6.36s/it]
Created a chunk of size 2460, which is longer than the specified 1000
Created a chunk of size 1791, which is longer than the specified 1000
Created a chunk of size 1614, which is longer than the specified 1000
Created a chunk of size 1389, which is longer than the specified 1000
Created a chunk of size 1066, which is longer than the specified 1000
Created a chunk of size 1544, which is longer than the specified 1000
Created a chunk of size 1019, which is longer than the specified 1000
Created a chunk of size 1229, which is longer than the specified 1000


In [3]:
# embeddings = OpenAIEmbeddings() ### many embedding functions
embeddings = HuggingFaceInstructEmbeddings(query_instruction="Represent the query for retrieval: ", model_kwargs={'device':'cuda:0'}, encode_kwargs={'batch_size':32})


  from tqdm.autonotebook import trange


load INSTRUCTOR_Transformer
max_seq_length  512


In [4]:
embeddings.embed_documents()

HuggingFaceInstructEmbeddings(client=INSTRUCTOR(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: T5EncoderModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False})
  (2): Dense({'in_features': 1024, 'out_features': 768, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity'})
  (3): Normalize()
), model_name='hkunlp/instructor-large', cache_folder=None, model_kwargs={'device': 'cuda:0'}, encode_kwargs={'batch_size': 32}, embed_instruction='Represent the document for retrieval: ', query_instruction='Represent the query for retrieval: ')

In [5]:
vectorstore = FAISS.from_documents(documents, embeddings) ### choose from FAISS, Chroma, 
vectorstore.save_local('data/TextBooks/vectorstore_DSC250.vs')
FAISS.load_local("data/TextBooks/vectorstore_DSC250.vs", embeddings=embeddings)

<langchain.vectorstores.faiss.FAISS at 0x7fb8cdfe6ee0>

In [6]:
documents[0]

Document(page_content='DSC250: Advanced Data Mining\n\nLanguage Models\n\nZhiting Hu Lecture 9, October 26, 2023\n\nLast lecture\n\nNeural language models:\n\n! Embedding: one-hot vectors -> embedding vectors\n\n! Neural networks\n\n2\n\nNeural Architectures of LMs\n\n3\n\nOutline\n\nRecurrent Networks (RNNs)\n\n! Long-range dependency, vanishing gradients ! LSTM ! RNNs in different forms\n\nAttention Mechanisms ! (Query, Key, Value) ! Attention on Text and Images\n\nTransformers: Multi-head Attention\n\n! Transformer ! BERT\n\n4\n\nOutline\n\nRecurrent Networks (RNNs)\n\n! Long-range dependency, vanishing gradients ! LSTM ! RNNs in different forms\n\nAttention Mechanisms ! (Query, Key, Value) ! Attention on Text and Images\n\nTransformers: Multi-head Attention\n\n! Transformer ! BERT\n\n5\n\nConvNets v.s. Recurrent Networks (RNNs)\n\nSpatial Modeling vs. Sequential Modeling ● Fixed vs. variable number of computation steps.\n\nℎ"\n\nℎ#\n\nℎ$\n\nℎ!\n\nℎ!\n\n=\n\n𝑥"\n\n𝑥#\n\n𝑥$\n\n𝑥!\n\n

In [5]:
query = "What is ML algorithm?"
docs = vectorstore.similarity_search(query)
print(docs[0].page_content)

An ML algorithm is a set of precise instructions telling the computer how to learn from data.

This is because real world data has “structure”.

An ML algorithm is a set of precise instructions telling the computer how to learn from data.

Spoiler: the algorithms are usually pretty simple. It’s the data that does the real work.

An ML algorithm is a set of precise instructions telling the computer how to learn from data.

Spoiler: the algorithms are usually pretty simple. It’s the data that does the real work.

This is because real world data has “structure”.

Problem: On a scale from 1-10, how happy is this person?

Recall: Least Squares Regression

▶ Example: predict the price of a laptop.

▶ Choose some features:

▶ CPU speed, amount of RAM, weight (kg).

▶ Prediction function (weighted “vote”):

(price) = 𝑤0 + 𝑤1 × (cpu) + 𝑤2 × (ram) + 𝑤3 × (weight)

▶ Learn 𝑤𝑖 by minimizing squared error.

Representations

▶ Computers don’t understand the concept of a

laptop.


In [6]:
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [None]:
from langchain.llms import HuggingFacePipeline
import torch
llm = HuggingFacePipeline.from_model_id(
                model_id="lmsys/vicuna-7b-v1.5",
                task="text-generation",#"text2text-generation",#,
                model_kwargs={"temperature": 0.7, "max_length": 32, "torch_dtype": torch.float16},
                pipeline_kwargs={'max_new_tokens':32},
                device="cuda:0"
            )
qa = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), memory=memory, return_source_documents=True)

In [None]:
query = "What is ML algorithm?"
result = qa({"question": query})

In [None]:
result

In [None]:
from model.llm_langchain_tutor import LLMLangChainTutor

In [None]:
lmtutor = LLMLangChainTutor(openai_key="")
lmtutor.load_vector_store("data/TextBooks/vectorstore_DSC140B-Lec01")
lmtutor.conversational_qa_init()

In [None]:
lmtutor.conversational_qa('Is there a discussion session this week?')

In [None]:
from model.llm_langchain_tutor import LLMLangChainTutor

In [None]:
lmtutor = LLMLangChainTutor(embedding='instruct_embedding', llm='hf_facebook/bart-large-cnn', device='cuda:0')

In [None]:
lmtutor.load_document(doc_path="data/TextBooks", glob='./DSC140B-Lec01.pdf', chunk_size=100, chunk_overlap=10)
lmtutor.generate_vector_store()
# lmtutor.conversational_qa_init()

In [None]:
lmtutor.similarity_search("is there a discussion session?")[0].page_content

In [None]:
lmtutor.conversational_qa('is there a discussion session this week?')

# LLama based embedding generation

In [1]:
import torch

In [2]:
from langchain.embeddings import HuggingFaceEmbeddings

In [None]:
model_name = "lmsys/vicuna-13b-v1.5"
embedding_model = HuggingFaceEmbeddings(model_name=model_name)


Downloading (…)l-00001-of-00003.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00003.bin:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

Downloading (…)l-00003-of-00003.bin:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/749 [00:00<?, ?B/s]

No sentence-transformers model found with name /home/reventh/.cache/torch/sentence_transformers/lmsys_vicuna-13b-v1.5. Creating a new one with MEAN pooling.


In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

model_name = "meta-llama/Llama-2-13b-chat-hf"
token = "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"
model_kwargs = {'use_auth_token' : token} #your token to use the models
embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

#Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` 
#or add a new pad token via `tokenizer.add_special_tokens({'pad_token': '[PAD]'})
embedding_model.client.tokenizer.pad_token =  embedding_model.client.tokenizer.eos_token

embedding = embedding_model.embed_query("your text")


Downloading .gitattributes:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

Downloading LICENSE.txt:   0%|          | 0.00/7.02k [00:00<?, ?B/s]

Downloading README.md:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

Downloading (…)nsible-Use-Guide.pdf:   0%|          | 0.00/1.25M [00:00<?, ?B/s]

Downloading USE_POLICY.md:   0%|          | 0.00/4.77k [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/587 [00:00<?, ?B/s]

Downloading generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Downloading (…)l-00001-of-00003.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]