In [1]:
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from huggingface_hub import hf_hub_download
import textwrap
import glob

In [2]:
# import libraries and modules
import langchain
import openai
import os

In [3]:
# Working folder that holds the notebook's inputs (the Hugging Face token file
# read in a later cell is looked up under this folder).
# Set the LANGCHAIN_DIR environment variable to override the default, e.g. to
# point at a mounted Drive folder on Google Colab, instead of editing this cell.
# os.path.join(..., '') guarantees a trailing separator, because other cells
# build file paths by plain string concatenation (folder_path + "name").
folder_path = os.path.join(
    os.environ.get('LANGCHAIN_DIR', '/Users/jasper/Desktop/LangChain/'), ''
)

# Make relative paths used elsewhere in the notebook resolve against that folder.
os.chdir(folder_path)

In [4]:
# Read the Hugging Face API token from the text file in the working folder.
# .strip() removes the trailing newline (and any stray surrounding whitespace)
# that would otherwise become part of the token string passed to the Hub client.
# os.path.join works whether or not folder_path ends with a path separator.
with open(os.path.join(folder_path, "Jasper_HF_Token.txt"), "r") as f:
  HUGGING_FACE_API_KEY = f.read().strip()

In [5]:
# Prompt skeleton for the LLM; the {input} placeholder is the single variable
# substituted when the prompt is formatted.
template = """ You are going to be my assistant.
Please try to give me the most beneficial answers to my
question with reasoning for why they are correct.

 Question: {input} Answer: """

# Wrap the raw string in a PromptTemplate, declaring its one input variable.
prompt = PromptTemplate(
    input_variables=["input"],
    template=template,
)

In [6]:
# Hugging Face Hub model used as the LLM, authenticated with the token loaded
# earlier in the notebook.
# NOTE(review): confirm that facebook/mbart-large-50 suits this
# instruction-style prompt and that the endpoint accepts temperature=0.
model = HuggingFaceHub(
    huggingfacehub_api_token=HUGGING_FACE_API_KEY,
    repo_id="facebook/mbart-large-50",
    model_kwargs={"temperature": 0, "max_length": 200},
)

# Chain pairing the prompt template with the model.
chain = LLMChain(llm=model, prompt=prompt)



In [7]:
# Embedding model (sentence-transformers MiniLM) passed to the FAISS vector
# store when the document chunks are indexed.
hf_embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
)

In [13]:
# Fetch NLTK's 'punkt' data package. The recorded output below shows the
# download failing with an SSL CERTIFICATE_VERIFY_FAILED error and the call
# returning False, so the package was NOT installed by this cell.
# NOTE(review): on a python.org macOS install this is usually resolved by
# running the bundled "Install Certificates.command" — confirm for this machine.
import nltk
nltk.download('punkt')

[nltk_data] Error loading punkt: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:992)>


False

In [16]:
# Diagnostic: print the directories NLTK searches for data packages, i.e. the
# locations where 'punkt' could be placed manually if downloading fails.
import nltk
print(nltk.data.path)

['/Users/jasper/nltk_data', '/Library/Frameworks/Python.framework/Versions/3.11/nltk_data', '/Library/Frameworks/Python.framework/Versions/3.11/share/nltk_data', '/Library/Frameworks/Python.framework/Versions/3.11/lib/nltk_data', '/usr/share/nltk_data', '/usr/local/share/nltk_data', '/usr/lib/nltk_data', '/usr/local/lib/nltk_data']


In [20]:
# Load apple_earnings.txt from the notebook's working folder. Reusing
# folder_path (defined in the configuration cell) keeps the directory in one
# place instead of repeating the absolute path here.
my_loader = DirectoryLoader(folder_path, glob='apple_earnings.txt')
docs = my_loader.load()

# Split the loaded documents into chunks (chunk_size=700, no overlap between
# consecutive chunks).
text_split = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=0)

# NOTE: despite its name, `text` holds the list of chunked documents returned
# by the splitter, not a single string.
text = text_split.split_documents(docs)

In [21]:
# Build a FAISS vector store from the document chunks, embedding each one with
# hf_embeddings.
vectorstore = FAISS.from_documents(documents=text, embedding=hf_embeddings)