In [2]:
from dotenv import load_dotenv,find_dotenv
import os
import sys
import certifi
from langchain import HuggingFaceHub, PromptTemplate
from langchain.chains import LLMChain, SimpleSequentialChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS


In [3]:
load_dotenv(find_dotenv())

True

In [4]:
# repo_id = "meta-llama/Llama-2-7b"
repo_id = "distilgpt2" # use as fast, in the future you can sub out for another model

In [5]:
# bypass VPN to connect to hugging face hub
os.environ['CURL_CA_BUNDLE'] = ''

llm = HuggingFaceHub(repo_id=repo_id)
llm("explain large language models in one sentence")

  from .autonotebook import tqdm as notebook_tqdm


' and then repeat it until it is clear that it does not need to happen. What if this is only the example of what happens when learning this model is complete?\n\nThe first, first example is the'

In [6]:
template = """
You are an expert data scientist with an expertise in building deep learning models. 
Explain the concept of {concept} in a couple of lines
"""

prompt = PromptTemplate(
    input_variables=["concept"],
    template=template,
)

llm(prompt.format(concept="autoencoder"))



'The idea behind autoencoder, as it is a combination of a collection of'

In [7]:
chain = LLMChain(llm=llm, prompt=prompt)

# Run the chain only specifying the input variable.
print(chain.run("autoencoder"))



The idea behind autoencoder, as it is a combination of a collection of


In [8]:
second_prompt = PromptTemplate(
    input_variables=["ml_concept"],
    template="Turn the concept description of {ml_concept} and explain it to me like I'm five in 500 words",
)
chain_two = LLMChain(llm=llm, prompt=second_prompt)

In [9]:
overall_chain = SimpleSequentialChain(chains=[chain, chain_two], verbose=True)

# Run the chain specifying only the input variable for the first chain.
explanation = overall_chain.run("autoencoder")
print(explanation)



[1m> Entering new SimpleSequentialChain chain...[0m




[36;1m[1;3mThe idea behind autoencoder, as it is a combination of a collection of[0m




[33;1m[1;3m. I think it is because, the concept is so powerful as to be able[0m

[1m> Finished chain.[0m
. I think it is because, the concept is so powerful as to be able


In [10]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 100,
    chunk_overlap  = 0,
)

texts = text_splitter.create_documents([explanation])
print(texts[0].page_content)

. I think it is because, the concept is so powerful as to be able


In [11]:
embeddings = HuggingFaceEmbeddings()

query_result = embeddings.embed_query(texts[0].page_content)
print(query_result)

[-0.004590710625052452, 0.02497156895697117, -0.026590118184685707, 0.029301786795258522, -0.004198260605335236, 0.03743816539645195, -0.011421391740441322, -0.02677946537733078, -0.07057925313711166, 0.023970087990164757, -0.009051102213561535, 0.029116252437233925, -0.006887656636536121, 0.0026810995768755674, 0.06928547471761703, 0.016850963234901428, -0.03028842806816101, 0.03064923919737339, -0.04688407853245735, 0.048498816788196564, -0.01806466281414032, -0.0022269042674452066, -0.0004179888346698135, -0.017997100949287415, 0.048278093338012695, -0.016196250915527344, 0.04768437519669533, -0.03786081075668335, -0.03855917602777481, -0.041525520384311676, -0.06974520534276962, 0.013177662156522274, -0.024009956046938896, 0.06161739304661751, 1.6011028947104933e-06, -0.008795121684670448, 0.017496522516012192, 0.02207856811583042, -0.07023639231920242, 0.04811456426978111, -0.009461010806262493, 0.04526809975504875, 0.013514040969312191, 0.018549829721450806, -0.021121496334671974

In [12]:
db = FAISS.from_documents(texts, embeddings)

In [13]:
db.save_local("faiss_index")

In [14]:
query = "What is magical about an autoencoder?"
result = db.similarity_search(query)

print(result)

[Document(page_content='. I think it is because, the concept is so powerful as to be able', metadata={})]


In [16]:
type(embeddings)

langchain.embeddings.huggingface.HuggingFaceEmbeddings