In [None]:
import warnings

warnings.filterwarnings("ignore")

In [None]:
import os
from dotenv import load_dotenv, find_dotenv

# Read the local .env file into the process environment.
# A single call is sufficient: find_dotenv() locates the file and load_dotenv()
# loads it. The trailing semicolon keeps the boolean return value out of the
# cell output without rebinding IPython's special `_` variable.
load_dotenv(find_dotenv());

In [None]:
import pandas as pd

# Read the perfume dataset into a DataFrame and preview the first rows.
perfume_df = pd.read_csv(
    "data/final_perfume_data.csv",
    encoding="unicode_escape",
)
perfume_df.head()

# LLM Chain

In [None]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain

# Single-prompt chain: ask the chat model to name a perfume from its notes.
llm = ChatOpenAI(temperature=0.9)
prompt = ChatPromptTemplate.from_template(
    "What is the best name to describe a perfume with the following notes: {notes}",
)

chain = LLMChain(llm=llm, prompt=prompt)

notes = "floral, citrus, musk"
# `Chain.run` is deprecated in favour of `invoke`. `invoke` returns a dict;
# "text" is LLMChain's default output key and holds the generated string.
chain.invoke({"notes": notes})["text"]

# Simple Sequential Chain

The output of the first chain is the input of the second chain, and so on. This works well for a single input and output.

In [None]:
from langchain.chains import SimpleSequentialChain

llm = ChatOpenAI(temperature=0.9)

# Chain 1: perfume notes -> perfume name
first_prompt = ChatPromptTemplate.from_template(
    "What is the best name to describe a perfume with the following notes: {notes}",
)
chain1 = LLMChain(llm=llm, prompt=first_prompt)

# Chain 2: perfume name -> 20 word description
second_prompt = ChatPromptTemplate.from_template(
    "Write a 20 word description for a perfume with the following name: {perfume_name}",
)
chain2 = LLMChain(llm=llm, prompt=second_prompt)

# SimpleSequentialChain passes each chain's single output to the next chain
# as its single input.
overall_simple_chain = SimpleSequentialChain(chains=[chain1, chain2], verbose=True)

# `notes` comes from the LLM Chain cell above.
# `Chain.run` is deprecated in favour of `invoke`; SimpleSequentialChain's
# default keys are "input" and "output".
overall_simple_chain.invoke({"input": notes})["output"]

# Sequential Chain

Works well for multiple inputs and outputs. Each chain writes its result to a named output key, and any later chain can read the original inputs or the outputs of earlier chains by name.

In [None]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain

from langchain.chains import SequentialChain

llm = ChatOpenAI(temperature=0.9)

# Chain 1: input = notes, output = perfume name
first_prompt = ChatPromptTemplate.from_template(
    "What is the best name to describe a perfume with the following notes: {notes}",
)
chain1 = LLMChain(llm=llm, prompt=first_prompt, output_key="perfume_name")

# Chain 2: input = perfume name, output = perfume description
second_prompt = ChatPromptTemplate.from_template(
    "Write a 20 word description for a perfume with the following name: {perfume_name}",
)
chain2 = LLMChain(llm=llm, prompt=second_prompt, output_key="perfume_description")

# Chain 3: input = perfume description, output = perfume tagline
third_prompt = ChatPromptTemplate.from_template(
    "Write a catchy 8 word tagline for a perfume with the following description: {perfume_description}",
)
chain3 = LLMChain(llm=llm, prompt=third_prompt, output_key="perfume_tagline")

# Chain 4: input = notes (the original input, not chain 3's output),
# output = similar fragrances
fourth_prompt = ChatPromptTemplate.from_template(
    "List 3 other perfumes with the following notes: {notes}",
)
chain4 = LLMChain(llm=llm, prompt=fourth_prompt, output_key="similar_fragrances")

# Overall chain: input = notes;
# output = perfume name, description, tagline, similar fragrances.
# Each chain's `output_key` must match the template variable of the chain
# that consumes it.
overall_chain = SequentialChain(
    chains=[chain1, chain2, chain3, chain4],
    input_variables=["notes"],
    output_variables=["perfume_name", "perfume_description", "perfume_tagline", "similar_fragrances"],
    verbose=True
)

# `notes` comes from the LLM Chain cell earlier in the notebook.
# Calling the chain object directly is deprecated; `invoke` returns the same
# dict of inputs plus the requested output variables.
overall_chain.invoke({"notes": notes})

# Q&A with custom data with RAG

In [None]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings # chat completion model and embeddings
from langchain_text_splitters import CharacterTextSplitter # split text into characters
from langchain.document_loaders import CSVLoader # load data from a csv file - from langchain_community
from langchain.vectorstores import DocArrayInMemorySearch #vectorstore, in memory, no need to connect to an external vectorestore
from IPython.display import display, Markdown  # display markdown in jupyter notebook

In [None]:
# Load the perfume CSV as LangChain documents - each doc represents a perfume.
documents = "data/final_perfume_data.csv"
loader = CSVLoader(
    file_path=documents,
    encoding="unicode_escape",
).load()  # NOTE: despite the name, `loader` holds the loaded documents

# Chunk the loaded documents (at most 1000 characters per chunk, no overlap).
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(loader)

# Build the embedding model and embed a sample query as a sanity check:
# print the vector length, then its first 5 components.
embeddings = OpenAIEmbeddings()
embed = embeddings.embed_query("floral, citrus, musk")
print(len(embed), embed[:5], sep="\n")

# Index the chunks in an in-memory vector store.
vectorstore_db = DocArrayInMemorySearch.from_documents(docs, embeddings)


In [None]:
# Similarity search: fetch the stored chunks closest to the query.
# NOTE: this rebinds `docs` (previously the split chunks) to the search hits.

query = "please list similar fragrances to chanel no 5."
docs = vectorstore_db.similarity_search(query)

# Put a blank line between hits so adjacent documents do not run together
# in the printed output.
results = "\n\n".join(doc.page_content for doc in docs)
print(results)

In [None]:
## Question answering setup (originally labelled "DO NOT DO THIS SECTION").
# NOTE(review): the retrieval cell further down uses `retriever` and `llm`
# from here, so these definitions still have to run - confirm what the
# original label was meant to skip.

# Chat model; temperature 0.0 for the question-answering step.
llm = ChatOpenAI(temperature=0.0)

# Retriever view over the in-memory vector store.
retriever = vectorstore_db.as_retriever()

def qdocs(docs):
    """Concatenate the `page_content` of each document, separated by blank lines.

    Args:
        docs: sequence of objects exposing a `page_content` string attribute.

    Returns:
        A single string with the contents joined by "\\n\\n" (empty for no docs).
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# get a response: place the retrieved documents in the prompt ahead of the question.
# qdocs must be *called* on the search hits in `docs`; interpolating the bare
# function object would send its repr ("<function qdocs at 0x...>") to the model
# instead of any document text.
response = llm.invoke(f"{qdocs(docs)} Question: {query}")

# display the response
# display(Markdown(response.content))

def display_markdown(content):
    """Render `content` as Markdown in the notebook output and return it unchanged.

    Returning the input lets this function sit as the last step of a pipeline
    while still handing the text back to the caller.
    """
    rendered = Markdown(content)
    display(rendered)
    return content


In [None]:
# retrieval
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Prompt that grounds the model in the retrieved context and asks for a markdown table.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Return the answer in a markdown table with the name, brand and image.

{context}

Question: {question}

Helpful Answer:"""

custom_rag_prompt = PromptTemplate.from_template(template)

# Map the incoming question onto the two prompt variables:
#   context  <- retriever hits, flattened into one string by qdocs
#   question <- the question itself, passed through untouched
prompt_inputs = {
    "context": retriever | qdocs,
    "question": RunnablePassthrough(),
}

# prompt -> chat model -> plain string -> rendered as Markdown
rag_chain = prompt_inputs | custom_rag_prompt | llm | StrOutputParser() | display_markdown

# Alternative question to try:
# rag_chain.invoke("what are the notes of Freeworld Parfum?")
rag_chain.invoke("List 3 similar fragrances to Circus Fantasy.")
