# Model I/O

## Language Model

In [2]:
from langchain.llms import OpenAI

In [9]:
# The LLM objects take string as input and output string
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.4)

In [15]:
huamn_text = "What should I do if I wish to analyse customer reviews"

In [16]:
llm.invoke(huamn_text)

'\n\n1. Define your goals: Before starting the analysis, it is important to clearly define your goals and what you hope to achieve from the analysis. This will help guide your approach and ensure that you are focusing on the most relevant aspects of the customer reviews.\n\n2. Choose a platform: There are many platforms available for analyzing customer reviews, such as social media monitoring tools, sentiment analysis tools, and text analysis tools. Choose a platform that best fits your needs and budget.\n\n3. Collect the data: The first step in analyzing customer reviews is to collect the data. This can be done by manually going through each review or by using a data scraping tool to gather reviews from various sources.\n\n4. Clean and organize the data: Once you have collected the data, it is important to clean and organize it. This involves removing any irrelevant or duplicate reviews and categorizing the reviews based on different criteria such as product, service, or sentiment.\n\

## Chat Model
Takes series of messages

In [11]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage

In [8]:
# The ChatModel objects take a list of messages as input and output a message.
chat_model = ChatOpenAI(temperature=0.4)

In [17]:
chat_model(
    [
        SystemMessage(content="You are a Bot to make concise and actionable response to users")
        , HumanMessage(content=huamn_text)
    ]
)

AIMessage(content='If you wish to analyze customer reviews, here are a few steps you can follow:\n\n1. Collect the data: Gather customer reviews from various sources such as online platforms, social media, surveys, or feedback forms. Make sure you have a substantial amount of data for meaningful analysis.\n\n2. Define your objectives: Determine what you want to achieve through the analysis. Are you looking to identify common themes, sentiment analysis, or extract specific insights? Clearly define your goals to guide your analysis.\n\n3. Preprocess the data: Clean the data by removing irrelevant information, such as duplicates or spam. Normalize the text by converting it to lowercase, removing punctuation, and removing stop words (common words like "and," "the," etc.).\n\n4. Apply sentiment analysis: Use natural language processing techniques to determine the sentiment of each review. Assign positive, negative, or neutral sentiment scores to each review to understand overall customer se

## Prompt
use template and input variables

contains content info

In [18]:
from langchain.prompts import PromptTemplate

my_prompt_template = PromptTemplate.from_template("What should I do if I wish to {things_wish_to_do}")
my_prompt_template.format(things_wish_to_do="learn skiing")

'What should I do if I wish to learn skiing'

In [21]:
my_prompt_template.format(things_wish_to_do="understand why customers are unhappy with service")

str

contains message info

In [25]:
from langchain.prompts.chat import ChatPromptTemplate

sys_temp = "You are a Bot to make {res_style_1} and {res_style_2} response to {user_role} in {word_limit} words"
human_temp = "{human_text}"

my_chat_prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", sys_temp)
        , ("human", human_temp)
    ]
)

human_content = my_prompt_template.format(things_wish_to_do="understand why customers are unhappy with service")

chat_prompt = my_chat_prompt_template.format_messages(res_style_1="concise"
                                                      , res_style_2="actionable"
                                                      , user_role="Data Scientist"
                                                      , word_limit="200"
                                                      , human_text=human_content)

[SystemMessage(content='You are a Bot to make concise and actionable response to Data Scientist in 200 words'),
 HumanMessage(content='What should I do if I wish to understand why customers are unhappy with service')]

Output parser

put together

In [None]:
chain = chat_prompt | chat_model |

# Retrieval

## Document Loader

In [27]:
import pandas as pd
from langchain.document_loaders import DataFrameLoader

In [28]:
df = pd.read_csv("../data/ms_sample.csv")

In [29]:
df.columns

Index(['ID', 'Q1_OSAT', 'Comments'], dtype='object')

In [30]:
df_loader = DataFrameLoader(df, page_content_column="Comments")
df_loader.load()

[Document(page_content='Courteous people who seem to care about the customer', metadata={'ID': 1, 'Q1_OSAT': 10}),
 Document(page_content='My concern was handled quickly and professionally by a caring individual who was knowledgeable in the area of my needs. They asked if there was anything else they could do to assist me, but they had already solved my problem.', metadata={'ID': 36, 'Q1_OSAT': 10}),
 Document(page_content='nan', metadata={'ID': 70, 'Q1_OSAT': 10}),
 Document(page_content='I had the GREATEST customer service and interaction with Deja!! She was very helpful and knowledgeable. She smiled and shook my hand.', metadata={'ID': 107, 'Q1_OSAT': 10}),
 Document(page_content='I felt someone was trying to trick me into giving personal info and saying they were from XXX. I did report it and I also hung up on this man.', metadata={'ID': 111, 'Q1_OSAT': 10}),
 Document(page_content="My issue which included transferring finds from one account to another's was easily managed with all

## 4. Embedding
Change text into a vector, could be used to compare similarity

In [32]:
from langchain.embeddings import OpenAIEmbeddings

embedding_model = OpenAIEmbeddings()
text = "Hi It's time for skiing"
text_emdedding = embedding_model.embed_query(text)

print("Your embedding length: {}".format(len(text_emdedding)))
print("Samples: {} ...".format(text_emdedding[:5]))

Your embedding length: 1536
Samples: [0.002953369864917754, 0.009271258475735903, -0.005900462615691105, -0.020576292621032808, -0.017073677829616826] ...


## Vectorstore

In [51]:
from langchain.vectorstores import Chroma
from langchain.document_loaders import CSVLoader

loader = CSVLoader(file_path="../data/ms_sample.csv", encoding="utf-8"
                   , csv_args={'delimiter': ','})
data = loader.load()
# vectorstore = FAISS.from_documents(df_loader, embedding_model)

In [52]:
data

[Document(page_content='ID: 1\nQ1_OSAT: 10\nComments: Courteous people who seem to care about the customer', metadata={'source': '../data/ms_sample.csv', 'row': 0}),
 Document(page_content='ID: 36\nQ1_OSAT: 10\nComments: My concern was handled quickly and professionally by a caring individual who was knowledgeable in the area of my needs. They asked if there was anything else they could do to assist me, but they had already solved my problem.', metadata={'source': '../data/ms_sample.csv', 'row': 1}),
 Document(page_content='ID: 70\nQ1_OSAT: 10\nComments: ', metadata={'source': '../data/ms_sample.csv', 'row': 2}),
 Document(page_content='ID: 107\nQ1_OSAT: 10\nComments: I had the GREATEST customer service and interaction with Deja!! She was very helpful and knowledgeable. She smiled and shook my hand.', metadata={'source': '../data/ms_sample.csv', 'row': 3}),
 Document(page_content='ID: 111\nQ1_OSAT: 10\nComments: I felt someone was trying to trick me into giving personal info and saying

In [55]:
vectorstore = Chroma.from_documents(data, embedding_model)

retriever = vectorstore.as_retriever()

## Retriever

In [65]:
def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

docs = format_docs(data)

In [67]:
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate

# Define prompt
prompt_template = """Write a concise summary of the following:
"{text}"
CONCISE SUMMARY:"""
prompt = PromptTemplate.from_template(prompt_template)

# Define LLM chain
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Define StuffDocumentsChain
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")


print(stuff_chain.run(docs))




AttributeError: 'str' object has no attribute 'page_content'

In [48]:
chain({"question":"how many reponses in this data, a response is identified by a unique ID", "chat_history":""})

{'question': 'how many reponses in this data, a response is identified by a unique ID',
 'chat_history': '',
 'answer': 'There are 4 responses in this data. Each response is identified by a unique ID.'}