In [None]:
# To get environment variables
import os

# Widen the notebook's main container so long prompts and outputs are easier to read
from IPython.display import display, HTML
wide_container_css = "<style>.container { width:90% !important; }</style>"
display(HTML(wide_container_css))

# To split our transcript into pieces
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Our chat model. We'll use the default which is gpt-3.5-turbo
from langchain.chat_models import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain

# Prompt templates for dynamic values
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate, # I included this one so you know you'll have it but we won't be using it
    HumanMessagePromptTemplate
)

# To create our chat messages
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

In [None]:
# Keep a key that is already exported in the environment; only fall back to the
# placeholder below when none is set. Replace the placeholder locally and never
# commit a real key with the notebook.
os.environ.setdefault('OPENAI_API_KEY', '...')

In [None]:
# Read the whole sales-call transcript into memory as a single string.
# The encoding is passed explicitly so the text decodes the same way on every
# platform instead of depending on the locale default.
# NOTE(review): assumes the transcript file is UTF-8 — confirm for your data.
with open('../data/Transcripts/acme_co_v2.txt', 'r', encoding='utf-8') as file:
    content = file.read()

In [None]:
# Show only the opening of the transcript as a quick check that it loaded
preview_length = 215  # for this transcript, 215 characters cuts off at a clean line
print("Transcript:\n")
print(content[:preview_length])

Split our documents so we don't run into token issues. Experiment with what chunk size works best for your use case.

In [None]:
# Configure the splitter: blank lines and single newlines are the split points,
# with the chunk size and overlap set explicitly.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n"],
    chunk_size=2000,
    chunk_overlap=250,
)

# create_documents takes a list of raw strings; here it is just the one transcript
texts = text_splitter.create_documents([content])

In [None]:
# Report how many chunks the splitter produced, then display the first one
num_texts = len(texts)
print(f"You have {num_texts} texts")
texts[0]

In [None]:
# No key is passed to the constructor, so it should already be available as an
# environment variable (or else be supplied here).
# A chat model is used so that switching to gpt4 remains an option.
llm = ChatOpenAI(
    temperature=0,
)

We're going to start with the vanilla load_summarize_chain to see how it goes.
If you want to see the default prompts that are used you can explore the LangChain code. Here are the [map reduce prompts](https://github.com/hwchase17/langchain/blob/master/langchain/chains/summarize/map_reduce_prompt.py)

In [None]:
# verbose=True will output the prompts being sent to the LLM
chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    verbose=True,
)

In [None]:
# Run the summarization chain over the transcript chunks and keep the result
output = chain.run(texts)

In [None]:
# Show the summary produced with the default prompts
print(output)

### Custom Prompts

I'm going to write custom prompts that give the AI more instructions on what role I want it to play

In [None]:
# System message: describes the assistant's role. {sales_rep_name} and
# {sales_rep_company} are placeholders whose values are supplied when the chain runs.
template = """

You are a helpful assistant that helps {sales_rep_name}, a sales rep at {sales_rep_company}, summarize information from a sales call.
Your goal is to write a summary from the perspective of {sales_rep_name} that will highlight key points that will be relevant to making a sale
Do not respond with anything outside of the call transcript. If you don't know, say, "I don't know"
Do not repeat {sales_rep_name}'s name in your output

"""

# Human message: the transcript text is passed through unchanged
human_template = "{text}"

system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [None]:
# Combine the system and human message templates, in that order, into one chat prompt
chat_prompt = ChatPromptTemplate.from_messages(
    messages=[
        system_message_prompt,
        human_message_prompt,
    ]
)

In [None]:
# Only the map step gets the custom prompt. Because we aren't specifying a
# combine prompt, the default one will be used.
chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    map_prompt=chat_prompt,
)

In [None]:
# Inputs for this run: the documents plus a value for each prompt placeholder
run_inputs = {
    "input_documents": texts,
    "sales_rep_company": "Marin Transitions Partner",
    "sales_rep_name": "Greg",
}
output = chain.run(run_inputs)

In [None]:
# Show the summary produced with the custom map prompt
print(output)

 I wanted to change the format of the output without needing the user to do extra prompting.

### Promptless changes

To do this I'll write a few points about the different output types I would like. However, all that I'll expose to the user is a simple selection, radio button, or drop down. (We'll use text for now but you can do this in your app).

I want to give the user the option to select between different summary output types.

I'll have them pick between:
1. One Sentence
2. Bullet Points
3. Short
4. Long

I could try to pass these words to the LLM, but I want to be more explicit with it. Plus, giving good instructions is the way to go!

In [None]:
# Formatting instructions keyed by the summary type a user can select.
# Each value is a block of bullet-style instructions written for the LLM.
summary_output_options = {
    # A single-sentence summary
    'one_sentence': """
     - Only one sentence
    """,
    # A concise bulleted list
    'bullet_points': """
     - Bullet point format
     - Separate each bullet point with a new line
     - Each bullet point should be concise
    """,
    # A brief paragraph
    'short': """
     - A few short sentences
     - Do not go longer than 4-5 sentences
    """,
    # A detailed, multi-paragraph write-up
    'long': """
     - A verbose summary
     - You may do a few paragraphs to describe the transcript if needed
    """,
}

Create a new template that takes an additional parameter. I need to put this in the combined prompt so that the LLM will output in my format. If I did this in the map section I would lose the format after the combined prompt was done

**Map Prompt**

In [None]:
# Map-step system message. The rep's name is taken from {sales_rep_name} everywhere
# (it was previously hard-coded as "Greg" in the second sentence), so the prompt
# stays consistent when the chain is run for a different sales rep.
template="""

You are a helpful assistant that helps {sales_rep_name}, a sales rep at {sales_rep_company}, summarize information from a sales call.
Your goal is to write a summary from the perspective of {sales_rep_name} that will highlight key points that will be relevant to making a sale
Do not respond with anything outside of the call transcript. If you don't know, say, "I don't know"
"""
system_message_prompt_map = SystemMessagePromptTemplate.from_template(template)

human_template="{text}" # Simply pass the text as a human message
human_message_prompt_map = HumanMessagePromptTemplate.from_template(human_template)

# System message first, then the human message carrying the text
chat_prompt_map = ChatPromptTemplate.from_messages(messages=[system_message_prompt_map, human_message_prompt_map])

**Combined Prompt**

In [None]:
# Combine-step system message. The rep's name is taken from {sales_rep_name}
# everywhere (it was previously hard-coded as "Greg" in the second sentence), so
# the prompt stays consistent when the chain is run for a different sales rep.
# {output_format} receives the formatting instructions for the final summary.
template="""

You are a helpful assistant that helps {sales_rep_name}, a sales rep at {sales_rep_company}, summarize information from a sales call.
Your goal is to write a summary from the perspective of {sales_rep_name} that will highlight key points that will be relevant to making a sale
Do not respond with anything outside of the call transcript. If you don't know, say, "I don't know"

Respond with the following format
{output_format}

"""
system_message_prompt_combine = SystemMessagePromptTemplate.from_template(template)

human_template="{text}" # Simply pass the text as a human message
human_message_prompt_combine = HumanMessagePromptTemplate.from_template(human_template)

# System message first, then the human message carrying the text
chat_prompt_combine = ChatPromptTemplate.from_messages(messages=[system_message_prompt_combine, human_message_prompt_combine])

In [None]:
# Custom prompts are supplied for both the map step and the combine step;
# verbose=True is on so the prompts are shown while the chain runs.
chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    map_prompt=chat_prompt_map,
    combine_prompt=chat_prompt_combine,
    verbose=True,
)

In [None]:
# Stand-in for the user's choice; it is used as a key into summary_output_options
user_selection = 'one_sentence'

# Inputs for this run: the documents plus a value for each prompt placeholder,
# including the formatting instructions looked up from the selection.
run_inputs = {
    "input_documents": texts,
    "sales_rep_company": "Marin Transitions Partner",
    "sales_rep_name": "Greg",
    "output_format": summary_output_options[user_selection],
}
output = chain.run(run_inputs)

In [None]:
# Show the final summary, which should follow the selected output format
print(output)