# Experiments with promopts

Assumed Pinecone db is already created and populated with data.

In [1]:
import pinecone
import openai
import numpy as np
import os
from dotenv import load_dotenv

# Langchain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.document_loaders import TextLoader
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA, LLMChain
from langchain.callbacks import wandb_tracing_enabled
from langchain.prompts import (
    PromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)
from typing import Optional

from langchain.chains.openai_functions import (
    create_openai_fn_chain,
    create_structured_output_chain,
)
from langchain.schema import HumanMessage, AIMessage, ChatMessage

# wandb
import wandb 

  from tqdm.autonotebook import tqdm


## Initialization

* Load all the API keys from Pinecone and OpenAI
* Load the Pinecone client
* Set-up wandb tracing

In [2]:
# Load variables from the .env file
load_dotenv('../Sn33k/.env')

# Access the variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
PINECONE_ENVIRONMENT= os.getenv("PINECONE_ENVIRONMENT")

openai.api_key = OPENAI_API_KEY
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# wandb
os.environ["LANGCHAIN_WANDB_TRACING"] = "true"
# here we are configuring the wandb project name
os.environ["WANDB_PROJECT"] = "Singlife"

## WANDB setup

In [6]:
wandb.login()
wandb_config = {"project": "Singlife"}

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


KeyboardInterrupt: 

In [5]:
from langchain.callbacks.tracers import WandbTracer 
Tracer = WandbTracer(wandb_config)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


ServiceStartTimeoutError: Timed out waiting for wandb service to start after 30.0 seconds. Try increasing the timeout with the `_service_wait` setting.

## LLM Setup

In [7]:
model_name = "gpt-3.5-turbo-0613"
temperature = 0.0
llm = ChatOpenAI(model_name=model_name, temperature=temperature)

## Vectorstore Setup ( Pinecone )

Pinecone integration with Langchain

### Initialize Pinecone client

In [8]:
# initialize pinecone
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_ENVIRONMENT,  # next to api key in console
)

index_name = "singlife"

embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
# if you already have an index, you can load it like this
docsearch = Pinecone.from_existing_index(index_name, embeddings)

# List all indexes information
index_description = pinecone.describe_index(index_name)
print('index_description: ', index_description)

index = pinecone.Index(index_name) 
index_stats_response = index.describe_index_stats()
print('index_stats_response: ', index_stats_response)

index_description:  IndexDescription(name='singlife', metric='cosine', replicas=1, dimension=1536.0, shards=1, pods=1, pod_type='p1', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')
index_stats_response:  {'dimension': 1536,
 'index_fullness': 0.1,
 'namespaces': {'': {'vector_count': 3488}},
 'total_vector_count': 3488}


### Create Vectorstore

In [9]:
vectorstore = Pinecone(index, embeddings.embed_query, "text")

# OpenAI Function Calling

> describe functions to gpt-4-0613 and gpt-3.5-turbo-0613, and have the model intelligently choose to output a JSON object containing arguments to call those functions


models have been fine-tuned to both detect when a function needs to be called (depending on the user’s input) and to respond with JSON that adheres to the function signature

### Functions are specified with the following fields:

* Name: The name of the function.
* Description: A description of what the function does. The model will use this to decide when to call the function.
* Parameters: The parameters object contains all of the input fields the function requires. These inputs can be of the following types: String, Number, Boolean, Object, Null, AnyOf. Refer to the API reference docs for details.
* Required: Which of the parameters are required to make a query. The rest will be treated as optional.

In [28]:
json_schema  = {
                "title": "generate_video_script",
                "description": "Generates 15-30sec video script based on custom knowledge base. Two components 1.Scene assets descriptions 2.Subtitle script",
                "type": "object",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "scene": {
                            "type": "string",
                            "description": "Scene description for video should be visual and general",
                        },
                        "subtitles": {
                            "type": "string",
                            "description": "Funny and sarcastic video subtitles script for video",
                        },
                    },
                    "required": ["scene", "subtitles"],
                },
            }


### Create QAretriever

In [24]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

### Prompt Template for Video chain 

In [29]:
# Prompt template for video script generation
video_prompt = PromptTemplate(
    
    template="""Goal:Generate 15-30sec video script based on custom knowledge base (Information below) and user query. Two components 1.Scene assets descriptions 2.Subtitle script 
Custom knowledge base:{documents}

Using the above information, generate a video script that addresses this user query: {query}. Video style/format: Funny and sarcastic""",
    input_variables= ["documents", "query"]
)

### Chain for video script generation

In [30]:
video_chain = create_structured_output_chain(json_schema, llm, video_prompt, verbose=True)

In [31]:
# Test video_chain
documents = """Singlife offers several types of insurance:\n\n1. Singlife Direct Purchase Insurance: This is a life insurance plan that provides coverage for up to S$400,000. It offers protection in the event of death, terminal illness, or total and permanent disability. You can choose between a fixed period of coverage or lifelong protection.\n\n2. Singlife Personal Accident Insurance: This is a personal accident plan that covers expenses incurred due to accidental injury. It also provides coverage for children for free. The plan includes benefits such as payout for accidental death, daily hospital allowance, permanent disablement, and medical expenses.\n\n3. Singlife Corporate Travel Insurance: This insurance is designed for business travel and provides coverage for employees during their trips. It includes benefits such as extended length of cover, trip cancellation for any reason, accidental injury coverage, terrorism coverage, and COVID-19 coverage.\n\n4. Singlife Home Insurance: This insurance protects your home and its contents. It offers comprehensive coverage for household contents, renovations, removal of debris, family worldwide liability, alternative accommodation, loss of rent, and more.\n\nThese are the main types of insurance offered by Singlife."""
query="""I am travelling to Japan for a ski trip with my family next week. What kind of travel insurance coverage do we need? In the format of sarcastic and funny.
"""
video_chain.run(query = query, documents=documents)



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mGoal:Generate 15-30sec video script based on custom knowledge base (Information below) and user query. Two components 1.Scene assets descriptions 2.Subtitle script 
Custom knowledge base:Singlife offers several types of insurance:

1. Singlife Direct Purchase Insurance: This is a life insurance plan that provides coverage for up to S$400,000. It offers protection in the event of death, terminal illness, or total and permanent disability. You can choose between a fixed period of coverage or lifelong protection.

2. Singlife Personal Accident Insurance: This is a personal accident plan that covers expenses incurred due to accidental injury. It also provides coverage for children for free. The plan includes benefits such as payout for accidental death, daily hospital allowance, permanent disablement, and medical expenses.

3. Singlife Corporate Travel Insurance: This insurance is designed for business travel and prov

TypeError: __str__ returned non-string (type list)

# Let the experiment begin

In [14]:
query = "I am travelling to Japan for a ski trip with my family next week.What kind of travel insurance coverage do we need?"

In [15]:
from langchain.chains import SequentialChain,SimpleSequentialChain
overall_chain = SimpleSequentialChain(chains=[qa_chain(query), video_chain], verbose=True)


ValidationError: 1 validation error for SimpleSequentialChain
__root__
  Chains used in SimplePipeline should all have one input, got memory=None callbacks=None callback_manager=None verbose=True tags=None metadata=None prompt=PromptTemplate(input_variables=['documents', 'query', 'video_style'], output_parser=None, partial_variables={}, template='Goal:Generate 15-30sec video script based on custom knowledge base (Information below) and user query. Two components 1.Scene assets descriptions 2.Subtitle script \nCustom knowledge base:{documents}\n\nUsing the above information, generate a video script that addresses this user query: {query}. Video style/format: {video_style}', template_format='f-string', validate_template=True) llm=ChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo-0613', temperature=0.0, model_kwargs={}, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy='', request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None, tiktoken_model_name=None) output_key='function' output_parser=JsonOutputFunctionsParser(args_only=True) return_final_only=True llm_kwargs={'functions': [{'name': 'output_formatter', 'description': 'Output formatter. Should always be used to format your response to the user.', 'parameters': {'title': 'generate_video_script', 'description': 'Generates 15-30sec video script based on custom knowledge base. Two components 1.Scene assets descriptions 2.Subtitle script', 'parameters': {'type': 'object', 'properties': {'scene': {'type': 'string', 'description': 'Scene description for video should be visual and general'}, 'subtitles': {'type': 'string', 'description': 'Funny and sarcastic video subtitles script for video'}}, 'required': ['scene', 'subtitles']}}}], 'function_call': {'name': 'output_formatter'}} with 3 inputs. (type=value_error)

In [None]:
user_request = """Singlife offers several types of insurance:\n\n1. Singlife Direct Purchase Insurance: This is a life insurance plan that provides coverage for up to S$400,000. It offers protection in the event of death, terminal illness, or total and permanent disability. You can choose between a fixed period of coverage or lifelong protection.\n\n2. Singlife Personal Accident Insurance: This is a personal accident plan that covers expenses incurred due to accidental injury. It also provides coverage for children for free. The plan includes benefits such as payout for accidental death, daily hospital allowance, permanent disablement, and medical expenses.\n\n3. Singlife Corporate Travel Insurance: This insurance is designed for business travel and provides coverage for employees during their trips. It includes benefits such as extended length of cover, trip cancellation for any reason, accidental injury coverage, terrorism coverage, and COVID-19 coverage.\n\n4. Singlife Home Insurance: This insurance protects your home and its contents. It offers comprehensive coverage for household contents, renovations, removal of debris, family worldwide liability, alternative accommodation, loss of rent, and more.\n\nThese are the main types of insurance offered by Singlife.

Using the above information, generate a video script that addresses this user query:I am travelling to Japan for a ski 
trip with my family next week. What kind of travel insurance coverage do we need? In the format of sarcastic and funny."""

# wandb trace
with wandb_tracing_enabled():
    first_response = llm.predict_messages([HumanMessage(content=user_request)],
    functions=function_descriptions)
first_response

AIMessage(content='', additional_kwargs={'function_call': {'name': 'generate_video_script', 'arguments': '{\n  "scene": "Scene: A family excitedly packing their bags for a ski trip to Japan. They are surrounded by winter clothes, ski equipment, and travel essentials.",\n  "subtitles": "Subtitle: So, you\'re off to Japan for a ski trip, huh? Well, buckle up and get ready for some insurance talk! We\'ve got just the coverage you need to keep your family safe and sound on those slippery slopes. Introducing Singlife\'s Corporate Travel Insurance, the superhero of travel protection!\\n\\nSubtitle: Picture this: you\'re shredding the powdery slopes, feeling like a pro, when suddenly, disaster strikes! You take a tumble and sprain your ankle. Ouch! But fear not, with Singlife\'s Corporate Travel Insurance, you\'re covered for accidental injuries. We\'ll take care of those medical expenses so you can focus on perfecting your snowplow.\\n\\nSubtitle: And hey, accidents happen, right? What if yo