# Experiments with promopts

Assumed Pinecone db is already created and populated with data.

In [1]:
import pinecone
import openai
import numpy as np
import os
from dotenv import load_dotenv

# Langchain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.document_loaders import TextLoader
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA, LLMChain
from langchain.callbacks import wandb_tracing_enabled
from langchain.prompts import (
    PromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)
from typing import Optional
from langchain.chains import SimpleSequentialChain ,SequentialChain

from langchain.chains.openai_functions import (
    create_openai_fn_chain,
    create_structured_output_chain,
)
from langchain.schema import HumanMessage, AIMessage, ChatMessage

# wandb
import wandb 

# Import singlife from utils 
from utils.singlife import Singlife

  from tqdm.autonotebook import tqdm


PATH_TO_ENV:  c:\Users\51703\Documents\GitHub\SingLife-Polyfintech2023\custom_llm\.env


## Initialization

* Load all the API keys from Pinecone and OpenAI
* Load the Pinecone client
* Set-up wandb tracing

In [2]:
# Load variables from the .env file
load_dotenv('../Sn33k/.env')

# Access the variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
PINECONE_ENVIRONMENT= os.getenv("PINECONE_ENVIRONMENT")

openai.api_key = OPENAI_API_KEY
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# wandb
os.environ["LANGCHAIN_WANDB_TRACING"] = "true"
os.environ['WANDB_NOTEBOOK_NAME'] = 'test_llm_pinecone.ipynb'
# here we are configuring the wandb project name
os.environ["WANDB_PROJECT"] = "Singlife"

## WANDB setup

In [3]:
wandb.login()
wandb_config = {"project": "Singlife"}

[34m[1mwandb[0m: Currently logged in as: [33mkaleb-nim[0m ([33mshiok[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
from langchain.callbacks.tracers import WandbTracer 
Tracer = WandbTracer(wandb_config)

[34m[1mwandb[0m: Streaming LangChain activity to W&B at https://wandb.ai/shiok/Singlife/runs/7ftek8vp
[34m[1mwandb[0m: `WandbTracer` is currently in beta.
[34m[1mwandb[0m: Please report any issues to https://github.com/wandb/wandb/issues with the tag `langchain`.


## LLM Setup

In [5]:
model_name = "gpt-3.5-turbo-0613"
temperature = 0.0
llm_qa = ChatOpenAI(model_name=model_name, temperature=temperature)
llm_qa.to_json()

{'lc': 1,
 'type': 'constructor',
 'id': ['langchain', 'chat_models', 'openai', 'ChatOpenAI'],
 'kwargs': {'model_name': 'gpt-3.5-turbo-0613',
  'temperature': 0.0,
  'openai_api_key': {'lc': 1, 'type': 'secret', 'id': ['OPENAI_API_KEY']}}}

In [6]:
model_name = "gpt-3.5-turbo-0613"
temperature = 0.0
llm_video_script = ChatOpenAI(model_name=model_name, temperature=temperature, model_kwargs= {"functions":[
    {
      "name": "output_formatter",
      "description": "Output formatter. Should always be used to format your response to the user.",
      "parameters": {
        "title": "generate_video_script",
        "description": "Generates 15-30sec video script based on custom knowledge base. Two components 1.Scene assets descriptions 2.Subtitle script",
        "type": "object",
        "properties": {
          "list_of_video_chunk": {
            "type": "array",
            "description": "List of video_chunk to be included in the video, one video chunk should last 3-5 seconds and includes: 1. Scene  2. Subtitle",
            "items": {
              "type": "object"
            },
            "properties": {
              "scene": {
                "type": "string",
                "description": "Scene description for video should be visual and general"
              },
              "subtitles": {
                "type": "string",
                "description": "Funny and sarcastic video subtitles script for video"
              }
            },
            "required": ["scene", "subtitles"]
          }
        },
      }
    }
],"function_call":{"name":"output_formatter"}})
llm_video_script.to_json()

{'lc': 1,
 'type': 'constructor',
 'id': ['langchain', 'chat_models', 'openai', 'ChatOpenAI'],
 'kwargs': {'model_name': 'gpt-3.5-turbo-0613',
  'temperature': 0.0,
  'model_kwargs': {'functions': [{'name': 'output_formatter',
     'description': 'Output formatter. Should always be used to format your response to the user.',
     'parameters': {'title': 'generate_video_script',
      'description': 'Generates 15-30sec video script based on custom knowledge base. Two components 1.Scene assets descriptions 2.Subtitle script',
      'type': 'object',
      'properties': {'list_of_video_chunk': {'type': 'array',
        'description': 'List of video_chunk to be included in the video, one video chunk should last 3-5 seconds and includes: 1. Scene  2. Subtitle',
        'items': {'type': 'object'},
        'properties': {'scene': {'type': 'string',
          'description': 'Scene description for video should be visual and general'},
         'subtitles': {'type': 'string',
          'descrip

## Vectorstore Setup ( Pinecone )

Pinecone integration with Langchain

### Initialize Pinecone client

In [7]:
# initialize pinecone
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_ENVIRONMENT,  # next to api key in console
)

index_name = "singlife"

embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
# if you already have an index, you can load it like this
docsearch = Pinecone.from_existing_index(index_name, embeddings)

# List all indexes information
index_description = pinecone.describe_index(index_name)
print('index_description: ', index_description)

index = pinecone.Index(index_name) 
index_stats_response = index.describe_index_stats()
print('index_stats_response: ', index_stats_response)

index_description:  IndexDescription(name='singlife', metric='cosine', replicas=1, dimension=1536.0, shards=1, pods=1, pod_type='p1', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')
index_stats_response:  {'dimension': 1536,
 'index_fullness': 0.1,
 'namespaces': {'': {'vector_count': 3488}},
 'total_vector_count': 3488}


### Create Vectorstore

In [8]:
vectorstore = Pinecone(index, embeddings.embed_query, "text")

# OpenAI Function Calling

> describe functions to gpt-4-0613 and gpt-3.5-turbo-0613, and have the model intelligently choose to output a JSON object containing arguments to call those functions


models have been fine-tuned to both detect when a function needs to be called (depending on the user’s input) and to respond with JSON that adheres to the function signature

### Functions are specified with the following fields:

* Name: The name of the function.
* Description: A description of what the function does. The model will use this to decide when to call the function.
* Parameters: The parameters object contains all of the input fields the function requires. These inputs can be of the following types: String, Number, Boolean, Object, Null, AnyOf. Refer to the API reference docs for details.
* Required: Which of the parameters are required to make a query. The rest will be treated as optional.

### Create QAretriever

In [9]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm_qa,
    chain_type="refine",
    retriever=vectorstore.as_retriever(),
    verbose=True,
)

In [10]:
qa_chain

RetrievalQA(memory=None, callbacks=None, callback_manager=None, verbose=True, tags=None, metadata=None, combine_documents_chain=RefineDocumentsChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, input_key='input_documents', output_key='output_text', initial_llm_chain=LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, prompt=ChatPromptTemplate(input_variables=['question', 'context_str'], output_parser=None, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context_str'], output_parser=None, partial_variables={}, template='Context information is below. \n---------------------\n{context_str}\n---------------------\nGiven the context information and not prior knowledge, answer any questions', template_format='f-string', validate_template=True), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'],

In [11]:
query = "I am travelling to Japan for a ski trip with my family next week.What kind of travel insurance coverage do we need?"
response = qa_chain(query)
response



[1m> Entering new RetrievalQA chain...[0m



[1m> Finished chain.[0m


{'query': 'I am travelling to Japan for a ski trip with my family next week.What kind of travel insurance coverage do we need?',
 'result': "Based on the provided context, it appears that the Singlife Corporate Travel Insurance policy is not suitable for a personal ski trip with your family. The policy is specifically designed for corporate travel purposes and may not provide the necessary coverage for leisure activities such as skiing.\n\nFor a ski trip with your family, it is recommended to consider a travel insurance policy that specifically covers recreational activities like skiing. Look for policies that include coverage for medical expenses related to skiing accidents, emergency medical evacuation, trip cancellation or interruption, and lost or damaged ski equipment.\n\nIt is important to review the policy details, including coverage limits, exclusions, and any specific requirements or conditions related to skiing activities. Consider exploring travel insurance options from prov

### Prompt Template for Video chain 

In [18]:
# Prompt template for video script generation
video_prompt = PromptTemplate(
    template="""Goal:Generate 15-30sec video script based on custom knowledge base (Information below) and user query. Two components 1.Scene assets descriptions 2.Subtitle script 
    Custom knowledge base:{result}\n\nUsing the above information, generate a video script that addresses this user query:\n\n"{query}".\nReturn the generated video script in the style/format: Funny and sarcastic""",
    input_variables= ["result", "query"]
)
video_prompt

PromptTemplate(input_variables=['result', 'query'], output_parser=None, partial_variables={}, template='Goal:Generate 15-30sec video script based on custom knowledge base (Information below) and user query. Two components 1.Scene assets descriptions 2.Subtitle script \n    Custom knowledge base:{result}\n\nUsing the above information, generate a video script that addresses this user query:\n\n"{query}".\nReturn the generated video script in the style/format: Funny and sarcastic', template_format='f-string', validate_template=True)

In [19]:
video_prompt_style ="Funny and sarcastic"
video_prompt = PromptTemplate(
    template="""Goal:Generate 15-30sec video script based on custom knowledge base (Information below) and user query. Two components 1.Scene assets descriptions 2.Subtitle script 
    Custom knowledge base:{result}\n\nUsing the above information, generate a video script that addresses this user query:\n\n"{query}".\nReturn the generated video script in the style/format: """+video_prompt_style,
    input_variables= ["result", "query"]
)
video_prompt

PromptTemplate(input_variables=['result', 'query'], output_parser=None, partial_variables={}, template='Goal:Generate 15-30sec video script based on custom knowledge base (Information below) and user query. Two components 1.Scene assets descriptions 2.Subtitle script \n    Custom knowledge base:{result}\n\nUsing the above information, generate a video script that addresses this user query:\n\n"{query}".\nReturn the generated video script in the style/format: Funny and sarcastic', template_format='f-string', validate_template=True)

### Create the video Chain

In [14]:
json_schema = {
        "title": "generate_video_script",
        "description": "Generates 15-30sec video script based on custom knowledge base. Two components 1.Scene descriptions 2.Subtitle script",
        "type": "object",
        "properties": {
          "list_of_video_chunk": {
            "type": "array",
            "description": "List of video_chunk to be included in the video, one video chunk should last 3-5 seconds and is a dictionary with keys: 1. Scene  2. Subtitle",
            "items": {
              "type": "object"
            },
            "properties": {
              "scene": {
                "type": "string",
                "description": "Scene description for video should be visual and general"
              },
              "subtitles": {
                "type": "string",
                "description": "Funny and sarcastic video subtitles script for video"
              }
            },
            "required": ["scene", "subtitles"]
          }
        },"required": ["list_of_video_chunk"],
      }

In [15]:
video_chain2 = create_structured_output_chain(json_schema, llm_qa, video_prompt, verbose=True)

### Chain for video script generation

In [20]:
overall_chain = SequentialChain(chains=[qa_chain, video_chain2],input_variables=["query"])

In [275]:
wandb.init(project="Singlife", config=wandb_config)

In [21]:
response = overall_chain.run(query=query,callbacks=[Tracer])
response



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGoal:Generate 15-30sec video script based on custom knowledge base (Information below) and user query. Two components 1.Scene assets descriptions 2.Subtitle script 
    Custom knowledge base:Based on the provided context, it appears that the Singlife Corporate Travel Insurance policy is not suitable for a personal ski trip with your family. The policy is specifically designed for corporate travel purposes and may not provide the necessary coverage for leisure activities such as skiing.

For your ski trip to Japan, it is recommended to seek out a travel insurance policy that specifically covers recreational activities like skiing. Look for a policy that includes coverage for medical expenses related to skiing accidents, emergency medical evacuation, trip cancellation or interruption, and loss or damage to ski equipment.

It is importan

{'scene_assets_description': "The video starts with a family packing their bags for their ski trip to Japan. They are excited and energetic, wearing winter clothes and holding ski equipment. The scene transitions to a comical animation of a corporate office, with people in suits and ties. The animation shows a sign that says 'Singlife Corporate Travel Insurance'. The scene quickly changes to a beautiful snowy mountain in Japan, with skiers enjoying their time on the slopes. The video ends with the family happily skiing together and a text overlay that says 'Get the right coverage for your ski trip!'",
 'subtitle_script': "Subtitle 1: Family: We're going skiing in Japan!\nSubtitle 2: Narrator: But wait, not all travel insurance policies are created equal.\nSubtitle 3: Narrator: The Singlife Corporate Travel Insurance policy may not be suitable for your family ski trip.\nSubtitle 4: Narrator: You need a policy that covers recreational activities like skiing.\nSubtitle 5: Narrator: Look f

# Let the experiment begin

In [14]:
query = "I am travelling to Japan for a ski trip with my family next week.What kind of travel insurance coverage do we need?"

In [253]:
user_request = """Based on the Singlife Corporate Travel Insurance policy, you should consider the following coverage for your ski trip to Japan:
1. Accidental death, permanent disablement and burns benefit: This provides coverage in case of accidental injury during your trip.
2. Medical and medical evacuation: This is crucial for a ski trip as it covers any medical emergencies or injuries that may occur, including the cost of evacuation if necessary.
3. Trip cancellation: If there's a last-minute cancellation, you can receive coverage for non-refundable deposits or unused travel and accommodation costs.
4. Full terrorism cover: This offers a lump-sum payout if an unfortunate event occurs.
5. Delayed departure, missed departure or connection: This covers any additional expenses incurred due to delayed or missed flights.
6. Loss or damage of baggage and personal belongings: This provides coverage for lost or damaged personal items during your trip.
7. Rental vehicle excess: If you plan to rent a vehicle during your trip, this coverage can be beneficial.
8. COVID-19 Coverage: The policy covers trip interruptions or cancellations due to COVID-19 and covers the medical treatment if you contract COVID-19 during or after the trip.
Remember to choose a plan that best suits your family's needs. The Elite Plan offers comprehensive coverage for frequent travellers, while the Classic Plan covers the basics.

Using the above information, generate a video script that addresses this user query:I am travelling to Japan for a ski 
trip with my family next week. What kind of travel insurance coverage do we need? In the format of sarcastic and funny."""

# wandb trace
with wandb_tracing_enabled():
    first_response = llm_video_script

first_response

AttributeError: 'ChatOpenAI' object has no attribute 'run'

# Documentation testing ( Delete kalate )

In [77]:
from langchain.chains.openai_functions.openapi import get_openapi_chain

In [245]:
chain = get_openapi_chain(
    "https://www.klarna.com/us/shopping/public/openai/v0/api-docs/"
)
chain

Attempting to load an OpenAPI 3.0.1 spec.  This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.


SequentialChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, chains=[LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, prompt=ChatPromptTemplate(input_variables=['query'], output_parser=None, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['query'], output_parser=None, partial_variables={}, template="Use the provided API's to respond to this user query:\n\n{query}", template_format='f-string', validate_template=True), additional_kwargs={})]), llm=ChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo-0613', temperature=0.7, model_kwargs={}, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy='', request_timeout=None, max_re

In [71]:
chain.run("What are some options for a men's large blue button down shirt")

{'products': [{'name': "Levi's Men's Classic Button-Down Shirt, Medium",
   'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3202043708/Clothing/Levi-s-Men-s-Classic-Button-Down-Shirt-Medium/?utm_source=openai&ref-site=openai_plugin',
   'price': '$15.99',
   'attributes': ['Material:Cotton',
    'Target Group:Man',
    'Color:Blue,Purple,Black,Orange,Green',
    'Size:S,XL,L,M,XXL']},
  {'name': 'Cubavera Four Pocket Guayabera Shirt',
   'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3202055522/Clothing/Cubavera-Four-Pocket-Guayabera-Shirt/?utm_source=openai&ref-site=openai_plugin',
   'price': '$24.99',
   'attributes': ['Material:Polyester,Cotton',
    'Target Group:Man',
    'Color:Red,White,Blue,Black',
    'Properties:Pockets',
    'Pattern:Solid Color',
    'Size:S,XL,L,M,XXL']},
  {'name': 'Short Sleeve Cotton Plaid Shirt',
   'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3201940921/Clothing/Short-Sleeve-Cotton-Plaid-Shirt/?utm_source=openai&ref-site=openai_

In [33]:
llm  = ChatOpenAI(model_name="gpt-4-0613", temperature=1)
query = """Example Update summary:
Fully Custom Trained BERTopic Model for verification of disruption events from News Articles
    * Webscraped 2.5k news articles from 4 different sources + manually labeled test set 375 
    * Automated Grid search parameter tuning BERTopic model
    * Evaluation BERTopic model
Refactored code base
    *Modify the schema of Supabase
    *Bug-fix + Cleanup of technical debt
    *End-to-End testing using pytest for key functionalities 
Learnt OpenAI API / Langchain
    *Built custom-trained WhatsApp chatbot

    
Using that example, generate a summary of my work with the following details
Points to anotate down
HSC Everything
Points 
Fully read 2 research papers ( Imitate their work wholesale first ) 
Visualize the Map 
Understand the Gurobi, 
Domain knowledge on 

Mini-task:
Meeting to settle compatibility issue. First meeting with Liu Ning + Santosh. Since I've spent so long working out the compatibility, 
found the most promising method using torchscript c++ API to use ML model in c++ codebase.
My task was, given a completed traning script to develop ML model in sklearn. Ensure the model was serialized in Pytorch format instead. 
c++ TorchScript Libtorch


BERTopic Model improvements:
Use entire News article content to train instead of just the title
Re-trained BERTopic model with Both Title+Text data.
- Created another Evaluation Dataset to test 

Evaulated performace:
- Clusters Count for news article per topic became more distributed ( There's progress there )

LLM:
Custom trained my on personal data ( Can show demo )
I did alot of Pinecone
-Pinecone Query + OpenAI Text embedding
- Cleaned and scraped 5.7k PDFs
- Vectorized batched the pdfs into Pinecode DB

Mirxes project:
Got invited to join the actual Client Meeting with Mirxes ( Demo-ed the prototype to them ) 
Pivot-->Find how to make my output of mointoring disruption events to be useful

Data Analyst on Mirxes internal data
Show the mapping of the gastroclear graph"""

result = llm.predict(query)



AuthenticationError: Incorrect API key provided: sk-oWbZ7***************************************X6ta. You can find your API key at https://platform.openai.com/account/api-keys.

In [32]:
formatted_result = result.replace("emailAgent email:", "emailAgent email: \n")
# Save the result to a text file
with open("emailAgent.txt", "w") as f:
    f.write(result)

# Test the singlife class

In [2]:
# all the init is done in the Singlife class
singlife = Singlife()

index_description:  IndexDescription(name='singlife', metric='cosine', replicas=1, dimension=1536.0, shards=1, pods=1, pod_type='p1', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')
index_stats_response:  {'dimension': 1536,
 'index_fullness': 0.1,
 'namespaces': {'': {'vector_count': 3488}},
 'total_vector_count': 3488}
vectorstore created succesfully
Model successfully loaded: cache=None verbose=False callbacks=None callback_manager=None tags=None metadata=None client=<class 'openai.api_resources.chat_completion.ChatCompletion'> model_name='gpt-3.5-turbo-0613' temperature=0.0 model_kwargs={} openai_api_key='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' openai_api_base='' openai_organization='' openai_proxy='' request_timeout=None max_retries=6 streaming=False n=1 max_tokens=None tiktoken_model_name=None


In [3]:
query = "I am travelling to Japan for a ski trip with my family next week.What kind of travel insurance coverage do we need?"

In [4]:
result = singlife.generateScript(query=query, model_name="gpt-3.5-turbo-0613", video_style="Funny and sarcastic")
result

Time to initialize overall chain: -0.0009975433349609375


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGoal:Generate 15-30sec video script based on custom knowledge base (Information below) and user query. Two components 1.Scene assets descriptions 2.Subtitle script 
                Custom knowledge base:Based on the provided context, it appears that the Singlife Corporate Travel Insurance policy is specifically designed for corporate travel purposes. It includes coverage for various aspects such as trip cancellation, medical and medical evacuation, personal liability, baggage loss or damage, and other travel-related incidents.

However, since you are traveling for a ski trip with your family, it is important to note that the Singlife Corporate Travel Insurance may not provide specific coverage for leisure or personal trips. It is advisable to contact the insurance provider direc

{'scene_assets': 'Scene: A family packing their bags for a ski trip. They are excited and energetic.',
 'subtitle_script': "Subtitle: So, you're going on a ski trip with your family, huh? Well, buckle up and get ready for some insurance talk!",
 'video_script': "Scene: A family packing their bags for a ski trip. They are excited and energetic.\n\nSubtitle: So, you're going on a ski trip with your family, huh? Well, buckle up and get ready for some insurance talk!\n\nScene: Family members looking confused and worried.\n\nSubtitle: Now, here's the deal. The Singlife Corporate Travel Insurance policy may not be your best bet for this leisurely adventure.\n\nScene: Family members scratching their heads.\n\nSubtitle: It's designed for corporate travel, not family ski trips. Bummer, right?\n\nScene: Family members looking disappointed.\n\nSubtitle: But don't worry, we've got a solution for you!\n\nScene: Family members looking hopeful.\n\nSubtitle: You'll need to find a travel insurance poli