In [2]:
import os 
import sys
import json 
import pandas as pd 
import re

from langchain.document_loaders import JSONLoader,TextLoader
from langchain.text_splitter import CharacterTextSplitter

# Keep any key that is already exported in the environment: an unconditional
# assignment would overwrite a valid key with an empty string and make every
# OpenAI call fail. Never paste a real key into the notebook.
os.environ.setdefault('OPENAI_API_KEY', "")

### Data Mortgage 

In [3]:
# Load the mortgage call log and keep only the rows that have an Opportunity value.
excel_path = "..//data/Data_Mortgage.xlsx"
# dropna is chained (no inplace=True) so the cell is idempotent on re-run, and
# `subset` is passed as a list, the list-like form dropna always accepts.
excel_data = pd.read_excel(excel_path).dropna(subset=['Opportunity'])
excel_data.head()

Unnamed: 0,Date,Company / Account,Opportunity,Unnamed: 3,Lead,Assigned,Priority,Status,Task,Ameyo Recording URL,Call Type,CallDurationInSeconds
1,4/4/2023,Mohammed Jaffer,Mohammed Jaffer,Mohammed Jaffer,,Yaseen Syed Ali,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,422
2,4/4/2023,G Abbas,G Abbas,G Abbas,,Yaseen Syed Ali,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,237
6,4/4/2023,Ahsan Khan,Ahsan Khan,Ahsan Khan,,Yaseen Syed Ali,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,74
11,4/5/2023,Fayiqa Iftikhar,Fayiqa Iftikhar,Fayiqa Iftikhar,,Yaseen Syed Ali,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,481
13,4/5/2023,Smith Suresh Shetty,Smith Suresh Shetty,Smith Suresh Shetty,,Yaseen Syed Ali,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,269


In [4]:
### Read the JSON data of recordings
data_path = "../data/Processed_data/Audio_data.json"
# The file is only read, so plain 'r' is enough ('r+' also asks for write
# access). The explicit encoding keeps decoding independent of the platform
# default. The `with` block closes the file; no extra close() is needed.
with open(data_path, 'r', encoding='utf-8') as fd:
    audio_data = json.load(fd)

### Extract all the records for Sales Executive

For example: executive ABC had conversations with multiple customers, so all of ABC's records are passed to the data loader.

In [5]:
# Keep only the call-log rows assigned to the chosen sales executive.
name_of_executive = 'Juraira Manzoor'
assigned_to_executive = excel_data['Assigned'].eq(name_of_executive)
df_executive = excel_data.loc[assigned_to_executive]
print(df_executive.shape)
df_executive.head()

(59, 12)


Unnamed: 0,Date,Company / Account,Opportunity,Unnamed: 3,Lead,Assigned,Priority,Status,Task,Ameyo Recording URL,Call Type,CallDurationInSeconds
452,5/3/2023,Hassan .,Hassan .,Hassan .,,Juraira Manzoor,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,193
453,5/3/2023,Rimantas macevicius,Rimantas macevicius,Rimantas macevicius,,Juraira Manzoor,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,192
465,5/4/2023,Maryam Rasaei,Maryam Rasaei,Maryam Rasaei,,Juraira Manzoor,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,512
467,5/4/2023,Mohammed Larik,Mohammed Larik,Mohammed Larik,,Juraira Manzoor,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,536
469,5/4/2023,Ahmad B,Ahmad B,Ahmad B,,Juraira Manzoor,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,300


In [10]:
# Collect every transcript handled by one relationship manager and write the
# result to a temporary JSON file.
# NOTE(review): 'Jurara' is the value compared against the audio data's
# `relationship_manager` field; it differs from `name_of_executive`
# ('Juraira Manzoor') - confirm this is the spelling used in the audio data.
relationship_manager = 'Jurara'
# Raw string: "\s" inside a plain string literal is an invalid escape sequence.
whitespace_run = re.compile(r"\s\s+")

conversation_dict = {
    'data': [
        {
            'name': name_of_executive,
            # Collapse runs of whitespace, then prefix the customer name.
            'text': "conversation with Customer {}:{}".format(
                records['customer'], whitespace_run.sub(" ", records['text'])
            ),
            'customer': records['customer'],
            'language': records['language'],
        }
        for records in audio_data['data']
        if records['relationship_manager'] == relationship_manager
    ]
}

# Write-only access is sufficient; the `with` block closes the file.
with open('../data/tmp.json', 'w') as fd:
    json.dump(conversation_dict, fd)


### Load data with JSONLoader

In [11]:


def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy the customer and language of a record into its metadata.

    Args:
        record: Mapping that may carry ``customer`` and ``language`` keys.
        metadata: Metadata dict to enrich; it is updated in place.

    Returns:
        The same ``metadata`` dict with ``customer`` and ``language`` set
        (``None`` for a key the record does not have).
    """
    for key in ('customer', 'language'):
        metadata[key] = record.get(key)
    return metadata

# Every element selected by the '.data[]' jq expression becomes one document;
# its "text" value is used as the content and metadata_func enriches the metadata.
loader_options = dict(
    file_path='../data/tmp.json',
    jq_schema='.data[]',
    content_key="text",
    metadata_func=metadata_func,
)
loader = JSONLoader(**loader_options)
texts = loader.load()


### Load Embeddings to Vector Database

In [12]:
import openai
import os 
from langchain.embeddings.base import Embeddings
from typing import List
from sentence_transformers import SentenceTransformer
from langchain.embeddings.openai import OpenAIEmbeddings

In [18]:
class LocalHuggingFaceEmbeddings(Embeddings):
    """Embeddings implementation backed by a locally loaded SentenceTransformer model."""

    def __init__(self, model_id):
        """Load the SentenceTransformer model identified by ``model_id``."""
        # Should use the GPU by default
        self.model = SentenceTransformer(model_id)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of documents using a locally running
           Hugging Face Sentence Transformer model
        Args:
            texts: The list of texts to embed.
        Returns:
            List of embeddings, one for each text.
        """
        # Convert each vector to plain Python floats so the result really is
        # List[List[float]] (the raw encode() result was returned before),
        # mirroring what embed_query does for a single text.
        return [list(map(float, vector)) for vector in self.model.encode(texts)]

    def embed_query(self, text: str) -> List[float]:
        """Embed a query using a locally running HF
        Sentence Transformer.
        Args:
            text: The text to embed.
        Returns:
            Embeddings for the text.
        """
        embedding = self.model.encode(text)
        return list(map(float, embedding))

huggingface_embeddings = LocalHuggingFaceEmbeddings('multi-qa-mpnet-base-dot-v1')

In [18]:
from langchain.vectorstores import FAISS

# Embed the loaded documents, build a FAISS vector store from them (document
# metadata is kept with the vectors), then save the store to disk.
vectorstore = FAISS.from_documents(
    documents=texts,
    embedding=huggingface_embeddings,
)
vectorstore.save_local('../data/faiss_dmac_gpt_exp')

### Chat Query with ConversationalRetrievalChain

In [15]:
import os
from dotenv import load_dotenv
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import ConversationalRetrievalChain
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT

In [None]:
# Load the FAISS store from disk with the local embeddings, and create the
# completion model with temperature 0.
vectordb = FAISS.load_local(
    '../data/faiss_dmac_gpt_exp/',
    embeddings=huggingface_embeddings,
)
chat = OpenAI(temperature=0)

### Create Prompt

In [6]:
def create_prompt():
    """Build the prompt used to answer questions about the retrieved conversations.

    Returns:
        A PromptTemplate that expects the ``context``, ``question`` and
        ``chat_history`` variables.
    """
    template_text = """
    Analyze conversations between customer and sales executive from context.
    If customer shows interest in service or Property , conversation is a potential lead.  
    Always answer point wise with person names. Don't make up answers
   
    {context}
   
    {chat_history}
   
    Question: {question}
    Answer stepwise: 
    """
    return PromptTemplate(
        template=template_text,
        input_variables=["context", "question", "chat_history"],
    )


In [52]:
# Build the conversational retrieval chain: documents come from the FAISS
# retriever and are combined with the custom prompt from create_prompt().
# The `question_generator` and `doc_chain` objects that used to be created here
# were never passed to the chain (and `question_generator` wrapped the answer
# prompt rather than a condense-question prompt), so they are no longer built.
chain = ConversationalRetrievalChain.from_llm(
    llm=chat,
    retriever=vectordb.as_retriever(),
    combine_docs_chain_kwargs={"prompt": create_prompt()},
)
# (question, answer) tuples of the running conversation; starts empty.
chat_history = []


### Query with customers 

In [53]:
# Ask the chain a question, show the exchange, and record it in the history.
query = "What is Ms. Emmy Interested in? "
result = chain({"question": query, "chat_history": chat_history})

answer = result['answer']
print("query:{}".format(result['question']), "response:\n{}".format(answer), sep="\n")
chat_history.append((query, answer))

query:What is Ms. Emmy Interested in? 
response:

    Ms. Emmy is interested in taking a mortgage for a one bedroom apartment.


In [54]:
# Inspect the (question, answer) tuples accumulated in the history so far.
print(chat_history)

[('What is Ms. Emmy Interested in? ', '\n    Ms. Emmy is interested in taking a mortgage for a one bedroom apartment.')]


In [55]:
# Follow-up question: the accumulated chat_history is passed to the chain
# along with it.
query = "How much mortage is she talking about ?"
result = chain({"question": query, "chat_history": chat_history})

answer = result['answer']
print("query:{}".format(result['question']), "response:\n{}".format(answer), sep="\n")
chat_history.append((query, answer))

query:How much mortage is she talking about ?
response:

Ms. Emmy has not yet finalized the mortgage amount for her one bedroom apartment. She will need to speak with a mortgage advisor to determine the amount.


In [56]:
# Another follow-up on the same conversation; the exchange is appended to the history.
query = "What are the next steps for her  ?"
result = chain({"question": query, "chat_history": chat_history})

answer = result['answer']
print("query:{}".format(result['question']), "response:\n{}".format(answer), sep="\n")
chat_history.append((query, answer))

query:What are the next steps for her  ?
response:

1. Ms. Emmy should speak with a mortgage advisor to discuss her eligibility and the amount of mortgage she can take. 
2. The mortgage advisor will ask her to provide documents such as bank statements, passport, and other relevant documents. 
3. The mortgage advisor will then provide her with a pre-approval and she will have 45 days to avail it. 
4. After the pre-approval is finalized, Ms. Emmy can then proceed to finalize the mortgage amount for her one bedroom apartment.


In [8]:
# Ask the chain about the value being looked for, print the exchange, and
# record it in the history.
query = "What is the value he looking for ?"
result = chain({"question": query, "chat_history": chat_history})

answer = result['answer']
print("query:{}".format(result['question']), "response:\n{}".format(answer), sep="\n")
chat_history.append((query, answer))

query:What is the value he looking for ?
response:
 Majeed is interested in a mortgage value between 850,000 and 900,000.


In [16]:
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
# QA chain over the FAISS store that was built with OpenAI embeddings.
docsearch = FAISS.load_local("../vector_embeddings/faiss_damac_audio_data_openai/", embeddings=OpenAIEmbeddings())
prompt = create_prompt()
chat = ChatOpenAI(verbose=True, temperature=0)

# max_tokens_limit caps the tokens taken by the retrieved documents. Giving
# them the whole 4097-token context leaves no room for the instructions, the
# question or the generated answer, so part of the window is held back.
# NOTE(review): 4097 is the value used originally; confirm it matches the
# context size of the model ChatOpenAI uses here, and tune the reserve.
MODEL_CONTEXT_TOKENS = 4097
RESERVED_TOKENS = 1000  # instructions + chat history + question + answer

qa = ConversationalRetrievalChain.from_llm(
    llm=chat,
    retriever=docsearch.as_retriever(),
    combine_docs_chain_kwargs={"prompt": prompt},
    max_tokens_limit=MODEL_CONTEXT_TOKENS - RESERVED_TOKENS,
)

In [9]:
# Start from an empty history and ask the QA chain for a summary; the answer
# is stored with a trailing space and blank lines.
chat_history = []
qa_inputs = {'question': 'summary of conversations', "chat_history": chat_history}
generated_response = qa(qa_inputs)
formatted_response = f"{generated_response['answer']} \n\n"


In [10]:
# Show the formatted summary text.
print(formatted_response)

1. Conversation between Madame Franjia and Monsieur Saddi Khenazim in French language.
2. Madame Franjia was disconnected with Mr. Saddi Khenazim and there was a connection problem.
3. Monsieur Saddi Khenazim is a relationship manager.
4. It is unclear what the conversation was about as it was interrupted by the disconnection.
5. No potential lead check was done as the conversation was not related to services or property.

1. Conversation between شرام and ساس محمود in Arabic language.
2. شرام is interested in discussing real estate services and investments.
3. They were trying to reach a number from Union Transaction to the Agency.
4. The conversation was interrupted and it is unclear what the outcome was.
5. No potential lead check was done as the conversation was related to real estate investments.

1. Conversation between Ms. Honey and Mahsan in English language.
2. Mahsan is a relationship manager from Pripco Services.
3. Ms. Honey was referred by Mr. Gian Fernando for a mortgage f

In [21]:
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
# QA chain over the FAISS store loaded with the local Hugging Face embeddings.
docsearch = FAISS.load_local("../vector_embeddings/faiss_damac_audio_data_hugging_face/", 
                             embeddings=huggingface_embeddings)
prompt = create_prompt()
chat = ChatOpenAI(verbose=True, temperature=0)

# max_tokens_limit caps the tokens taken by the retrieved documents. Giving
# them the whole 4097-token context leaves no room for the instructions, the
# question or the generated answer, so part of the window is held back.
# NOTE(review): 4097 is the value used originally; confirm it matches the
# context size of the model ChatOpenAI uses here, and tune the reserve.
MODEL_CONTEXT_TOKENS = 4097
RESERVED_TOKENS = 1000  # instructions + chat history + question + answer

qa = ConversationalRetrievalChain.from_llm(
    llm=chat,
    retriever=docsearch.as_retriever(),
    combine_docs_chain_kwargs={"prompt": prompt},
    max_tokens_limit=MODEL_CONTEXT_TOKENS - RESERVED_TOKENS,
)

In [22]:
# Start from an empty history, ask the QA chain for a summary, and print the
# answer followed by a trailing space and blank lines.
chat_history = []
qa_inputs = {'question': 'summary of conversations', "chat_history": chat_history}
generated_response = qa(qa_inputs)
formatted_response = f"{generated_response['answer']} \n\n"
print(formatted_response)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')).


1. Jyot from Pripko Services calls Madam Fatima regarding her interest in mortgage.
2. Jyot asks Madam Fatima if she is a resident in UAE and her age (28).
3. Madam Fatima confirms that she is a salaried employee and has been working for four years.
4. Jyot asks if Madam Fatima has any liabilities like credit cards, personal loans, or car loans, to which she responds negatively.
5. Jyot asks about Madam Fatima's nationality (Emirati) and if she is looking for a property, to which she responds that she hasn't decided yet.
6. Madam Fatima confirms that this would be her first property and that she is not yet ready to discuss the price.
7. Jyot schedules a call back for Monday next week at the same time. 
8. In the second conversation, Monsieur Saddi Khenazim from an unknown company calls Madame Franjia, but the call gets disconnected twice due to a connection problem. 




In [8]:
# from glob import glob 

# filenames = glob("../data/Audio/*.json")


# def merge_JsonFiles(filename):
#     result = {'conversation data':[]}
#     for f1 in filename:
#         with open(f1, 'r') as infile:
#             data = json.load(infile)
#         print(f1,len(data['data']))
#         # result['conversation data'].append(data['data'][0])

#     # with open('counseling3.json', 'w') as output_file:
#     #     json.dump(result, output_file,indent=4)

# merge_JsonFiles(filenames)