In [1]:
import os 
import sys
import json 
import pandas as pd 
import re

### Data Mortgage 

In [2]:
# Workbook with the mortgage call log. Referenced relative to the notebook, like
# the other inputs under ../data, instead of an absolute path that only exists on
# one machine.
# NOTE(review): assumes the notebook sits one directory below the project root,
# next to the `data` folder - adjust if the layout differs.
excel_path = "../data/Data_Mortgage.xlsx"

# Keep only rows that have an Opportunity value. A new frame is assigned instead of
# mutating in place so the cell gives the same result every time it is re-run.
excel_data = pd.read_excel(excel_path).dropna(subset=['Opportunity'])
excel_data.head()

Unnamed: 0,Date,Company / Account,Opportunity,Unnamed: 3,Lead,Assigned,Priority,Status,Task,Ameyo Recording URL,Call Type,CallDurationInSeconds
1,4/4/2023,Mohammed Jaffer,Mohammed Jaffer,Mohammed Jaffer,,Yaseen Syed Ali,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,422
2,4/4/2023,G Abbas,G Abbas,G Abbas,,Yaseen Syed Ali,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,237
6,4/4/2023,Ahsan Khan,Ahsan Khan,Ahsan Khan,,Yaseen Syed Ali,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,74
11,4/5/2023,Fayiqa Iftikhar,Fayiqa Iftikhar,Fayiqa Iftikhar,,Yaseen Syed Ali,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,481
13,4/5/2023,Smith Suresh Shetty,Smith Suresh Shetty,Smith Suresh Shetty,,Yaseen Syed Ali,Low,Completed,True,https://prypto-api.aswat.co/surveillance/recor...,Outbound,269


In [3]:
# Distinct values found in the Assigned column.
excel_data['Assigned'].unique()

array(['Yaseen Syed Ali', 'Michelle Machado', 'Aymen Aydi',
       'Naila Samreen', 'Nazygul Bektenova', 'Khadidja Ghodbane',
       'Rohan Sankath', 'Harvinder Singh', 'Ehson Odinaev',
       'Juraira Manzoor', 'Abdellatif Laribi', 'Jude Forghema'],
      dtype=object)

In [None]:
### Read the JSON data of recordings
data_path = "../data/Audio_data.json"
# The file is only read, so plain 'r' is enough ('r+' also demands write
# permission). The with-block closes the handle, so no explicit close() is needed.
with open(data_path, 'r') as fd:
    audio_data = json.load(fd)

### Extract all the records for Sales Executive

For example: Juraira had conversations with multiple customers, so her records are the ones passed to the data loader.

In [None]:
# Sales executive whose records are extracted in the following cells.
name_of_executive = 'Juraira Manzoor'

# Rows of the workbook whose Assigned column equals that executive.
df_executive = excel_data.loc[excel_data['Assigned'].eq(name_of_executive)]
print(df_executive.shape)
df_executive.head()


In [None]:
# Collect every transcript in `audio_data` that is assigned to `name_of_executive`
# and write them to ../data/tmp.json as {"data": [ {name, text, customer, date}, ... ]}.
conversation_dict = {'data': []}

# Raw string: in a normal string literal "\s" is an invalid escape sequence, which
# newer Python versions warn about. Compiled once because it is reused per record.
multi_whitespace = re.compile(r"\s\s+")

for records in audio_data['Data']:
    content = records['content']
    if content['Assigned'] != name_of_executive:
        continue

    # Collapse each run of two or more whitespace characters into one space.
    text = multi_whitespace.sub(" ", content['text'])
    customer = content['Customer']
    text = "conversation with Customer {}:{}".format(customer, text)

    conversation_dict['data'].append({
        'name': name_of_executive,
        'text': text,
        'customer': customer,
        'date': ''  # written as an empty string for every record
    })

# Write-only access is sufficient for dumping the result.
with open("../data/tmp.json", 'w') as fc:
    json.dump(conversation_dict, fc, indent=4)


### Load data as JsonLoader

In [5]:
from langchain.document_loaders import JSONLoader,TextLoader
from langchain.text_splitter import CharacterTextSplitter
def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy the customer and date of one JSON record into the metadata dict.

    Args:
        record: A parsed JSON record; only its 'customer' and 'date' keys are read.
        metadata: Metadata dict to extend. It is modified in place.

    Returns:
        The same ``metadata`` object with 'customer' and 'date' set. A key that
        is missing from ``record`` is stored as ``None``.
    """
    for field in ('customer', 'date'):
        metadata[field] = record.get(field)
    return metadata

# Loader over the tmp.json export: the jq schema '.data[]' addresses the entries
# of the "data" array, "text" is the content key, and metadata_func contributes
# the customer/date metadata.
loader = JSONLoader(
    metadata_func=metadata_func,
    content_key="text",
    jq_schema='.data[]',
    file_path='../data/tmp.json',
)

# Loaded result, used by the vector-store cell further down.
texts = loader.load()


### Load Embeddings to Vector Database

In [1]:
from langchain.embeddings.base import Embeddings
from typing import List
from sentence_transformers import SentenceTransformer
from langchain.embeddings.openai import OpenAIEmbeddings

import openai
import os 
def openai_key_is_configured(environ=None) -> bool:
    """Tell whether a non-blank OPENAI_API_KEY is available.

    Args:
        environ: Mapping to inspect; defaults to ``os.environ``.

    Returns:
        True when 'OPENAI_API_KEY' is present and not empty/whitespace.
    """
    env = os.environ if environ is None else environ
    return bool((env.get('OPENAI_API_KEY') or '').strip())


# SECURITY: a secret OpenAI key used to be hardcoded on this line. A key that was
# saved in a notebook must be treated as leaked - revoke it and create a new one.
# Provide the key from outside the notebook instead, e.g. run
# `export OPENAI_API_KEY=...` before starting Jupyter (or use getpass interactively).
if not openai_key_is_configured():
    import warnings
    warnings.warn(
        "OPENAI_API_KEY is not set; the OpenAI calls in this notebook will fail "
        "until it is provided through the environment."
    )

In [2]:


class LocalHuggingFaceEmbeddings(Embeddings):
    """Embeddings implementation backed by a locally loaded SentenceTransformer."""

    def __init__(self, model_id):
        """Load the sentence-transformers model.

        Args:
            model_id: Identifier passed straight to ``SentenceTransformer``.
        """
        # Device selection is left to SentenceTransformer itself.
        # NOTE(review): the original comment said this "should use the GPU by
        # default" - not verifiable from this file, confirm on the target machine.
        self.model = SentenceTransformer(model_id)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of documents using a locally running
           Hugging Face Sentence Transformer model
        Args:
            texts: The list of texts to embed.
        Returns:
            List of embeddings, one for each text, each a list of Python floats.
        """
        # Convert every row of the encode() result to plain floats so the return
        # value matches the annotated List[List[float]] (and embed_query) instead
        # of handing the model's raw array object to callers.
        return [list(map(float, vector)) for vector in self.model.encode(texts)]

    def embed_query(self, text: str) -> List[float]:
        """Embed a query using a locally running HF
        Sentence transformer.
        Args:
            text: The text to embed.
        Returns:
            Embeddings for the text, as a list of Python floats.
        """
        embedding = self.model.encode(text)
        return list(map(float, embedding))

# local_embeddings = LocalHuggingFaceEmbeddings('multi-qa-mpnet-base-dot-v1')

In [6]:
from langchain.vectorstores import FAISS
# Turn the loaded documents into vectors with OpenAI embeddings and keep them,
# together with their metadata, in an in-memory FAISS store.
vectorstore = FAISS.from_documents(texts, embedding=OpenAIEmbeddings())

# Save the index to disk; the chat cells load it again from this folder.
vectorstore.save_local('../data/faiss_dmac_gpt')

### Chat Query 

In [2]:
import os
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import ConversationalRetrievalChain
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT

# Saved FAISS index from ../data/faiss_dmac_gpt/, opened with OpenAI embeddings.
vectordb = FAISS.load_local('../data/faiss_dmac_gpt/', embeddings=OpenAIEmbeddings())

# LLM used by the chains below, created with temperature 0.
chat = OpenAI(temperature=0)
# memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


In [3]:
# Two sub-chains share the same LLM: one driven by CONDENSE_QUESTION_PROMPT and
# one question-answering chain of type 'map_reduce'.
question_generator = LLMChain(prompt=CONDENSE_QUESTION_PROMPT, llm=chat)
doc_chain = load_qa_chain(chat, chain_type='map_reduce')

# Retrieval chain over the FAISS store, wired to the two sub-chains above.
chain = ConversationalRetrievalChain(
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    retriever=vectordb.as_retriever(),
)

# (question, answer) tuples are appended here by the query cells.
chat_history = list()


In [4]:
# Ask the chain, show question and answer, then record the exchange in chat_history.
query = "What is Majeed Interested in? "
result = chain({"question": query, "chat_history": chat_history})

answer = result['answer']
print(f"query:{result['question']}")
print(f"response:\n{answer}")
chat_history.append((query, answer))

query:What is Majeed Interested in? 
response:
 Majeed is interested in taking a mortgage.


In [5]:
# Follow-up question; the accumulated chat_history is passed along with it.
query = "How much mortage is he talking about ?"
result = chain({"question": query, "chat_history": chat_history})

answer = result['answer']
print(f"query:{result['question']}")
print(f"response:\n{answer}")
chat_history.append((query, answer))

query:How much mortage is he talking about ?
response:
 Majeed is interested in taking a mortgage between 850,000 and 900,000.


In [7]:
# Follow-up question; the accumulated chat_history is passed along with it.
query = "What are the next steps for him ?"
result = chain({"question": query, "chat_history": chat_history})

answer = result['answer']
print(f"query:{result['question']}")
print(f"response:\n{answer}")
chat_history.append((query, answer))

query:What are the next steps for him ?
response:
 Majeed's next steps regarding his mortgage are to provide the necessary documents to the mortgage advisor, who will then provide multiple offers from different banks for Majeed to choose from.


In [8]:
# Follow-up question; the accumulated chat_history is passed along with it.
query = "What is the value he looking for ?"
result = chain({"question": query, "chat_history": chat_history})

answer = result['answer']
print(f"query:{result['question']}")
print(f"response:\n{answer}")
chat_history.append((query, answer))

query:What is the value he looking for ?
response:
 Majeed is interested in a mortgage value between 850,000 and 900,000.


In [8]:
from glob import glob 

# glob() gives no ordering guarantee; sorting makes the merge order, and therefore
# the merged output file, reproducible across runs and machines.
filenames = sorted(glob("../data/Processed_data/*.json"))


def merge_JsonFiles(filename, output_path='counseling3.json'):
    """Merge the ``data`` entry of several JSON files into a single JSON file.

    The output has the shape ``{"conversation data": [d1, d2, ...]}`` where
    ``dN`` is the value stored under ``"data"`` in the N-th input file, in the
    order the inputs are given.

    Args:
        filename: Iterable of paths to JSON files (the singular name is kept for
            backward compatibility). A single path is also accepted and treated
            as a one-element list.
        output_path: Where the merged JSON is written. Defaults to
            'counseling3.json' in the current working directory, which was the
            previously hard-coded destination.

    Raises:
        KeyError: If an input file has no top-level "data" key.
    """
    # A lone path would otherwise be iterated character by character.
    if isinstance(filename, (str, os.PathLike)):
        filename = [filename]

    merged = []
    for path in filename:
        with open(path, 'r') as infile:
            # Each file's "data" value is appended as one element (not flattened).
            merged.append(json.load(infile)['data'])

    with open(output_path, 'w') as output_file:
        json.dump({'conversation data': merged}, output_file, indent=4)

# Merge all files matched above into a single JSON file.
merge_JsonFiles(filename=filenames)

In [19]:
# Path of the second recordings file.
# NOTE(review): this rebinds `audio_data`, which an earlier cell uses for the parsed
# JSON of Audio_data.json - re-running that earlier loop after this cell would fail.
audio_data = "../data/Audio_data_2.json"
with open(audio_data) as handle:
    data = json.load(handle)

conversation_dict = {}

# Print the transcript text of every record assigned to this person.
person = "Yaseen Syed Ali"
for entry in data['Data']:
    content = entry['content']
    if content['Assigned'] != person:
        continue
    print(content['text'])
    

 Hello. Hello. Hiya. Hi, good evening Mr. Likur Rehman. Yes, yes this is, yes. We are seeing calling from Pripko, how are you doing today? Good, thank you. What company are you calling from? Pripko, I am calling you in regards to the mortgage assistance. As we referred your details from Daria. Okay, yeah. From that you are planning to purchase the property in UAE and you are looking for mortgage assistance? Yes, yes I am actually, yeah. So wonderful. I am just calling to get some information like a basic details to understand the qualification. Then I can put you across with one of the mortgage advisor who will be helping you to get the detailed information about the mortgage values and interest rate, everything. So I like to know. Can I speak to them? I have a business here and I am really strapped for time. So I can give you some basic information. Can you put me through to them and I will give you guys information at once. So I have got a business here and I am keen to invest some m