In [4]:
#Import the libraries
import os
import fitz
import json
import base64
import requests
from PIL import Image
import openai
import pandas as pd
import numpy as np


In [2]:
# OpenAI API Key
api_key = '<openai-api-key>'

In [3]:
# Function to encode the image
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

In [4]:
# final extract table
# this function takes the images extracted as input and returns the content as a JSON Object for clarity using GPT-4o
def extract_table(image_path):

    base64_image = encode_image(image_path)
    print(image_path)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4o",
        "messages": [
            {
            "role": "user",
            "content": [
                {
                "type": "text",
                "text": "Create a json object of the given table"
                },
                {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}"
                }
                }
            ]
            }
        ],
        "max_tokens": 1000
        }

    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)

    result = response.json()
    print("result:",result)  

    # Returns the content from the Response
    if 'choices' in result and len(result['choices']) > 0:
        return result['choices'][0]['message']['content']
    else:
        print("Error: 'choices' not found in the response or empty choices list")
        return None

In [5]:
# Function to replace the tables in the PDF with the JSON Object created using extract_tables
def extract_and_replace_tables(pdf_file, output_folder, table_output_folder):

    # Output folder with Modified PDF
    os.makedirs(output_folder, exist_ok=True)

    if pdf_file.lower().endswith('.pdf'):
        pdf_path = pdf_file
        print(f"Processing PDF: {pdf_path}")

        # Image folder to store images of tables for the PDF
        image_folder = os.path.join(table_output_folder, f"{os.path.splitext(os.path.basename(pdf_file))[0]}_images")
        os.makedirs(image_folder, exist_ok=True)

        doc = fitz.open(pdf_path)

        for page_number in range(doc.page_count):
            page = doc[page_number]
            tabs = page.find_tables()

            for table_number, table in enumerate(tabs): 
                table_data = table.extract() 
                # The position of the table in the PDF 
                table_bbox = table.bbox

                # Table Area is converted to an image
                table_rect = fitz.Rect(table_bbox) 
                table_image = page.get_pixmap(clip=table_rect)
                img = Image.frombytes("RGB", [table_image.width, table_image.height], table_image.samples)

                # Saving the table image in the table_output folder 
                image_name = f"table_{page_number}_{table_number}.png"
                image_path = os.path.join(image_folder, image_name)
                img.save(image_path)

                extracted_table_dict = extract_table(image_path)
                print("extracted_table_dict:", extracted_table_dict)

                if extracted_table_dict != {}:
                    # Redaction annotation added to hide the original table 
                    page.add_redact_annot(table_bbox, fill=(1, 1, 1))
                    page.apply_redactions(images=fitz.PDF_REDACT_IMAGE_NONE)

                    # The table is replaced with the extracted content as JSON and json file id at the table's position
                    table_id = f"<$table_{page_number}-{table_number}$>"
                    page.insert_textbox(table_bbox, json.dumps(f"{table_id}, 'table_data':{extracted_table_dict}"), fontsize=8)

        # Modified PDF saved
        modified_pdf_path = os.path.join(output_folder, os.path.basename(pdf_file))

        doc.save(modified_pdf_path)
        doc.close()
        return modified_pdf_path




In [48]:
# File Paths
pdf_file = r"C:\Users\Dell\Downloads\Athina\PDF-Chatbot-RAG\backend\policy-booklet.pdf"
output_folder = r"C:\Users\Dell\Downloads\Athina\PDF-Chatbot-RAG\backend\output"
table_output_folder = r"C:\Users\Dell\Downloads\Athina\PDF-Chatbot-RAG\backend\table_output"

# Function
extract_and_replace_tables(pdf_file, output_folder, table_output_folder)

Processing PDF: C:\Users\Dell\Downloads\Athina\PDF-Chatbot-RAG\backend\policy-booklet.pdf
C:\Users\Dell\Downloads\Athina\PDF-Chatbot-RAG\backend\table_output\policy-booklet_images\table_7_0.png
result: {'id': 'chatcmpl-9YELohwZlyM4tIcDKzgFvgjSr3jzA', 'object': 'chat.completion', 'created': 1717945344, 'model': 'gpt-4o-2024-05-13', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'Sure, here is the JSON object representation of the table:\n\n```json\n{\n  "Section 1: Liability": {\n    "Injuries to other people": {\n      "Third Party, Fire and Theft": "",\n      "Essentials": "✔️ Unlimited",\n      "Comprehensive": "✔️ Unlimited",\n      "Comprehensive Plus": "✔️ Unlimited"\n    },\n    "Property damage": {\n      "Third Party, Fire and Theft": "✔️ £20,000,000 per accident (includes all costs and expenses)",\n      "Essentials": "",\n      "Comprehensive": "✔️ £20,000,000 per accident (includes all costs and expenses)",\n      "Comprehensive Plus": "✔️ £20,000,000 p

'C:\\Users\\Dell\\Downloads\\Athina\\PDF-Chatbot-RAG\\backend\\output\\policy-booklet.pdf'

In [6]:
import fitz  

# Text is extracted from the PDF using fitz
def extract_text_from_pdf(pdf_path):
    text = ""
    pdf_document = fitz.open(pdf_path)
    for page_num in range(pdf_document.page_count):
        page = pdf_document.load_page(page_num)
        text += page.get_text()
    pdf_document.close()
    return text

DOC_PATH = r"C:\Users\Dell\Downloads\Athina\PDF-Chatbot-RAG\backend\output\policy-booklet.pdf"
text = extract_text_from_pdf(DOC_PATH)
print(text[:30000])

Your car insurance 
policy booklet
Page 2
FAQs	
3
Glossary 	
4
Making a claim	
6
What your cover includes	
8
Section 1: Liability 	
11
Section 2: Fire and theft 	
14
Section 3: Courtesy car 	
17
Section 4: Accidental damage 	
18
Section 5: Windscreen damage 	
20
Section 6: Personal benefits 	
21
Section 7: Motor Legal Cover 	
23
Section 8: Guaranteed Hire Car Plus 	
28
Section 9: Protected No Claim Discount	
30
Where you can drive	
31
Losses we don’t cover 	
33
Other conditions you need to know about	
36
How the policy works 	
37
Everything else 	
41
If you have a complaint 	
42
If you’re in an accident	
43
How to get in touch 	
Back cover
Contents
Welcome to Churchill
This booklet tells you about your car insurance
About the policy
The policy is made up of:
	
> This booklet.
	
> Your car insurance details.
	
> Your certificate (or certificates)  
of motor insurance.
If the policy includes Green Flag breakdown cover:
	
> Your breakdown cover and your car 
insurance are part of the same

In [7]:
# Dictionary mapping Unicode characters to their meanings
# Symbols like tick mark and cross mark are replaced with "Yes" and "No"
unicode_meanings = {
    r"\u00a": "£",
    r"\u2714\ufe0f": "Yes, ",
    r"\u274c": "No, "
}

# Unicode characters replaced
for unicode_char, meaning in unicode_meanings.items():
    text = text.replace(unicode_char, meaning)

print(text)


Your car insurance 
policy booklet
Page 2
FAQs	
3
Glossary 	
4
Making a claim	
6
What your cover includes	
8
Section 1: Liability 	
11
Section 2: Fire and theft 	
14
Section 3: Courtesy car 	
17
Section 4: Accidental damage 	
18
Section 5: Windscreen damage 	
20
Section 6: Personal benefits 	
21
Section 7: Motor Legal Cover 	
23
Section 8: Guaranteed Hire Car Plus 	
28
Section 9: Protected No Claim Discount	
30
Where you can drive	
31
Losses we don’t cover 	
33
Other conditions you need to know about	
36
How the policy works 	
37
Everything else 	
41
If you have a complaint 	
42
If you’re in an accident	
43
How to get in touch 	
Back cover
Contents
Welcome to Churchill
This booklet tells you about your car insurance
About the policy
The policy is made up of:
	
> This booklet.
	
> Your car insurance details.
	
> Your certificate (or certificates)  
of motor insurance.
If the policy includes Green Flag breakdown cover:
	
> Your breakdown cover and your car 
insurance are part of the same

In [8]:
text

'Your car insurance \npolicy booklet\nPage 2\nFAQs\t\n3\nGlossary \t\n4\nMaking a claim\t\n6\nWhat your cover includes\t\n8\nSection 1: Liability \t\n11\nSection 2: Fire and theft \t\n14\nSection 3: Courtesy car \t\n17\nSection 4: Accidental damage \t\n18\nSection 5: Windscreen damage \t\n20\nSection 6: Personal benefits \t\n21\nSection 7: Motor Legal Cover \t\n23\nSection 8: Guaranteed Hire Car Plus \t\n28\nSection 9: Protected No Claim Discount\t\n30\nWhere you can drive\t\n31\nLosses we don’t cover \t\n33\nOther conditions you need to know about\t\n36\nHow the policy works \t\n37\nEverything else \t\n41\nIf you have a complaint \t\n42\nIf you’re in an accident\t\n43\nHow to get in touch \t\nBack cover\nContents\nWelcome to Churchill\nThis booklet tells you about your car insurance\nAbout the policy\nThe policy is made up of:\n\t\n> This booklet.\n\t\n> Your car insurance details.\n\t\n> Your certificate (or certificates)  \nof motor insurance.\nIf the policy includes Green Flag brea

In [8]:
import fitz  
import re
import spacy

nlp = spacy.load("en_core_web_sm")

# Chunking of text
# If there is a table, it will be in one chunk only, starts with "<$table_" and ends before "Page"
# Splits the text into chunks of 500 characters while ensuring that complete sentences are maintained within each chunk
def chunk_text(text, chunk_size=500):
    chunks = []
    current_chunk = ""
    i = 0
    length = len(text)

    while i < length:
        table_start = text.find("<$table_", i)

        if table_start == -1 or table_start >= i + chunk_size:
            end = min(i + chunk_size, length)
            current_chunk += text[i:end]
            i = end
            
            doc = nlp(current_chunk)
            sentences = [sent.text for sent in doc.sents]
            temp_chunk = ""
            for sentence in sentences:
                if len(temp_chunk) + len(sentence) > chunk_size:
                    chunks.append(temp_chunk.strip())
                    temp_chunk = sentence
                else:
                    temp_chunk += sentence
            current_chunk = temp_chunk  
        else:
            if current_chunk.strip():
                chunks.append(current_chunk.strip())
                current_chunk = ""

            if table_start > i:
                current_chunk += text[i:table_start]
                doc = nlp(current_chunk)
                sentences = [sent.text for sent in doc.sents]
                temp_chunk = ""
                for sentence in sentences:
                    if len(temp_chunk) + len(sentence) > chunk_size:
                        chunks.append(temp_chunk.strip())
                        temp_chunk = sentence
                    else:
                        temp_chunk += sentence
                if temp_chunk.strip():
                    chunks.append(temp_chunk.strip())
                current_chunk = ""
                i = table_start

            table_end = text.find("Page", table_start)
            if table_end == -1:
                table_end = length
            else:
                table_end += len("Page")

            chunks.append(text[table_start:table_end].strip())
            i = table_end

    if current_chunk.strip():
        chunks.append(current_chunk.strip())

    return chunks

chunks = chunk_text(text)

for i, chunk in enumerate(chunks):
    print(f"Chunk {i+1}:\n{chunk}\n{'-'*50}\n")


Chunk 1:

--------------------------------------------------

Chunk 2:
Your car insurance 
policy booklet
Page 2
FAQs	
3
Glossary 	
4
Making a claim	
6
What your cover includes	
8
Section 1: Liability 	
11
Section 2: Fire and theft 	
14
Section 3: Courtesy car 	
17
Section 4: Accidental damage 	
18
Section 5: Windscreen damage 	
20
Section 6: Personal benefits 	
21
Section 7: Motor Legal Cover 	
23
Section 8: Guaranteed Hire Car Plus 	
28
Section 9: Protected No Claim Discount	
30
Where you can drive	
31
Losses we don’t cover 	
33
Other conditions you need to knowabout	
36
How the policy works 	
37
Everything else 	
41
If you have a complaint 	
42
If you’re in an accident	
43
How to get in touch 	
Back cover
Contents
Welcome to Churchill
This booklet tells you about your car insurance
About the policy
--------------------------------------------------

Chunk 3:
The policy is made up of:
	
>This booklet.
	
>Your car insurance details.
	
> Your certificate (or certificates)  
of motor in

In [5]:
from openai import OpenAI
client = OpenAI(api_key= api_key)

# Get embeddings of text using openai
def get_embedding(text, model="text-embedding-ada-002"):
    text = text.replace("\n", " ")
    response = client.embeddings.create(input = [text], model=model)
    # response = openai.Embedding.create(model=model, input=text) 
    return response.data[0].embedding



In [10]:
# Generating embeddings for each chunk
embeddings = []
for chunk in chunks:
    embedding = get_embedding(chunk)
    embeddings.append(embedding)

# DataFrame to store the chunks and their embeddings
df = pd.DataFrame({
    'text_chunk': chunks,
    'embedding': embeddings
})

In [11]:
print(df)
# Text Chunks and their embeddings stored
df.to_csv('embeddings_policy-booklet.csv', index=False)

                                            text_chunk  \
0                                                        
1    Your car insurance \npolicy booklet\nPage 2\nF...   
2    The policy is made up of:\n\t\n>This booklet.\...   
3    Please read all these documents carefully  \na...   
4    Your cover is the same as a  \nComprehensive p...   
..                                                 ...   
231  If this happens, we want you \nto tell us.We’l...   
232  Our staff will do everything they can to suppo...   
233                             lve your complaint.\n"   
234  <$table_41-0$>, 'table_data':Here's a JSON obj...   
235  43\nIf you’re in an accident\nNeed to claim?\n...   

                                             embedding  
0    [0.0015564068453386426, -0.01698176935315132, ...  
1    [0.0017841552617028356, 0.015992281958460808, ...  
2    [-0.00417745066806674, 0.012323152273893356, 0...  
3    [-0.014140964485704899, 0.020749015733599663, ...  
4    [-0.008116209

In [4]:
# Embeding the question
# Sample Query
question = "What is the procedure for making a claim?"
question_vector = get_embedding(question, model='text-embedding-ada-002')

In [12]:
import ast
df = pd.read_csv('embeddings_policy-booklet.csv')

# Converting the string representations of embeddings to lists of floats
df['embedding'] = df['embedding'].apply(ast.literal_eval)

In [13]:
# Applying cosine similarity to the text chunks and the query
from sklearn.metrics.pairwise import cosine_similarity
df['cosine_similarity'] = df['embedding'].apply(lambda x: cosine_similarity([question_vector], [x])[0][0])

In [14]:
# Sorting by cosine similarity
df = df.sort_values(by='cosine_similarity', ascending=False)

# Top 20 text chunks
top_20 = df.head(20)

print(top_20)

                                            text_chunk  \
126  We’ll only accept your claim if:\n>The inciden...   
130  Checking whether your claim has a reasonable \...   
127  > Take all reasonable precautions to prevent \...   
133  For these \nclaims your appointed representati...   
134  If there are conflicting opinions about whethe...   
26   Page 5\nPage 6\nMaking a claim\nIf you need to...   
196  > Start legal proceedings in your name, or in ...   
32   Page 7\nMaking a claim\nHow repairs and replac...   
139  > Keep us and your appointed representative\nu...   
29   If you’re unsure if a \ndocument is relevant, ...   
27   Even if you don’t \nmake a claim on your car, ...   
42   nd you must send us the \nregistration documen...   
136  When we might choose to settle your claim\nWe ...   
125  ✘We don’t cover claims for any contracts  \nto...   
28   Please have these handy when you get in touch....   
204  If we agree to pay \nyour claim and you have n...   
135  > Makes a

In [15]:
# Converting to list
top_20_list = str(top_20['text_chunk'].tolist())

In [16]:
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI

In [17]:
# Prompt Template
PROMPT_TEMPLATE = """
Below is the context of a car insurance policy booklet, answer the question based on this context: {context}
Give an answer to this question based on the above context: {question}.
Provide a correct clear answer. 
Do not justify your answers.
Do not give information not mentioned in the CONTEXT.
Do not say "according to the context" or "mentioned in the context" or similar.
"""

In [18]:
# Retrieved context and user query in the prompt template is loaded
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format(context=top_20_list, question=question)
messages = [{"role":"user", "content":prompt}]

In [19]:
#  LLM Model called
model = ChatOpenAI(openai_api_key=api_key, model = 'gpt-4-turbo', temperature=0.1)
response_text = model.invoke(messages)

  warn_deprecated(


In [20]:
response_text

AIMessage(content='To make a claim, follow these steps:\n\n1. Contact the insurance company using the relevant phone number provided for the type of claim you need to make (general, windscreen, or motor legal helpline).\n2. Provide your personal details, policy number, car registration number, and a description of the loss or damage.\n3. If involved in an accident, also provide the other driver’s details.\n4. Keep the insurance company and your appointed representative updated with any developments related to the claim.\n5. Provide all relevant information, evidence, and documents as soon as possible to both the insurance company and your appointed representative.\n6. Follow any additional instructions provided by the insurance company to process your claim.', response_metadata={'token_usage': {'completion_tokens': 141, 'prompt_tokens': 2344, 'total_tokens': 2485}, 'model_name': 'gpt-4-turbo', 'system_fingerprint': 'fp_31a6c0accf', 'finish_reason': 'stop', 'logprobs': None}, id='run-18

In [21]:
# Displaying response content
print(response_text.content)

To make a claim, follow these steps:

1. Contact the insurance company using the relevant phone number provided for the type of claim you need to make (general, windscreen, or motor legal helpline).
2. Provide your personal details, policy number, car registration number, and a description of the loss or damage.
3. If involved in an accident, also provide the other driver’s details.
4. Keep the insurance company and your appointed representative updated with any developments related to the claim.
5. Provide all relevant information, evidence, and documents as soon as possible to both the insurance company and your appointed representative.
6. Follow any additional instructions provided by the insurance company to process your claim.


In [21]:
import json

# Loading the dataset
with open('Sample.json', 'r') as file:
    data = json.load(file)

# Extracting questions and responses
questions = [entry['query'] for entry in data]
responses = [entry['response'] for entry in data]

In [22]:
print(questions)

['What is the procedure for making a claim?', 'What does the policy cover for fire and theft?', 'How do I know if I am covered to drive other cars?', 'What should I do if I have an accident?', 'Are my electric car’s charging cables covered?', 'What is DriveSure?', 'What is covered under Motor Legal Cover?', 'Can I use my car abroad?', 'What does Section 4 cover?', 'What does the policy exclude?', 'What is a courtesy car?', 'What should I do if my car is written off?', 'How does Churchill handle windscreen repairs?', "What does the term 'excess' mean?", 'What are Churchill’s territorial limits?', 'What is covered under Section 5?', 'What happens if my car is stolen?', 'What are the requirements for a claim if my car is vandalized?', 'How are repairs handled if I choose my own repairer?', 'What is not covered under accidental damage?', 'Are personal belongings in the car covered?', "What does 'market value' mean?", 'What are the conditions for using a courtesy car?', "What is covered und

In [23]:
print(data)

[{'query': 'What is the procedure for making a claim?', 'response': 'Call 0345 878 6261 with your personal details, policy number, car registration number, and description of the loss or damage. For windscreen claims, call 0800 328 9150. For Motor Legal Cover claims, call 0345 246 2408.'}, {'query': 'What does the policy cover for fire and theft?', 'response': 'Section 2 covers loss or damage caused by fire, lightning, explosion, theft, or attempted theft. Accessories and spare parts are also covered if on your car, in it, or in your locked private garage.'}, {'query': 'How do I know if I am covered to drive other cars?', 'response': 'Check your certificate of motor insurance to see who is covered to drive other cars. The cover only includes injury to third parties or damage to their property.'}, {'query': 'What should I do if I have an accident?', 'response': "Report the accident as soon as possible with your personal details, policy number, car registration number, and a description 

In [25]:
# Defining a function to train and test the model
def generate_response(query, df):
    # Create prompt template and model outside the loop
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    model = ChatOpenAI(openai_api_key=api_key, model='gpt-4-turbo', temperature=0.1)

    # Use the provided code to generate responses for each question in X_test
    question_vector = get_embedding(query, model='text-embedding-ada-002')
    df['cosine_similarity'] = df['embedding'].apply(lambda x: cosine_similarity([question_vector], [x])[0][0])
    df = df.sort_values(by='cosine_similarity', ascending=False)
    top_10_list = df.head(10)['text_chunk'].tolist()
    prompt = prompt_template.format(context=top_10_list, question=query)
    messages = [{"role": "user", "content": prompt}]
    response = model.invoke(messages)
    return response.content


In [39]:
predictions = []

for entry in data:
    question = entry['query']
    response = entry['response']
    prediction = generate_response(question, df)
    entry['prediction'] = prediction
    predictions.append(prediction)
    print("Question: ", question, "\nResponse: ", response, "\nPrediction: ", prediction, "\n")

print(predictions)

Question:  What is the procedure for making a claim? 
Response:  Call 0345 878 6261 with your personal details, policy number, car registration number, and description of the loss or damage. For windscreen claims, call 0800 328 9150. For Motor Legal Cover claims, call 0345 246 2408. 
Prediction:  To make a claim, contact the relevant number provided for your specific cover type, and follow the steps outlined to enable quick processing of your claim. 

Question:  What does the policy cover for fire and theft? 
Response:  Section 2 covers loss or damage caused by fire, lightning, explosion, theft, or attempted theft. Accessories and spare parts are also covered if on your car, in it, or in your locked private garage. 
Prediction:  The policy covers repair or replacement of the car if it is stolen, damaged by an attempted theft, or damaged by fire, including any permanently fitted in-car entertainment equipment. 

Question:  How do I know if I am covered to drive other cars? 
Response:  C

In [40]:
print(predictions)

['To make a claim, contact the relevant number provided for your specific cover type, and follow the steps outlined to enable quick processing of your claim.', 'The policy covers repair or replacement of the car if it is stolen, damaged by an attempted theft, or damaged by fire, including any permanently fitted in-car entertainment equipment.', 'Check your certificate of motor insurance to see if you are covered to drive other cars.', 'If you have an accident, you should inform your insurance company as quickly as possible, providing your personal details, policy number, car registration number, a description of the loss or damage, and the other driver’s details if applicable.', 'Yes, your electric car’s charging cables are covered under ‘Section 2: Fire and theft’ or ‘Section 4: Accidental damage’ of your policy.', 'DriveSure is a telematics insurance product designed to capture how, when, and where your car is driven, using driver-monitoring technology to understand your driving styl

In [54]:
data

[{'query': 'What is the procedure for making a claim?',
  'response': 'Call 0345 878 6261 with your personal details, policy number, car registration number, and description of the loss or damage. For windscreen claims, call 0800 328 9150. For Motor Legal Cover claims, call 0345 246 2408.',
  'prediction': 'To make a claim, contact the relevant number provided for your specific cover type, and follow the steps outlined to enable quick processing of your claim.'},
 {'query': 'What does the policy cover for fire and theft?',
  'response': 'Section 2 covers loss or damage caused by fire, lightning, explosion, theft, or attempted theft. Accessories and spare parts are also covered if on your car, in it, or in your locked private garage.',
  'prediction': 'The policy covers repair or replacement of the car if it is stolen, damaged by an attempted theft, or damaged by fire, including any permanently fitted in-car entertainment equipment.'},
 {'query': 'How do I know if I am covered to drive 

In [76]:
with open('predictions.json', 'w') as f:
    json.dump(data, f, indent=4)

In [1]:
# Chat History
# Initializing chat history
chat_history = []

# Prompt Template for chat history
PROMPT_TEMPLATE = """
You are provided with context from a car insurance policy booklet and a chat history. 
Answer the following question based on the given context:
Context: {context}
Question: {question}
Chat History: {chat_history}

Provide a one-liner answer that is correct and complete. Follow these guidelines:
- Base your answer only on the provided context.
- Do not justify your answer.
- Do not include information not mentioned in the context.
- Avoid phrases like "according to the context" or "mentioned in the context."
"""

# the generate_response function with chat_history
def generate_response_chat(query, df, chat_history):

    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    model = ChatOpenAI(openai_api_key=api_key, model='gpt-4-turbo', temperature=0.1)

    # Generating responses for each question 
    question_vector = get_embedding(query, model='text-embedding-ada-002')
    df['cosine_similarity'] = df['embedding'].apply(lambda x: cosine_similarity([question_vector], [x])[0][0])
    df = df.sort_values(by='cosine_similarity', ascending=False)
    top_10_list = df.head(10)['text_chunk'].tolist()
    prompt = prompt_template.format(context=top_10_list, question=query, chat_history=chat_history)
    messages = [{"role": "assistant", "content": prompt}]
    response = model.invoke(messages)
    
    # Adding the response to chat history
    chat_history.append("""question: {query} 
                answer: {response.content}""")
    
    return response.content, chat_history

In [9]:
while True:
    # User query
    query = input("Enter your query (or type 'exit' to quit): ")
    if query.lower() == 'exit':
        break
    #
    # response generated
    response, chat_history = generate_response_chat(query, df, chat_history)
    
    # response printed
    print("Assistant:", response)

Assistant: Check your certificate of motor insurance to see if you are covered to drive other cars. 
Assistant: The cover includes replacement or repair of broken glass in the windscreen, sunroof, or windows, and repair of bodywork scratches caused by the broken glass, provided there was no other loss or damage from the same incident.
