In [21]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


In [22]:
# Load the dataset with efficient data types and handle missing values
data = pd.read_csv('uuuuuuu.csv', dtype={
    'age': 'int32', 'gender': 'category', 'bmi': 'float32', 'children': 'int32',
    'smoker': 'category', 'medical_history': 'str', 'family_medical_history': 'str',
    'exercise_frequency': 'str', 'occupation': 'str', 'charges': 'float32'
})


In [23]:
# Combine relevant text fields into a single document
data['document'] = data.apply(lambda row: f"Age: {row['age']}, Gender: {row['gender']}, BMI: {row['bmi']}, Children: {row['children']}, \
                                        Smoker: {row['smoker']}, Medical History: {row['medical_history']}, \
                                        Family Medical History: {row['family_medical_history']}, \
                                        Exercise Frequency: {row['exercise_frequency']}, Occupation: {row['occupation']}, \
                                        Charges: {row['charges']}", axis=1)

In [24]:
# Vectorize the documents
vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1, 2))
tfidf_matrix = vectorizer.fit_transform(data['document'])

In [25]:
# retrive the documents
def retrieve(query, top_n=5):
    query_vec = vectorizer.transform([query])
    similarities = cosine_similarity(query_vec, tfidf_matrix).flatten()
    indices = np.argsort(-similarities)[:top_n]
    return data['document'].iloc[indices]

# Example query
query = "Age: 50, Gender: male,BMI: 27.3 ,Smoker: no, Medical History: Diabetes ,family_medical_history: NONE,exercise_frequency: Occasionally"
retrieved_docs = retrieve(query)
retrieved_docs


192256    Age: 50, Gender: male, BMI: 27.09000015258789,...
301418    Age: 50, Gender: male, BMI: 27.0, Children: 2,...
422244    Age: 50, Gender: male, BMI: 27.8700008392334, ...
128070    Age: 50, Gender: female, BMI: 27.5, Children: ...
77031     Age: 50, Gender: male, BMI: 27.5, Children: 2,...
Name: document, dtype: object

In [26]:
import os
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain_community.llms import Ollama


In [27]:
# generating a response
def generate_response(combined_docs, query):
    prompt_template = f"""
    Act as a chat system for Health Insurance domain take as which input health insurance parameters and output Quotes for Insurance.
    Based on the sample insurance data provide a quotation for health insurance based on input health condition of the user.

    ID, age, gender, bmi, children, smoker, medical_history, family_medical_history, exercise_frequency,charges:
    sample health insurance data: {combined_docs}

    ---

    Provide a Quotation for the input health condition of the user based on the above sample health insurance data: 
    input health condition of the user: {query}
    """
    llm = Ollama(model="llama3:8b ", temperature=0.5)
    response = llm.invoke(prompt_template)
    
    
    # Remove the unwanted text from the response
    unwanted_text = "Please note that this is an estimated quote only. The actual premium may vary based on the specific health insurance provider, plan details, location, and other factors. It's always best to consult with a licensed insurance agent or broker for accurate quotes tailored to your unique situation."
    response = response.replace(unwanted_text, "")
    
    return response  # Directly return the filtered response string


In [28]:

def answer_general_question(query):
    llm = Ollama(model="mistral:7b-instruct", temperature=0.5)
    response = llm.invoke(query)
    return response

In [29]:
def get_user_input():
    print("Choose the type of query:")
    print("1. Health Insurance Quotation")
    print("2. General Question")
    query_type = input("Enter 1 or 2: ")

    if query_type == "1":
        age = input("Enter age: ")
        gender = input("Enter gender (male/female): ")
        height = float(input("Enter height in cm: "))
        weight = float(input("Enter weight in kilograms: "))
        bmi = round(weight / (height ** 2), 2)
        children = input("Enter number of children: ")
        smoker = input("Are you a smoker? (yes/no): ")
        medical_history = input("Enter medical history: ")
        family_medical_history = input("Enter family medical history: ")
        exercise_frequency = input("Enter exercise frequency: ")

        query = f"Age: {age}, Gender: {gender}, BMI: {bmi}, Children: {children}, Smoker: {smoker}, Medical History: {medical_history}, Family Medical History: {family_medical_history}, Exercise Frequency: {exercise_frequency}"
        return query, "insurance"
    else:
        query = input("Enter your general question: ")
        return query, "general"

In [31]:
def main():
    query, query_type = get_user_input()

    if query_type == "insurance":
        retrieved_docs = retrieve(query)
        combined_docs = "\n".join(retrieved_docs)
        response = generate_response(combined_docs, query)
    else:
        response = answer_general_question(query)

    print(response)

# Example usage
if __name__ == "__main__":
    main()

Choose the type of query:
1. Health Insurance Quotation
2. General Question
I'd be happy to provide a quotation for your health insurance based on the input health condition you provided.

To generate a quote, I'll need to analyze some key factors from the sample data. Based on your input:

* Age: 65 (which is similar to most of the sample data)
* Gender: male (similar to most of the sample data)
* BMI: 0.0 (very low, which may indicate a healthier lifestyle)
* Children: 3 (average number of children in some of the sample data)
* Smoker: yes (like many of the sample data records)
* Medical History: none (which is different from most of the sample data, where medical history is often present)
* Family Medical History: diabetes (similar to some of the sample data)
* Exercise Frequency: rarely (like some of the sample data)

Based on these factors, I'll provide a quote. Keep in mind that this is just an estimate and actual quotes may vary.

**Quote:** $21,500 - $25,000 per year

This quot