Import libraries

In [None]:
import nltk
from nltk.tokenize import word_tokenize

Define questions

In [None]:
# Fetch NLTK's 'punkt' tokenizer package so it is available locally.
nltk.download('punkt')

# Survey questions keyed by feature name. The dict's insertion order is the
# order in which the questions are asked. The comment after each entry gives
# the intended encoding of the answer.
questions = {
    "high_blood_pressure": "Have you been told you have high blood pressure by a doctor?",
    # YES - 1, NO - 0
    "high_cholesterol": "Have you been told you have high cholesterol by a doctor?",
    # YES - 1, NO - 0
    "bmi": "What is your Body Mass Index (BMI)?",
    # numerical (validate range)
    "smoking": "Have you smoked at least 100 cigarettes in your entire life? (5 packs = 100 cigarettes)",
    # YES - 1, NO - 0
    "physical_activity": "Have you done any physical activity or exercise in the past 30 days, other than your regular job?",
    # YES - 1, NO - 0
    "fruit_consumption": "Do you consume fruits at least once per day?",
    # YES - 1, NO - 0
    "vegetable_consumption": "Do you consume vegetables at least once per day?",
    # YES - 1, NO - 0
    "mental_health_days": "How many days in the past 30 days was your mental health not good?",
    # scale 0 - 30 days
    "difficulty_walking": "Do you have serious difficulty walking or climbing stairs?",
    # YES - 1, NO - 0
    "sex": "What is your gender? (Male/Female)",
    # MALE - 1, FEMALE - 0
    "age": "What is your age?",
    # mapped to an age-group code (see age_map below)
    "household_income": "What is your annual household income from all sources?",
    # mapped to an income-bracket code (see income_map below)
}



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
# Build the age -> age-group lookup used to categorise a respondent's age.
# Lower bound of every age group in ascending order; the final value (100) is
# the exclusive upper limit of the last group, so group 13 covers ages 80-99.
age_bounds = [18, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 100]

# Pair each group code (1-13) with the range of ages that belongs to it.
ranges = [
    (code, range(low, high))
    for code, (low, high) in enumerate(zip(age_bounds, age_bounds[1:]), start=1)
]

# Flatten the (code, ages) pairs into a single age -> code dictionary.
age_map = {
    years: code
    for code, ages in ranges
    for years in ages
}

# Print the finished table as a quick sanity check.
print(age_map)

{18: 1, 19: 1, 20: 1, 21: 1, 22: 1, 23: 1, 24: 1, 25: 2, 26: 2, 27: 2, 28: 2, 29: 2, 30: 3, 31: 3, 32: 3, 33: 3, 34: 3, 35: 4, 36: 4, 37: 4, 38: 4, 39: 4, 40: 5, 41: 5, 42: 5, 43: 5, 44: 5, 45: 6, 46: 6, 47: 6, 48: 6, 49: 6, 50: 7, 51: 7, 52: 7, 53: 7, 54: 7, 55: 8, 56: 8, 57: 8, 58: 8, 59: 8, 60: 9, 61: 9, 62: 9, 63: 9, 64: 9, 65: 10, 66: 10, 67: 10, 68: 10, 69: 10, 70: 11, 71: 11, 72: 11, 73: 11, 74: 11, 75: 12, 76: 12, 77: 12, 78: 12, 79: 12, 80: 13, 81: 13, 82: 13, 83: 13, 84: 13, 85: 13, 86: 13, 87: 13, 88: 13, 89: 13, 90: 13, 91: 13, 92: 13, 93: 13, 94: 13, 95: 13, 96: 13, 97: 13, 98: 13, 99: 13}


In [None]:
# Define income mapping based on ranges.
# Each entry pairs a bracket code with the dollar amounts it covers.
income_ranges = [
    (1, range(0, 10000)),        # Less than $10,000
    (2, range(10000, 15000)),    # $10,000 to less than $15,000
    (3, range(15000, 20000)),    # $15,000 to less than $20,000
    (4, range(20000, 25000)),    # $20,000 to less than $25,000
    (5, range(25000, 35000)),    # $25,000 to less than $35,000
    (6, range(35000, 50000)),    # $35,000 to less than $50,000
    (7, range(50000, 75000)),    # $50,000 to less than $75,000
    (8, range(75000, 10**7))     # $75,000 or more (stop value is nominal; the lookup has no upper limit)
]


class _IncomeBracketLookup:
    """Dict-like lookup from an annual income in dollars to its bracket code.

    Replaces a per-dollar dictionary (ten million entries) with a scan over
    the handful of brackets. Supports ``lookup[income]``, ``lookup.get(income,
    default)`` and ``income in lookup`` like the dictionary it replaces.

    The bracket with the highest lower bound is treated as open-ended, so any
    income at or above that bound maps to it. Incomes below the lowest bound
    and non-numeric keys have no bracket.
    """

    def __init__(self, brackets):
        """Store ``brackets``, an iterable of ``(code, range)`` pairs."""
        # Sort by lower bound so the open-ended top bracket is always last.
        self._brackets = sorted(brackets, key=lambda bracket: bracket[1].start)

    def __getitem__(self, income):
        """Return the bracket code for ``income``; raise KeyError if none applies."""
        try:
            if self._brackets:
                top_category, top_range = self._brackets[-1]
                if income >= top_range.start:
                    return top_category
            for category, income_range in self._brackets:
                # Bound comparison (not `in range`) stays O(1) for floats too.
                if income_range.start <= income < income_range.stop:
                    return category
        except TypeError:
            # Non-numeric keys (e.g. None, str) simply have no bracket.
            pass
        raise KeyError(income)

    def get(self, income, default=None):
        """Return the bracket code for ``income``, or ``default`` if none applies."""
        try:
            return self[income]
        except KeyError:
            return default

    def __contains__(self, income):
        """Return True when ``income`` falls in some bracket."""
        try:
            self[income]
        except KeyError:
            return False
        return True


income_map = _IncomeBracketLookup(income_ranges)

# Example: Print income_map lookups for verification
print(f"Mapping for $30,000: {income_map.get(30000)}")
print(f"Mapping for $80,000: {income_map.get(80000)}")


Mapping for $30,000: 5
Mapping for $80,000: 8


Define explanation corpus

In [None]:
# Plain-language explanation for each survey question.
# Keys must match the keys of `questions`, because the chatbot looks an
# explanation up by the key of the question being answered.
explanations = {
    "high_blood_pressure": "High blood pressure means your blood pushes too hard against your arteries. It can lead to health problems if untreated.",
    "high_cholesterol": "High cholesterol means there's too much fat in your blood, which can raise the risk of heart problems.",
    "bmi": "BMI is a measure of weight compared to height to check if you're underweight, normal, overweight, or obese.",
    "smoking": "Smoking 100 cigarettes means you've smoked regularly, equal to about five packs.",
    "physical_activity": "This asks if you've done exercise or sports recently.",
    "fruit_consumption": "This asks if you eat fruit daily.",
    "vegetable_consumption": "This asks if you eat vegetables daily.",
    "mental_health_days": "This asks how many days in the past month you felt stressed or emotionally unwell.",
    "difficulty_walking": "This asks if you have trouble walking or climbing stairs.",
    "sex": "This asks if you are male or female.",
    # Keyed "age" (not "age_category") so it matches the question key.
    "age": "This asks about your age.",
    "household_income": "This asks about your total yearly income."
}


Rule-based chatbot implementation

In [None]:
import spacy

# Load the spaCy pipeline named "en_core_web_sm" once and keep it in the
# module-level name `nlp`.
# NOTE(review): assumes the "en_core_web_sm" package is installed in this environment — confirm.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Mutable conversation state shared by the chatbot functions:
#   "step"      - index of the question currently awaiting an answer
#   "responses" - answers recorded so far, keyed by question name
context = dict(step=0, responses={})

# Process user input
def _parse_whole_number(text):
    """Return ``text`` as an int, or None when it is not a whole number.

    Surrounding whitespace, a leading "$" and "," thousands separators are
    ignored, so "30,000" and "$30000" both parse as 30000.
    """
    cleaned = text.strip().lstrip("$").replace(",", "").strip()
    try:
        return int(cleaned)
    except ValueError:
        return None


def process_input(user_input):
    """Record the answer to the current question and return the bot's reply.

    The current question is the one at index ``context["step"]`` in
    ``questions``. Its answer is stored in ``context["responses"]`` under the
    question's key and the step is advanced by one.

    Answers to "age" and "household_income" are converted to whole numbers and
    stored as the category code from ``age_map`` / ``income_map`` ("Unknown"
    when the number has no category). All other answers are stored as the
    lower-cased input text.

    Args:
        user_input: The raw text typed by the user.

    Returns:
        The explanation for the question just answered followed by the next
        question; the closing thank-you message once the last question has
        been answered (or on any later call); or a re-prompt when a numeric
        answer could not be parsed, in which case nothing is recorded and the
        step is not advanced.
    """
    closing_message = "Thank you! Your responses have been recorded."
    question_keys = list(questions.keys())
    step = context["step"]

    # Every question has already been answered.
    if step >= len(question_keys):
        return closing_message

    current_key = question_keys[step]
    response = user_input.lower()

    if current_key in ("age", "household_income"):
        number = _parse_whole_number(response)
        if number is None:
            # Re-ask instead of crashing on input such as "forty" or "4.5".
            return f"Please enter a whole number.\n\n{questions[current_key]}"
        category_map = age_map if current_key == "age" else income_map
        context["responses"][current_key] = category_map.get(number, "Unknown")
    else:
        context["responses"][current_key] = response

    context["step"] = step + 1

    # Explain the question just answered, then move on.
    explanation = explanations.get(current_key, "")
    if context["step"] < len(question_keys):
        follow_up = questions[question_keys[context["step"]]]
    else:
        # The last question was just answered; there is nothing left to ask.
        follow_up = closing_message

    return f"{explanation}\n\n{follow_up}"

# Run chatbot interaction
def chatbot():
    """Run the interactive question/answer loop on the console.

    Prints a banner and the question at the current ``context["step"]``, then
    repeatedly reads a line from the user and prints the reply produced by
    ``process_input``. The loop ends when the user types 'exit' (any letter
    case).
    """
    print("Chatbot started! Type 'exit' to end.")

    question_keys = list(questions.keys())
    print(questions[question_keys[context["step"]]])

    user_input = input("You: ")
    while user_input.lower() != "exit":
        print(f"Bot: {process_input(user_input)}")
        user_input = input("You: ")

# Start the chatbot. This call blocks on input() prompts until the user types 'exit'.
chatbot()

Chatbot started! Type 'exit' to end.
Have you been told you have high blood pressure by a doctor?
You: exit


Prediction

In [None]:
# TODO: hard-coded placeholder; replace with the prediction returned by the model.
classification = "diabetes positive(diabetic)"
# An f-string avoids the double space that print()'s default separator added
# after the label's own trailing space.
print(f"Your diabetes classification is: {classification}")

Your diabetes classification is:  diabetes positive(diabetic)
