In [1]:
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Constants
# CSV file that main() loads the question/response pairs from.
DATASET_FILE = 'chronic.csv'
# NOTE(review): the two pickle paths below are not referenced by any code
# visible in this file -- confirm whether they are still needed.
VECTORIZER_FILE = 'vectorizer.pkl'
MODEL_FILE = 'model.pkl'
# State code -> year -> count quoted by get_health_response() as deaths from
# chronic liver disease. Years are *string* keys, so lookups must use strings.
HEALTH_DATA = {
    'CT': {
        '2014': 375,
    },
    'DC': {
        '2015': 23,
        '2017': 53,
        '2020': 57,
    },
    'DE': {
        '2010': 77,
    },
}

# Load dataset
def load_dataset(file_path):
    """Read the Q&A CSV and return only its question/response columns.

    The CSV columns ``Question`` and ``Response`` are renamed to the
    lower-case ``question`` and ``response``; every other column is dropped
    from the returned frame.

    Args:
        file_path: Anything ``pandas.read_csv`` accepts as its first argument.

    Returns:
        A DataFrame with exactly the columns ``question`` and ``response``.

    Raises:
        KeyError: If either column is missing after the rename.
    """
    column_map = {'Question': 'question', 'Response': 'response'}
    frame = pd.read_csv(file_path).rename(columns=column_map)
    return frame[list(column_map.values())]

# Preprocess text
def preprocess_text(text, lemmatizer):
    """Lower-case, tokenize and lemmatize ``text``.

    Args:
        text: The string to normalize.
        lemmatizer: Object exposing ``lemmatize(token)``; it is applied to
            every token produced by ``nltk.word_tokenize``.

    Returns:
        The lemmatized tokens joined back together with single spaces.
    """
    lowered_tokens = nltk.word_tokenize(text.lower())
    return ' '.join(map(lemmatizer.lemmatize, lowered_tokens))

# Create TF-IDF vectorizer
def create_vectorizer(dataset):
    """Build a TF-IDF vectorizer fitted on the dataset's questions.

    Args:
        dataset: Mapping/DataFrame whose ``'question'`` entry holds the
            texts to fit on.

    Returns:
        A ``TfidfVectorizer`` (default settings) fitted on those texts.
    """
    # fit() returns the fitted estimator itself, so it can be returned directly.
    return TfidfVectorizer().fit(dataset['question'])

# Train model
def train_model(vectorizer, dataset):
    """Vectorize the stored questions and pair them with their responses.

    No model is actually fitted here: the "model" is the transformed question
    matrix plus the responses it is looked up against.

    Args:
        vectorizer: Fitted object exposing ``transform(texts)``.
        dataset: Mapping/DataFrame with ``'question'`` and ``'response'``
            entries.

    Returns:
        A ``(X, y)`` tuple: the transformed questions and the responses.
    """
    question_matrix = vectorizer.transform(dataset['question'])
    return question_matrix, dataset['response']

# Get chatbot response
def get_response(user_input, vectorizer, X, y):
    """Return the stored response whose question best matches ``user_input``.

    The input is preprocessed with ``preprocess_text``, vectorized, and
    compared against every row of ``X`` by cosine similarity.

    Args:
        user_input: Raw text typed by the user.
        vectorizer: Fitted vectorizer exposing ``transform``.
        X: Matrix of vectorized questions, one row per stored question.
        y: Responses aligned with the rows of ``X``; must support ``.iloc``.

    Returns:
        The response at the position of the highest similarity, or a fallback
        message when no stored question has a positive similarity.
    """
    user_input_processed = preprocess_text(user_input, WordNetLemmatizer())
    user_input_vector = vectorizer.transform([user_input_processed])
    similarities = cosine_similarity(user_input_vector, X).ravel()
    # If nothing overlaps (e.g. empty or out-of-vocabulary input) every score
    # is 0 and argmax would pick row 0, returning an unrelated answer.
    if similarities.size == 0 or similarities.max() <= 0:
        return "Sorry, I don't have an answer for that. Could you rephrase your question?"
    best_match_index = int(np.argmax(similarities))
    return y.iloc[best_match_index]

# Get health response
def get_health_response(state, year):
    """Format the chronic-liver-disease death count for a state and year.

    Args:
        state: State code used as a key of ``HEALTH_DATA`` (e.g. ``'DC'``).
        year: Year as a string or an int. ``HEALTH_DATA`` is keyed by year
            strings, so the value is converted with ``str`` before lookup.

    Returns:
        A sentence quoting the stored count, or a "no data" message when the
        state or year is not present in ``HEALTH_DATA``.
    """
    # Callers may pass an int year (main() historically did); the table's
    # keys are strings, so an int key would never match.
    year_key = str(year)
    deaths = HEALTH_DATA.get(state, {}).get(year_key)
    if deaths is None:
        return "No data available for this state and year."
    return f"According to the National Vital Statistics System (NVSS) in {year_key}, {state} reported {deaths} deaths from chronic liver disease."

# Main chatbot loop
def main():
    """Run the interactive chatbot on stdin/stdout until the user exits.

    Loads the dataset, preprocesses its questions, fits the vectorizer, and
    then answers each line typed by the user:

    * ``exit`` / ``quit`` / ``bye`` (any case) ends the loop.
    * input starting with ``health`` is treated as ``health [state] [year]``
      and answered from ``HEALTH_DATA``.
    * anything else is answered with the best-matching dataset response.
    """
    dataset = load_dataset(DATASET_FILE)
    lemmatizer = WordNetLemmatizer()
    processed_questions = dataset['question'].apply(lambda x: preprocess_text(x, lemmatizer))
    # Index the *preprocessed* questions: get_response() preprocesses the user
    # input the same way, so both sides of the similarity must match. The
    # original computed this column but then fitted on the raw questions.
    training = dataset.assign(question=processed_questions)
    vectorizer = create_vectorizer(training)
    X, y = train_model(vectorizer, training)

    while True:
        # Strip so stray leading/trailing spaces do not defeat command matching.
        user_input = input('You: ').strip()
        command = user_input.lower()
        if command in ('exit', 'quit', 'bye'):
            print('Chatbot: Goodbye!')
            break
        if command.startswith('health'):
            parts = user_input.split()
            state = parts[1].upper() if len(parts) == 3 else None
            if state in HEALTH_DATA and parts[2] in HEALTH_DATA[state]:
                # Keep the year as a string: HEALTH_DATA is keyed by year
                # strings, so casting to int made every valid query miss.
                response = get_health_response(state, parts[2])
            else:
                response = "Invalid health query. Please use format: 'health [state] [year]'."
        else:
            response = get_response(user_input, vectorizer, X, y)
        print('Chatbot:', response)

if __name__ == '__main__':
    main()

You:  What is chronic disease?


Chatbot: A long-term health condition, lasting 1 year or more, requiring ongoing medical attention and management, such as diabetes, heart disease, cancer, and liver disease.


You:  What is the treatments of chronicdisease


Chatbot: Here are common treatment options for chronic liver disease: Lifestyle Changes, Medications,Therapies,Alternative Therapies,Disease-Specific Treatments,Ongoing Care


You:  what Lifestyle Changes can help manage chronicdisease


Chatbot: Healthy diet (low-fat, high-fiber), Regular exercise, Weight management, Quit smoking, Limit alcohol consumption and Avoid illicit substances


You:  i want to know about alternative therapies


Chatbot: Acupuncture, Herbal supplements (e.g., milk thistle, turmeric) and Vitamin and mineral supplements


You:  what are the primary surgical options for chronicdisease


Chatbot:  Liver resection (removing damaged tissue) ,Transjugular intrahepatic portosystemic shunt (TIPS) for portal hypertension and Liver transplantation


You:  define Disease-Specific 


Chatbot: Hepatitis B/C: Antiviral medications, Non-alcoholic fatty liver disease (NAFLD): Weight loss, insulin sensitizers, Primary biliary cholangitis (PBC): Ursodeoxycholic acid and Primary sclerosing cholangitis (PSC): Immunomodulators, antibiotics


You:  i want to know about Ongoing Care


Chatbot:  Regular check-ups with a hepatologist/gastroenterologist, Monitoring liver function tests (LFTs) and Adjusting treatment plans as needed


You:  Mortality from heart failure i want rate


Chatbot: According to the National Vital Statistics System (NVSS) in 2010, Oregon reported: 30 deaths from heart failure mortality


You:  Binge drinking prevalence among youth


Chatbot: New Jersey YRBSS (2013): 23% of New Jersey youth reported binge drinking


You:  Alcohol use before pregnancy


Chatbot: According to the Pregnancy Risk Assessment Monitoring System (PRAMS) in 2015, 59.1% of Alabama women reported alcohol use before pregnancy.


You:  Chronic liver disease mortality


Chatbot: According to Alaska NVSS data: 2011: 99 deaths from chronic liver disease


You:  Alcohol use among youth


Chatbot: According to the Youth Risk Behavior Surveillance System (YRBSS) in 2019, Arizona reported: 29.5% of youth reported alcohol use


You:  Alcohol use before pregnancy


Chatbot: According to the Pregnancy Risk Assessment Monitoring System (PRAMS) in 2015, 59.1% of Alabama women reported alcohol use before pregnancy.


You:  bye


Chatbot: Goodbye!
