In [6]:
# Cell 1: imports and dataset creation
import random
import re
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import os
random.seed(42)

# Create labeled question dataset (sample realistic patient questions)
# Labels: pain, diet, mental, treatment, lifestyle, emergency, drug_query
data = [
    # Pain (8)
    ("My pain is about 3/10, is that ok?", "pain"),
    ("I have mild pain in my abdomen after chemo", "pain"),
    ("My pain suddenly spiked to 8", "pain"),
    ("I feel constant aching in my back", "pain"),
    ("How do I manage breakthrough pain at night?", "pain"),
    ("Is numbness after surgery normal?", "pain"),
    ("My pain is tolerable but annoying, any tips?", "pain"),
    ("What should I do when pain increases quickly?", "pain"),
    # Diet (8)
    ("What foods should I eat during chemotherapy?", "diet"),
    ("Can I eat spicy food after treatment?", "diet"),
    ("Which fruits are best for recovery?", "diet"),
    ("Is it ok to drink milk after my treatment?", "diet"),
    ("How much water should I drink daily?", "diet"),
    ("Are protein shakes helpful during chemo?", "diet"),
    ("Any diet tips to reduce nausea?", "diet"),
    ("Should I avoid raw salads while on chemo?", "diet"),
    # Mental health / counseling (8)
    ("I feel anxious about my scan results", "mental"),
    ("I am having trouble sleeping, any advice?", "mental"),
    ("I feel very low and unmotivated", "mental"),
    ("Do you have breathing exercises for stress?", "mental"),
    ("How can I manage my fear before appointments?", "mental"),
    ("I am feeling isolated and alone", "mental"),
    ("Can you give me quick relaxation tips?", "mental"),
    ("I am overwhelmed with news about my diagnosis", "mental"),
    # Treatment questions (but we will redirect drug/treatment to schedule) (8)
    ("Can I take paracetamol with my chemo drug?", "drug_query"),
    ("Is it safe to combine ibuprofen with my medication?", "drug_query"),
    ("What are the side effects of my chemotherapy?", "drug_query"),
    ("Should I skip my next chemo if I have fever?", "drug_query"),
    ("Can I have radiation and chemo on the same day?", "drug_query"),
    ("My doctor prescribed a new tablet—what is the dose?", "drug_query"),
    ("What should I do if I miss a treatment session?", "drug_query"),
    ("Are there interactions between my supplements and chemo?", "drug_query"),
    # Lifestyle (8)
    ("Can I exercise during treatment?", "lifestyle"),
    ("When can I return to work after surgery?", "lifestyle"),
    ("Is it safe to travel after chemotherapy?", "lifestyle"),
    ("How much rest should I take each day?", "lifestyle"),
    ("Can I drive after receiving anesthesia?", "lifestyle"),
    ("How to avoid infections at home?", "lifestyle"),
    ("Is sexual activity safe during treatment?", "lifestyle"),
    ("When can I resume my normal diet after treatment?", "lifestyle"),
    # Emergency / red flags (8)
    ("I have severe bleeding, what should I do?", "emergency"),
    ("I can't breathe well right now", "emergency"),
    ("My fever is 103°F and I'm dizzy", "emergency"),
    ("I am losing consciousness", "emergency"),
    ("I'm having chest pain and sweating", "emergency"),
    ("Severe allergic reaction with swelling and hives", "emergency"),
    ("Uncontrolled vomiting and dehydration", "emergency"),
    ("Sudden severe headache and confusion", "emergency"),
]

# Optionally augment data by paraphrasing (simple patterns) - small augmentation
paraphrases = []
for text,label in data:
    if label == "diet":
        paraphrases.append((text.replace("chemo","treatment"), label))
    if label == "pain":
        paraphrases.append((text.replace("pain","discomfort"), label))
data += paraphrases

df = pd.DataFrame(data, columns=["text","label"])
df = df.sample(frac=1, random_state=42).reset_index(drop=True)  # shuffle
print("Dataset size:", len(df))
df.head(8)

Dataset size: 64


Unnamed: 0,text,label
0,How do I manage breakthrough discomfort at night?,pain
1,Which fruits are best for recovery?,diet
2,"My pain is about 3/10, is that ok?",pain
3,I'm having chest pain and sweating,emergency
4,Is numbness after surgery normal?,pain
5,Can I drive after receiving anesthesia?,lifestyle
6,I feel anxious about my scan results,mental
7,How much water should I drink daily?,diet


In [8]:
# Cell 2: train/test split, vectorizer, classifier
X = df["text"].values
y = df["label"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, stratify=y, random_state=42)

vectorizer = TfidfVectorizer(ngram_range=(1,2), max_features=2000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

clf = LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42)
clf.fit(X_train_vec, y_train)

y_pred = clf.predict(X_test_vec)
print("Classification report:\n")
print(classification_report(y_test, y_pred, zero_division=0))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))

# persist model & vectorizer for reuse
os.makedirs("models", exist_ok=True)
joblib.dump(vectorizer, "models/tfidf_vectorizer.joblib")
joblib.dump(clf, "models/intent_clf.joblib")
print("\nSaved vectorizer and classifier to models/")

Classification report:

              precision    recall  f1-score   support

        diet       1.00      0.33      0.50         3
  drug_query       1.00      1.00      1.00         2
   emergency       0.00      0.00      0.00         1
   lifestyle       0.40      1.00      0.57         2
      mental       1.00      0.50      0.67         2
        pain       1.00      1.00      1.00         3

    accuracy                           0.69        13
   macro avg       0.73      0.64      0.62        13
weighted avg       0.83      0.69      0.69        13

Confusion matrix:
 [[1 0 0 2 0 0]
 [0 2 0 0 0 0]
 [0 0 0 1 0 0]
 [0 0 0 2 0 0]
 [0 0 1 0 1 0]
 [0 0 0 0 0 3]]

Saved vectorizer and classifier to models/


In [None]:
import random

# Predefined responses
diet_tips = [
    "Include more fresh fruits and vegetables in your diet.",
    "Stay hydrated and drink at least 8 glasses of water daily.",
    "Avoid processed foods and focus on whole, natural foods.",
    "Include protein-rich foods like beans, fish, and nuts."
]

counselling_tips = [
    "Remember, you're not alone in this journey.",
    "Take deep breaths and focus on small, positive steps.",
    "Engage in relaxing activities like meditation or reading.",
    "Stay connected with friends and family for emotional support."
]

def cancer_counselling_chatbot():
    print("🤖 Cancer Care Counselling Chatbot")
    print("Hello! I'm here to help with your doubts, pain assessment, diet tips, and counselling.")
    print("You can type 'exit' anytime to stop.\n")

    while True:
        user_input = input("You: ").lower()

        # Exit condition
        if user_input == "exit":
            print("Chatbot: Take care! Wishing you good health. 💙")
            break

        # Drug-related questions
        if "medicine" in user_input or "drug" in user_input or "tablet" in user_input:
            print("Chatbot: It's better to discuss medicines with your doctor. Shall I help you schedule an appointment?")
            continue

        # Pain-related queries
        if "pain" in user_input:
            pain_location = input("Chatbot: Can you tell me where you're feeling the pain? ")
            surgery_type = input("Chatbot: What kind of surgery or cancer treatment have you undergone? ")

            try:
                pain_intensity = int(input("Chatbot: On a scale of 0 to 10, how severe is the pain? "))
            except:
                print("Chatbot: Please enter a valid number between 0 and 10.")
                continue

            # Check pain intensity
            if pain_intensity < 4:
                print(f"Chatbot: Based on your inputs, it seems like mild pain at {pain_location}.")
                print("Chatbot: I recommend a counselling session to manage stress and discomfort.")
                print("Chatbot: Here's a tip for you: ", random.choice(counselling_tips))
                print("Chatbot: Also, here's a diet suggestion: ", random.choice(diet_tips))
            elif 4 <= pain_intensity <= 7:
                print(f"Chatbot: Your pain level is moderate at {pain_location}.")
                print("Chatbot: I suggest scheduling an appointment with your doctor soon.")
            else:
                print(f"Chatbot: Your pain seems severe at {pain_location}.")
                print("Chatbot: Please contact your doctor immediately or visit the nearest hospital.")
            continue

        # Diet-related queries
        if "diet" in user_input or "food" in user_input:
            print("Chatbot: Here's a healthy diet suggestion for you:")
            print(" - " + random.choice(diet_tips))
            continue

        # General counselling queries
        if "stress" in user_input or "anxiety" in user_input or "depression" in user_input:
            print("Chatbot: I understand this can be overwhelming.")
            print("Chatbot: Here's a counselling tip: ", random.choice(counselling_tips))
            continue

        # Appointment-related queries
        if "appointment" in user_input or "doctor" in user_input:
            print("Chatbot: I can help you schedule an appointment with your doctor. Would you like me to do that?")
            continue

        # Fallback response
        print("Chatbot: I'm here to help. Could you please elaborate on your concern?")

# Run the chatbot
cancer_counselling_chatbot()

🤖 Cancer Care Counselling Chatbot
Hello! I'm here to help with your doubts, pain assessment, diet tips, and counselling.
You can type 'exit' anytime to stop.

Chatbot: I'm here to help. Could you please elaborate on your concern?
Chatbot: Your pain level is moderate at my legs.
Chatbot: I suggest scheduling an appointment with your doctor soon.
Chatbot: I'm here to help. Could you please elaborate on your concern?
Chatbot: I'm here to help. Could you please elaborate on your concern?
Chatbot: I'm here to help. Could you please elaborate on your concern?
Chatbot: I'm here to help. Could you please elaborate on your concern?
Chatbot: I'm here to help. Could you please elaborate on your concern?
Chatbot: I'm here to help. Could you please elaborate on your concern?
Chatbot: I'm here to help. Could you please elaborate on your concern?
Chatbot: Based on your inputs, it seems like mild pain at Throat.
Chatbot: I recommend a counselling session to manage stress and discomfort.
Chatbot: Here'