In [4]:
# Step 1: Install Required Libraries
!pip install gradio scikit-learn langdetect

Collecting gradio
  Downloading gradio-5.29.0-py3-none-any.whl.metadata (16 kB)
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 981.5/981.5 kB 14.5 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.0 (from gradio)
  Downloading gradio_client-1.10.0-py3-none-any.whl.meta

In [5]:
# Step 2: Create Enhanced Dataset
# Small bilingual training set: each row is a quoted, comma-separated list of
# symptoms (English or Kiswahili) followed by the disease label.
sample_data = """
symptoms,disease
"fever, chills, headache, vomiting, sweating",malaria
"persistent cough, chest pain, fatigue, weight loss, night sweats",tuberculosis
"diarrhea, dehydration, nausea, stomach cramps, vomiting",cholera
"runny nose, sore throat, cough, sneezing, body aches",flu
"high fever, joint pain, headache, muscle pain, rash",dengue
"weight loss, night sweats, prolonged fever, swollen lymph nodes",hiv
"kupoteza uzito, kutokwa jasho usiku, homa ya muda mrefu, tezi kuvimba",hiv
"kuharisha, kichefuchefu, maumivu ya tumbo, upungufu wa maji mwilini",cholera
"maumivu ya kichwa, homa kali, maumivu ya viungo, vipele",dengue
"kutokwa na makamasi, kikohozi, koo kuuma, mafua",flu
"""

# Write with an explicit encoding: without it open() uses the platform locale,
# while pandas.read_csv decodes as UTF-8 by default. Pinning UTF-8 here keeps
# writer and reader in agreement if non-ASCII rows are ever added.
# strip() drops the blank first line so the CSV starts at the header row.
with open("kenya_diseases_swahili.csv", "w", encoding="utf-8") as f:
    f.write(sample_data.strip())

In [6]:
# Step 3: Load & Train Model
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from langdetect import detect

# Load the symptom/disease table from the CSV file.
df = pd.read_csv("kenya_diseases_swahili.csv")

# Features are the raw symptom strings; targets are the disease labels.
X, y = df["symptoms"], df["disease"]

# TF-IDF vectorizer chained into a logistic-regression classifier, bundled as
# a single pipeline so raw text can be handed directly to model.predict().
text_features = TfidfVectorizer()
classifier = LogisticRegression()
model = make_pipeline(text_features, classifier)
model.fit(X, y)

In [7]:
# Step 4: Define Processing Function
def diagnose(symptom_input):
    """Predict the most likely disease for a free-text symptom description.

    Args:
        symptom_input: Symptom text in English or Kiswahili. Empty/None input
            or input with fewer than two whitespace-separated words is rejected.

    Returns:
        A Markdown string: either a bilingual warning asking for valid
        symptoms, or the predicted disease label (upper-cased) followed by
        advice to consult a doctor.
    """
    # Guard: a single word (or nothing) is too little to classify.
    if not symptom_input or len(symptom_input.split()) < 2:
        return "⚠️ Tafadhali andika au sema dalili halisi. / Please try again with valid symptoms."

    # No language detection step: the classifier is trained on both English and
    # Kiswahili rows, and the reply text is the same for either language.
    prediction = model.predict([symptom_input])[0]
    return f"✅ Ugonjwa Unaowezekana / Likely Disease: **{prediction.upper()}**\n\n📌 Ushauri: Wasiliana na daktari kwa ushauri zaidi."


In [8]:
# Step 5: Define Gradio Interface with Voice & Validation
import gradio as gr

def _diagnose_text_only(symptom_text, _audio=None):
    """Adapt diagnose() to a two-input Interface.

    gr.Interface calls its function with one argument per input component,
    but diagnose() accepts only the symptom text. The recorded audio is
    accepted and ignored here.
    """
    return diagnose(symptom_text)


iface = gr.Interface(
    fn=_diagnose_text_only,
    inputs=[
        gr.Textbox(lines=2, placeholder="Andika dalili hapa... / Type symptoms here"),
        # The first positional argument of gr.Audio is the initial `value`, so the
        # microphone must be requested through the keyword-only `sources` argument.
        gr.Audio(sources=["microphone"], label="🎤 Rekodi sauti yako / Record your symptoms (English or Kiswahili)")
    ],
    outputs="markdown",
    title="🩺 Chatbot ya Ugonjwa Kenya - Kenya Disease Chatbot",
    description="⚕️ Andika au sema dalili zako kwa Kiswahili au Kiingereza. Chatbot itakupa utambuzi wa ugonjwa unaowezekana.",
    live=True
)



In [9]:
# Optional: Whisper-like transcribe audio (simulation with gradio's audio input)
!pip install SpeechRecognition

def process_input(text_input, audio_path):
    """Diagnose from a voice recording if one was supplied, otherwise from text.

    Args:
        text_input: Symptom text typed by the user; used only when no audio
            is provided.
        audio_path: Path to a recorded/uploaded audio file, or a falsy value
            (None / "") when there is no recording. Audio takes precedence
            over text.

    Returns:
        The Markdown string produced by diagnose(), or a bilingual warning
        if the recording could not be read or transcribed.
    """
    if not audio_path:
        return diagnose(text_input)

    # Imported here so the text-only path works even without the package, and
    # so `sr` is actually defined (no notebook cell imports it at module level).
    import speech_recognition as sr

    recognizer = sr.Recognizer()
    try:
        # Loading the file is inside the try as well: an unreadable or
        # unsupported recording should yield the friendly warning, not a crash.
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        transcribed = recognizer.recognize_google(audio_data)
    except (sr.UnknownValueError, sr.RequestError, ValueError, OSError):
        # UnknownValueError: speech was unintelligible; RequestError: the
        # recognition service could not be reached; ValueError/OSError: the
        # audio file could not be opened or decoded.
        return "⚠️ Hatukuweza kuelewa sauti yako. / Unable to process your voice input."

    # Kept outside the try so a classifier failure is not misreported as a
    # voice-recognition problem.
    return diagnose(transcribed)

Collecting SpeechRecognition
  Downloading speechrecognition-3.14.2-py3-none-any.whl.metadata (30 kB)
Downloading speechrecognition-3.14.2-py3-none-any.whl (32.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 32.9/32.9 MB 40.7 MB/s eta 0:00:00
Installing collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.14.2


In [10]:
# Final interface setup
# Two inputs, passed to process_input in this order: typed symptoms, then audio.
symptom_box = gr.Textbox(label="📝 Dalili / Symptoms")
# type="filepath" hands the callback a path to the audio file, which is what
# process_input opens with sr.AudioFile.
voice_clip = gr.Audio(type="filepath", label="🎤 Rekodi / Record Voice")

iface = gr.Interface(
    fn=process_input,
    inputs=[symptom_box, voice_clip],
    outputs="markdown",
    title="🇰🇪 Kenya Disease Diagnosis Chatbot (Swahili + English)",
    description="🚑 Andika au sema dalili zako kama: 'kichwa kuuma, kutapika, homa' au 'fever, cough'.",
    theme="default",
)

In [11]:
# Launch app
# share=True requests a public gradio.live URL in addition to the local
# server; per the launch output, that link expires after one week.
launch_options = {"share": True}
iface.launch(**launch_options)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://357c7713c7734b9521.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


