In [None]:
# !pip install gtts gradio
# !brew install ffmpeg  # macOS (Homebrew)
# !pip install openai-whisper ffmpeg-python
# !pip install sounddevice
#!pip install git+https://github.com/openai/whisper.git

**Import Libraries & Dependencies**

In [None]:
# Standard Libraries
import os
import re
import subprocess
import traceback
import pickle
import spacy
from collections import Counter

# Data Handling and Processing
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns  

# Scikit-Learn: Preprocessing and Model Selection
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

# Scikit-Learn: Models
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
# Scikit-Learn: Evaluation Metrics
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
    roc_auc_score
)

# Scikit-Learn: Class Weights
from sklearn.utils.class_weight import compute_class_weight

# Scikit-Learn: Feature Engineering
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics.pairwise import cosine_similarity

# Transformers
from transformers import AutoModelForTokenClassification, AutoTokenizer
from transformers import pipeline   

# Imbalanced Data Handling
from imblearn.combine import SMOTEENN

# TensorFlow & Keras
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Dense, Dropout, Concatenate, BatchNormalization
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import (
    EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Joblib (for saving/loading models)
import joblib

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from itertools import combinations

import pandas as pd
from sklearn.preprocessing import LabelEncoder

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

import gradio as gr
import speech_recognition as sr
from gtts import gTTS

import datetime

import whisper
import sounddevice as sd  # Handles microphone input
import wave  # Built-in module for saving WAV files



**Load the data**

In [None]:
# 1. Load the raw disease/symptom table.
# The path is relative to the notebook's working directory; later cells read the
# 'Disease' and 'Symptoms' columns of this frame.
disease_symptoms_data = pd.read_csv('Resources/disease_symptoms.csv')


**Explore the data**

In [None]:
# Preview the first rows of the raw table
disease_symptoms_data.head()

In [None]:
# List the distinct raw values of the 'Symptoms' column
disease_symptoms_data['Symptoms'].unique()

**Separate the individual symptoms in each disease's symptom string and expand the dataset with randomized variations of every row.**

In [None]:
# Expand the dataset while maintaining symptom relationships

def expand_dataset_simple(df, n_variations=5, keep_fraction=0.8, random_state=None):
    """Augment a disease/symptom table with randomised symptom-list variants.

    Every usable row is emitted once with its cleaned, lower-cased symptom
    list in the original order, followed by ``n_variations`` variants:

    * more than 3 symptoms: a random subset holding ``keep_fraction`` of the
      symptoms (never fewer than 3), drawn without replacement;
    * 3 symptoms or fewer: all symptoms in a random order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a 'Disease' column and a 'Symptoms' column whose values
        are comma-separated strings (brackets and quotes are stripped).
    n_variations : int, default 5
        Number of random variants generated per input row.
    keep_fraction : float, default 0.8
        Share of the symptoms kept in each variant of a long symptom list.
    random_state : int or None, default None
        Seed for a private ``numpy.random.RandomState``. With ``None`` the
        global ``numpy.random`` state is used, so ``np.random.seed`` still
        controls the result exactly as before.

    Returns
    -------
    pandas.DataFrame
        Columns 'Disease' and 'Symptoms' (symptoms joined with ', ').
    """
    # np.random (module) and RandomState expose the same choice/permutation API.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    expanded_data = []

    for _, row in df.iterrows():
        disease = row['Disease']
        symptoms = row['Symptoms']

        # A missing cell arrives as NaN (a float); re.sub would raise TypeError
        # on it, so rows without a symptom string are skipped.
        if not isinstance(symptoms, str):
            continue

        # Strip list punctuation, then split into trimmed, lower-cased symptoms
        symptoms = re.sub(r'[\[\]\'"]', '', symptoms)
        symptom_list = [s.strip().lower() for s in symptoms.split(',')]
        symptom_list = [s for s in symptom_list if s]

        # Original entry
        expanded_data.append({
            'Disease': disease,
            'Symptoms': ', '.join(symptom_list)
        })

        for _ in range(n_variations):
            if len(symptom_list) > 3:
                # Keep a random subset, bounded to [3, len(symptom_list)]
                n_symptoms = max(3, int(keep_fraction * len(symptom_list)))
                n_symptoms = min(n_symptoms, len(symptom_list))
                selected_symptoms = rng.choice(symptom_list, size=n_symptoms, replace=False)
            else:
                # Too few symptoms to drop any: only the order changes
                selected_symptoms = rng.permutation(symptom_list)

            expanded_data.append({
                'Disease': disease,
                'Symptoms': ', '.join(selected_symptoms)
            })

    # Explicit columns keep the schema stable even when no row was usable
    return pd.DataFrame(expanded_data, columns=['Disease', 'Symptoms'])

**Expand the dataset so every disease has enough samples to move forward with processing**

In [None]:
# Create expanded dataset
# expand_dataset_simple draws random symptom subsets from NumPy's global RNG, so
# seed it first; otherwise every "Restart & Run All" trains on different data.
np.random.seed(42)
expanded_df = expand_dataset_simple(disease_symptoms_data)
print(f"Original dataset size: {len(disease_symptoms_data)}")
print(f"Expanded dataset size: {len(expanded_df)}")

**Check the minimum, maximum and average number of samples per disease**

In [None]:
# Summarise how many samples each disease has after expansion
disease_counts = expanded_df['Disease'].value_counts()
distribution_report = [
    "\nSamples per disease:",
    f"Minimum samples: {disease_counts.min()}",
    f"Maximum samples: {disease_counts.max()}",
    f"Average samples: {disease_counts.mean():.1f}",
]
print("\n".join(distribution_report))

**Preprocess the data so that the Symptoms column keeps together the words necessary to express common medical phrases, while removing the commas.**

In [None]:
# Preprocess and clean symptom text by removing stopwords and preserving important medical phrases

# Multi-word phrases that must survive stopword removal intact. Kept at module
# level (and free of duplicates) so the list is built once instead of on every
# call; preprocess_symptoms is applied to every row of the dataset.
_MEDICAL_PHRASES = (
    'pain in the chest',
    'chest pain',
    'shortness of breath',
    'ringing in the ears',
    'swelling of the joints',
    'pain in my right hand',
    'burning sensation in the chest',
    'shortness of breath on exertion',
    'difficulty in breathing',
    'loss of voice',
    'loss of height',
    'loss of appetite',
    'thinning of hair',
    'disorientation in time',
    'inability to sleep',
    'sensitivity to light',
    'burning sensation in the sternum',
    'bitter or sour taste',
    'difficulty establishing contact',
    'absence of gas',
    'absence of bowel movements',
    'bleeding outside menstruation',
    'difficulty breathing',
    'pain in the back',
    'back pain',
    'joint swelling',
    'pain in the right hand',
    'right hand pain',
    'pain in the left hand',
    'left hand pain',
    'difficulty swallowing',
    'blurred vision',
    'high blood pressure',
    'low blood pressure',
    'rapid heart rate',
    'irregular heartbeat',
    'muscle weakness',
    'chronic fatigue',
    'night sweats',
    'weight loss',
    'weight gain',
    'joint pain',
    'muscle pain',
    'sore throat',
    'runny nose',
    'stuffy nose',
    'abdominal pain',
    'stomach pain',
    'lower back pain',
    'upper back pain',
    'neck pain',
    'shoulder pain',
)

# (phrase, underscore placeholder) pairs, longest phrase first so that e.g.
# 'lower back pain' is protected before its substring 'back pain'.
_PHRASE_PLACEHOLDERS = tuple(
    (phrase, phrase.replace(' ', '_'))
    for phrase in sorted(_MEDICAL_PHRASES, key=len, reverse=True)
)

# Filler words dropped from symptoms that contain no protected phrase.
# Named distinctly so it does not shadow nltk's `stopwords` import.
_SYMPTOM_STOPWORDS = frozenset(
    {'and', 'or', 'the', 'a', 'an', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
)


def preprocess_symptoms(text):
    """Normalise a comma-separated symptom string for vectorisation.

    The text is lower-cased and stripped of brackets and quotes, then split on
    commas. A symptom that contains one of the known medical phrases (or an
    underscore) is kept verbatim; every other symptom has its stopwords
    removed. The surviving symptoms are joined with single spaces.

    Parameters
    ----------
    text : str
        Raw symptom description, e.g. "pain in the chest, mild fever".

    Returns
    -------
    str
        The cleaned symptom text, or '' when `text` is not a string.
    """
    if not isinstance(text, str):
        return ''

    # Clean the text
    text = re.sub(r'[\[\]\'\"]', '', text.lower())

    # Temporarily glue protected phrases together with underscores
    for phrase, placeholder in _PHRASE_PLACEHOLDERS:
        text = text.replace(phrase, placeholder)

    cleaned_symptoms = []
    for symptom in (part.strip() for part in text.split(',')):
        if not symptom:
            continue

        # An underscore marks a protected phrase: keep the symptom untouched
        if '_' in symptom:
            cleaned_symptoms.append(symptom)
            continue

        words = [w for w in symptom.split() if w not in _SYMPTOM_STOPWORDS]
        if words:
            cleaned_symptoms.append(' '.join(words))

    # Join all symptoms and turn the placeholders back into plain phrases
    result = ' '.join(cleaned_symptoms)
    for phrase, placeholder in _PHRASE_PLACEHOLDERS:
        result = result.replace(placeholder, phrase)

    return result



**Test to see if the preprocessing worked**

In [None]:
# Quick visual check: the three phrases below should come back intact, minus the commas
test_text = "pain in the chest, swelling of the joints, difficulty in breathing"
cleaned_text = preprocess_symptoms(test_text)
print(f"Original: {test_text}")
print(f"Cleaned: {cleaned_text}")

**Expand the Symptoms column to produce a more balanced dataset, since SMOTE and SMOTEENN were not effective.**

In [None]:
# Inspect the first 60 rows of the expanded dataset (original rows followed by their random variants)
expanded_df.head(60)

In [None]:
# Create features and target
# X: cleaned symptom text (one string per sample); y: the disease label
X = expanded_df['Symptoms'].apply(preprocess_symptoms)
y = expanded_df['Disease']

In [None]:
# Create TF-IDF features
# Unigrams and bigrams, capped at 500 features; terms present in more than 95%
# of the documents are dropped (max_df), none are dropped for rarity (min_df=1).
# NOTE(review): the vectorizer is fitted on the full dataset before the
# train/test split below, so the vocabulary and IDF weights also see the test rows.
vectorizer = TfidfVectorizer(
    max_features=500,
    min_df=1,
    max_df=0.95,
    ngram_range=(1, 2)
)

X_vectorized = vectorizer.fit_transform(X)
print(f"\nFeature matrix shape: {X_vectorized.shape}")


In [None]:
# Split the data
# Each disease has only a handful of samples, so stratify on the label: a plain
# random split can leave a disease entirely out of the training or the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X_vectorized, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

In [None]:
# Configure the Random Forest (it is fitted in the next cell)
# 200 unrestricted-depth trees, class weights balanced against label frequency,
# fixed seed for repeatable results, all CPU cores used (n_jobs=-1).
rf_model = RandomForestClassifier(
    n_estimators=200,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1
)

In [None]:
# Fit the model on the training split
rf_model.fit(X_train, y_train)

In [None]:
# Make predictions on the held-out split: hard labels and per-class probabilities
y_pred = rf_model.predict(X_test)
y_pred_proba = rf_model.predict_proba(X_test)

In [None]:
# Calculate accuracy on the held-out split
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy:.3f}")

In [None]:
# Print detailed classification report (per-disease metrics on the held-out split)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

**Create a function that defines how the model handles inputs and formats its outputs**

In [None]:
# Predict likely diseases for a symptom description and suggest additional symptoms to look for.

# Multi-word symptom phrases that must stay whole when text is split on
# whitespace. A set removes the duplicates of the hand-written list; sorting
# longest-first ensures 'shortness of breath on exertion' is matched before
# its prefix 'shortness of breath'.
_PREDICT_PHRASES = tuple(sorted({
    'pain in the chest',
    'chest pain',
    'shortness of breath',
    'shortness of breath on exertion',
    'ringing in the ears',
    'swelling of the joints',
    'joint swelling',
    'pain in my right hand',
    'pain in the right hand',
    'right hand pain',
    'pain in the left hand',
    'left hand pain',
    'burning sensation in the chest',
    'burning sensation in the sternum',
    'difficulty in breathing',
    'difficulty breathing',
    'difficulty swallowing',
    'difficulty establishing contact',
    'loss of voice',
    'loss of height',
    'loss of appetite',
    'thinning of hair',
    'disorientation in time',
    'inability to sleep',
    'sensitivity to light',
    'bitter or sour taste',
    'absence of gas',
    'absence of bowel movements',
    'bleeding outside menstruation',
    'pain in the back',
    'back pain',
    'lower back pain',
    'upper back pain',
    'blurred vision',
    'high blood pressure',
    'low blood pressure',
    'rapid heart rate',
    'irregular heartbeat',
    'muscle weakness',
    'chronic fatigue',
    'night sweats',
    'weight loss',
    'weight gain',
    'joint pain',
    'muscle pain',
    'sore throat',
    'runny nose',
    'stuffy nose',
    'abdominal pain',
    'stomach pain',
    'neck pain',
    'shoulder pain',
}, key=lambda phrase: (-len(phrase), phrase)))


def _split_into_symptoms(processed_text):
    """Split pre-processed symptom text into a set of symptoms, keeping known phrases whole."""
    for phrase in _PREDICT_PHRASES:
        processed_text = processed_text.replace(phrase, phrase.replace(' ', '_'))
    return {token.replace('_', ' ') for token in processed_text.split()}


def _known_symptoms_for(disease):
    """Collect the symptoms listed for `disease` across all of its rows in disease_symptoms_data."""
    rows = disease_symptoms_data.loc[disease_symptoms_data['Disease'] == disease, 'Symptoms']
    known = set()
    for raw_symptoms in rows:
        known |= _split_into_symptoms(preprocess_symptoms(raw_symptoms))
    return known


def predict_disease(symptoms, confidence_threshold=0.30):
    """Predict the three most likely diseases for a free-text symptom list.

    Relies on the notebook globals `preprocess_symptoms`, `vectorizer`,
    `rf_model` and `disease_symptoms_data`.

    Parameters
    ----------
    symptoms : str
        Comma-separated symptom description entered by the user.
    confidence_threshold : float, default 0.30
        If the best prediction's probability is below this value, a
        low-confidence note is added to the report.

    Returns
    -------
    str
        A human-readable report: the recognised input symptoms, the top three
        conditions with model confidence and symptom-match percentage, the
        dataset symptoms the user did not mention, and a disclaimer. When no
        part of the input is in the vectorizer's vocabulary, a short message
        asking for recognisable symptoms is returned instead.
    """
    processed_symptoms = preprocess_symptoms(symptoms)
    input_symptoms = _split_into_symptoms(processed_symptoms)

    # Vectorize
    X_new = vectorizer.transform([processed_symptoms])

    # An all-zero vector means nothing in the input is known to the model;
    # "predicting" from it would only report arbitrary diseases.
    if X_new.nnz == 0:
        return (
            "None of the symptoms you entered were recognized.\n"
            "Please describe your symptoms separated by commas, for example: fever, cough, headache"
        )

    # Rank every disease by predicted probability and keep the top 3
    proba = rf_model.predict_proba(X_new)[0]
    predictions = sorted(zip(rf_model.classes_, proba), key=lambda pair: pair[1], reverse=True)
    top_predictions = predictions[:3]

    # Format output
    output = "Based on the symptoms you described:\n\n"
    output += f"Your symptoms: {', '.join(sorted(input_symptoms))}\n\n"
    output += "Possible conditions and related symptoms to watch for:\n\n"

    if top_predictions and top_predictions[0][1] < confidence_threshold:
        output += (
            f"Note: no condition reached the {confidence_threshold:.0%} confidence threshold, "
            "so these matches are uncertain.\n\n"
        )

    for disease, prob in top_predictions:
        # Only the reported diseases need their symptom sets, so they are
        # looked up here rather than building a map of the whole dataset.
        disease_symptoms = _known_symptoms_for(disease)

        # Symptoms of this disease that the user did not mention
        missing_symptoms = disease_symptoms - input_symptoms

        # Share of the disease's symptoms that the user reported
        if disease_symptoms:
            match_percentage = len(input_symptoms & disease_symptoms) / len(disease_symptoms) * 100
        else:
            match_percentage = 0

        output += f"▶ {disease} (Confidence: {prob:.1%}, Symptom Match: {match_percentage:.0f}%)\n"

        if missing_symptoms:
            output += "   Additional symptoms to look for:\n"
            # Sorted so the report is identical from run to run
            output += "   • " + "\n   • ".join(sorted(missing_symptoms)) + "\n"
        else:
            output += "   ✓ All typical symptoms present\n"
        output += "\n"

    output += "\nImportant Notes:\n"
    output += "• The presence of additional symptoms may increase or decrease the likelihood of each condition\n"
    output += "• Some symptoms may be more significant than others for specific conditions\n"
    output += "• This is not a medical diagnosis - please consult a healthcare professional\n"

    return output

**Save the Model**

In [None]:
# Save the model
# The fitted classifier and the fitted TF-IDF vectorizer are bundled in one dict
# and written to a single joblib file in the working directory.
model_components = {
    'model': rf_model,
    'vectorizer': vectorizer
}
joblib.dump(model_components, 'disease_prediction_model.joblib')


**Model Testing**

In [None]:
# Verify that the in-memory model and vectorizer objects exist
# (nothing is read back from the saved joblib file here)
print("Model loaded:", rf_model is not None)
print("Vectorizer loaded:", vectorizer is not None)

In [None]:
# 1. Basic Model Testing
def test_model():
    """Print the predict_disease report for a handful of typical symptom lists."""
    heavy_rule = "=" * 50
    light_rule = "-" * 50
    sample_inputs = (
        "fever, cough, headache",
        "joint pain, swelling, redness",
        "nausea, vomiting, dizziness",
        "chest pain, shortness of breath",
        "headache, blurred vision, dizziness",
    )

    print("Testing Disease Prediction Model")
    print(heavy_rule)

    for sample in sample_inputs:
        print(f"\nTest Case: {sample}")
        print(light_rule)
        # Show the full report for this input
        print(predict_disease(sample))
        print("\n")


In [None]:
# 2. Test Edge Cases
def test_edge_cases():
    """Run predict_disease on awkward inputs, printing the report or the error for each."""
    awkward_inputs = (
        "",                                   # Empty input
        "nonexistent symptom",                # Invalid symptom
        "fever",                              # Single symptom
        "FEVER, COUGH, HEADACHE",             # All caps
        "fever,cough,headache",               # No spaces
        "   fever,    cough,   headache   ",  # Extra spaces
    )

    print("Testing Edge Cases")
    print("=" * 50)

    for raw_input in awkward_inputs:
        print(f"\nEdge Case: '{raw_input}'")
        print("-" * 50)
        try:
            report = predict_disease(raw_input)
        except Exception as err:
            # A failure is reported, not raised, so the remaining cases still run
            print(f"Error: {str(err)}")
        else:
            print(report)
        print("\n")


In [None]:
# 3. Test Real-World Scenarios
def test_real_scenarios():
    """Print the predict_disease report for symptom sets resembling everyday complaints."""
    # (label, symptoms) pairs; the label only documents what each set is meant to resemble
    labelled_scenarios = [
        ("Common cold symptoms", "runny nose, sore throat, cough, mild fever"),
        ("Flu-like symptoms", "high fever, body aches, fatigue, headache"),
        ("Allergic reaction", "rash, itching, swelling, difficulty breathing"),
        ("Digestive issues", "nausea, vomiting, abdominal pain, diarrhea"),
    ]

    print("Testing Real-World Scenarios")
    print("=" * 50)

    for _label, described_symptoms in labelled_scenarios:
        print(f"\nScenario: {described_symptoms}")
        print("-" * 50)
        print(predict_disease(described_symptoms))
        print("\n")


In [None]:
# Run all tests
# Each helper prints its reports to the cell output; nothing is asserted,
# so the results have to be reviewed by eye.
print("Running Model Tests...")
print("\n1. Basic Model Testing")
test_model()

print("\n2. Edge Case Testing")
test_edge_cases()

print("\n3. Real-World Scenario Testing")
test_real_scenarios()

**Create the Speech-to-Text function**

In [None]:
# Load Whisper Model for Speech-to-Text
# `model` is the Whisper "base" checkpoint; transcribe() reads it as a global.
model = whisper.load_model("base")

# Speech-to-text helper backed by the global Whisper `model`
def transcribe(audio):
    """Return the Whisper transcription of an audio file.

    `audio` is the file path supplied by Gradio; when it is None the fixed
    notice "No audio detected." is returned instead of a transcription.
    """
    if audio is not None:
        # Whisper returns a dict; only the recognised text is of interest
        return model.transcribe(audio)["text"]
    return "No audio detected."

# Placeholder chatbot reply (swap in a real model or API call if needed)
def chatbot_response(text):
    """Echo `text` back, prefixed with "Chatbot says: "."""
    return "Chatbot says: {}".format(text)

# Text-to-speech helper: speaks `text` in English via gTTS
def text_to_speech(text):
    """Synthesise `text` to "response.mp3" in the working directory and return that path."""
    output_path = "response.mp3"
    gTTS(text=text, lang="en").save(output_path)
    return output_path




In [None]:
# Add the speech-to-text function
def speech_to_text(audio):
    """Transcribe an audio file with the Google Web Speech recogniser.

    Parameters
    ----------
    audio : str
        Path to an audio file readable by `speech_recognition.AudioFile`.

    Returns
    -------
    str
        The recognised text, or the error message if recognition failed
        (errors are returned as text rather than raised).
    """
    try:
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio) as source:
            # record() reads the whole file; listen() is meant for live input
            # and stops at the first pause, truncating longer recordings.
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except Exception as e:
        return str(e)

# An earlier cell defines the phrase-aware preprocess_symptoms that the training
# data was cleaned with. Redefining the name here unconditionally would silently
# replace it, so the app would clean user input differently from the training
# text. This simplified version is therefore only a fallback for when no
# preprocess_symptoms exists yet.
if 'preprocess_symptoms' not in globals():
    def preprocess_symptoms(text):
        """Clean a comma-separated symptom string.

        Brackets and quotes are removed, each symptom is trimmed and
        lower-cased, and the non-empty symptoms are joined with single spaces.
        Returns '' when `text` is not a string.
        """
        if not isinstance(text, str):
            return ''
        text = re.sub(r'[\[\]\'"]', '', text)
        symptoms = [s.strip().lower() for s in text.split(',')]
        return ' '.join([s for s in symptoms if s])


In [None]:
# Create audio directory if it doesn't exist
AUDIO_DIR = "audio_outputs"
# exist_ok=True makes this one idempotent call; the separate exists() check
# could race with another process creating the directory in between.
os.makedirs(AUDIO_DIR, exist_ok=True)

**Create the function for Text-to-Speech**

In [None]:
# Turn an analysis report into a short text that reads well aloud
def format_text_for_speech(text):
    """Extract the speakable parts of an analysis report.

    Recognised lines:

    * "Recognized symptoms: ..." or "Your symptoms: ..." becomes
      "Based on your symptoms: ...";
    * a line containing "Possible conditions" becomes
      "Here are the most likely conditions";
    * "▶ Name (details)" becomes "A possible condition is Name, with details"
      (the ", with ..." part is omitted when the line has no parenthesis);
    * "• item" becomes "item".

    All other lines are skipped. If no line is recognised, the non-empty input
    lines are used as they are, so plain text is still spoken rather than
    yielding an empty string. Underscores in the result are replaced by spaces.

    Parameters
    ----------
    text : str
        The report text, typically the output of predict_disease.

    Returns
    -------
    str
        The extracted parts joined with ". ".
    """
    speech_parts = []

    for raw_line in text.split('\n'):
        line = raw_line.strip()

        if "Recognized symptoms:" in line or "Your symptoms:" in line:
            symptoms = line.split(":", 1)[1].strip()
            speech_parts.append(f"Based on your symptoms: {symptoms}")

        elif "Possible conditions" in line:
            speech_parts.append("Here are the most likely conditions")

        elif line.startswith('▶'):
            condition_line = line.replace('▶', '').strip()
            # Split at the LAST "(" so a condition name that itself contains
            # parentheses stays intact; tolerate lines without any "(".
            name_part, paren, detail_part = condition_line.rpartition('(')
            if paren:
                condition_name = name_part.strip()
                confidence = detail_part.strip().rstrip(')').strip()
            else:
                condition_name = condition_line
                confidence = ''

            if confidence:
                speech_parts.append(f"A possible condition is {condition_name}, with {confidence}")
            else:
                speech_parts.append(f"A possible condition is {condition_name}")

        elif line.startswith('•'):
            symptom = line.replace('•', '').strip()
            if symptom:
                speech_parts.append(symptom)

    if not speech_parts:
        # Unstructured text: read it out as written
        speech_parts = [part.strip() for part in text.split('\n') if part.strip()]

    speech_text = ". ".join(speech_parts)
    return speech_text.replace('_', ' ')

**Test the Text-to-Speech formatting**

In [None]:
# Test the speech formatter on a sample report (no audio is generated here)
# The sample uses the "Recognized symptoms:" / "▶ Name (NN% confidence)" layout
# that format_text_for_speech parses.
test_text = """Recognized symptoms: pain in the chest, shortness of breath, difficulty in breathing

Possible conditions:

▶ Angina (85% confidence)
Additional symptoms:
• burning sensation in the chest
• pain in the left arm
• sweating"""

formatted_text = format_text_for_speech(test_text)
print(formatted_text)

In [None]:
def text_to_speech(text):
    """Convert an analysis report to an MP3 file and return its path.

    The text is first condensed with format_text_for_speech, then synthesised
    with gTTS into AUDIO_DIR under a timestamped file name.

    Parameters
    ----------
    text : str
        The report to read aloud.

    Returns
    -------
    str or None
        Path of the generated MP3, or None when there is nothing to say or
        synthesis failed (the error is printed, not raised).
    """
    try:
        if not text:
            return None

        # Format the text for better speech
        speech_text = format_text_for_speech(text)
        if not speech_text.strip():
            # Nothing speakable was extracted; skip the TTS request
            return None

        # The directory may have been removed since the setup cell ran
        os.makedirs(AUDIO_DIR, exist_ok=True)

        # Microseconds (%f) keep two requests in the same second from
        # overwriting each other's file
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        audio_file = os.path.join(AUDIO_DIR, f"audio_response_{timestamp}.mp3")

        # Generate speech
        tts = gTTS(text=speech_text, lang="en", slow=False)
        tts.save(audio_file)

        return audio_file if os.path.exists(audio_file) else None

    except Exception as e:
        print(f"TTS Error: {str(e)}")
        return None

In [None]:
# Manual check of text_to_speech on a structured sample report
def test_tts():
    """Generate speech for a sample report and print whether an audio file was produced."""
    sample_report = """Recognized symptoms: fever, headache, cough

Possible conditions:

▶ Common Cold (85% confidence)
Additional symptoms:
• Runny nose
• Sore throat
• Fatigue"""

    print("Testing text-to-speech...")
    print("\nInput text:")
    print(sample_report)

    generated_path = text_to_speech(sample_report)
    produced = bool(generated_path) and os.path.exists(generated_path)
    if not produced:
        print("\n✗ Failed to generate audio file")
        return

    print(f"\n✓ Successfully generated audio file: {generated_path}")
    print(f"✓ File size: {os.path.getsize(generated_path)} bytes")

In [None]:
# Test audio generation (checks that a file is written; it is not played back)
def test_audio_generation():
    """Run text_to_speech on one plain sentence and print the outcome."""
    sentence = "This is a test of the text to speech system."

    print("Testing audio generation...")
    generated_path = text_to_speech(sentence)

    if not (generated_path and os.path.exists(generated_path)):
        print("✗ Audio file generation failed")
    else:
        print(f"✓ Audio file successfully generated: {generated_path}")
        print(f"✓ File size: {os.path.getsize(generated_path)} bytes")

# Run the test
test_audio_generation()

In [None]:
# Full voice round trip: transcribe -> chatbot reply -> spoken reply
def process_audio(audio):
    """Transcribe `audio`, build a chatbot reply and synthesise it.

    Returns a (transcript, reply, audio_path) tuple. audio_path is None when
    the generated file fails verify_audio; on any exception the tuple
    ("Error processing audio", "Error", None) is returned.
    """
    failure_result = ("Error processing audio", "Error", None)
    try:
        transcript = transcribe(audio)
        reply = chatbot_response(transcript)
        reply_audio = text_to_speech(reply)

        if verify_audio(reply_audio):
            return transcript, reply, reply_audio

        # Text results are still useful even if no audio could be produced
        print("Audio generation failed verification")
        return transcript, reply, None

    except Exception as err:
        print(f"Error in process_audio: {str(err)}")
        return failure_result

In [None]:
def transcribe(audio):
    """Transcribe the audio file at path `audio` with the global Whisper `model`.

    Returns the transcription text, or "No audio detected." when `audio` is None.
    """
    audio_path = audio  # Gradio provides an audio file path
    transcription = (
        "No audio detected."
        if audio_path is None
        else model.transcribe(audio_path)["text"]
    )
    return transcription

In [None]:
# Sanity-check a generated audio file
def verify_audio(audio_path):
    """Return True when `audio_path` names an existing file of at least 100 bytes.

    Returns False for an empty or None path, a missing file, a file smaller
    than 100 bytes, or when checking the file raises an exception.
    """
    try:
        # Short-circuits left to right: no path -> no file -> too small
        return (
            bool(audio_path)
            and os.path.exists(audio_path)
            and os.path.getsize(audio_path) >= 100
        )
    except Exception:
        return False


**Create the ChatBot Function**

In [None]:
def chatbot(message, history=None):
    """Gradio callback: turn a symptom description into an analysis report.

    Parameters
    ----------
    message : str
        Symptoms entered by the user, ideally separated by commas.
    history : optional
        Unused. It defaults to None so the function also works as a click
        handler that receives only the text box value.

    Returns
    -------
    str
        The predict_disease report, a prompt when no symptoms were entered,
        or an error message if prediction failed.
    """
    # Treat whitespace-only input like empty input
    if not message or not message.strip():
        return "Please enter some symptoms."

    try:
        return predict_disease(message)
    except Exception as e:
        return f"Error processing symptoms: {str(e)}\nPlease enter symptoms separated by commas."

**Create the Gradio Interface**

In [None]:
# Build the Gradio UI: inputs (text / microphone) on the left, results (text / audio) on the right.
# Components are laid out in the order they are created inside the nested Row/Column contexts.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # Medical Symptom Checker
    Enter your symptoms below or use voice input, and I'll help identify possible conditions.
    """)
    
    with gr.Row():
        with gr.Column():
            # Text input for symptoms
            text_input = gr.Textbox(
                label="Enter symptoms",
                placeholder="Example: fever, cough, headache",
                lines=3
            )
            
            # Voice input - updated parameters
            # type="filepath": the handler receives the recording as a file path
            audio_input = gr.Audio(
                label="Or describe symptoms by voice",
                type="filepath",
                sources=["microphone"],
                streaming=False
            )
            
            with gr.Row():
                check_button = gr.Button("Check Symptoms")
        
        with gr.Column():
            # Analysis results output
            output = gr.Textbox(
                label="Analysis Results",
                lines=15
            )
            
            # Audio output for TTS
            with gr.Row():
                tts_button = gr.Button("🔊 Listen to Results")
                audio_output = gr.Audio(label="Audio Results")
    
    # Set up event handlers
    # NOTE(review): only text_input is passed here, so `chatbot` must be callable
    # with a single argument.
    check_button.click(
        fn=chatbot,
        inputs=[text_input],
        outputs=[output]
    )
    
    # Auto-transcribe when audio is recorded
    # NOTE(review): transcribe returns "No audio detected." for None, and that
    # string is written into the symptoms box like any transcription.
    audio_input.change(
        fn=transcribe,
        inputs=[audio_input],
        outputs=[text_input]
    )
    
    # Read the current analysis text aloud; text_to_speech returns a file path or None
    tts_button.click(
        fn=text_to_speech,
        inputs=[output],
        outputs=[audio_output]
    )
    
    # Add examples
    gr.Examples(
        examples=[
            ["fever, cough, headache"],
            ["joint pain, swelling, redness"],
            ["nausea, vomiting, dizziness"]
        ],
        inputs=text_input
    )

    # Add detailed disclaimer
    gr.Markdown("""
    ### Important Notes:
    - This tool helps identify potential conditions based on symptoms
    - For each condition, additional symptoms are listed to help you better understand what to look for
    - The confidence score indicates how well your symptoms match known patterns
    - The symptom match percentage shows how many of the typical symptoms you're experiencing
    - This is not a diagnostic tool - always consult healthcare professionals for proper medical evaluation
    """)



**Launch the application**

In [None]:
# Launch the interface
# NOTE(review): share=True requests a publicly reachable Gradio link in addition
# to the local server — confirm that exposing this app publicly is intended.
if __name__ == "__main__":
    demo.launch(share=True)

### Tuning the parameters for the model using GridSearchCV

In [None]:
# Improving RF model with GSCV
# Create the parameter grid for the GridSearchCV model running Random Forest
param_grid_rf = {
    'n_estimators': [50, 100, 200],  # Number of trees in the forest
    # 'auto' is no longer accepted by recent scikit-learn releases (and used to
    # mean 'sqrt' for classifiers), so keeping it only produced failing or
    # duplicate fits.
    'max_features': ['sqrt', 'log2'],  # Number of features to consider at each split
    'max_depth': [None, 10, 20, 30],  # Maximum depth of the tree
    'min_samples_split': [2, 5, 10],  # Minimum number of samples required to split an internal node
    'min_samples_leaf': [1, 2, 4],  # Minimum number of samples required to be at a leaf node
    'class_weight': [None, 'balanced'], # Adjust weights inversely proportional to class frequencies
    'bootstrap': [True, False]  # Whether bootstrap samples are used when building trees
}

# create the grid_tuned model and the grid search estimator
# A fixed random_state makes the comparison between parameter sets repeatable;
# n_jobs=-1 spreads the many candidate fits over all CPU cores.
grid_model_rf = RandomForestClassifier(random_state=42)
grid_clf_rf = GridSearchCV(grid_model_rf, param_grid_rf, verbose=3, n_jobs=-1)

# Fit the model by using the grid search estimator.
# This will take the Random Forest model and try each combination of parameters.
grid_clf_rf.fit(X_train, y_train)

In [None]:

# List the best parameters for this dataset
print(f"Best Parameters for the Random Forest model is: \n{grid_clf_rf.best_params_}")

# Create Predictions with the best model
grid_y_pred_rf = grid_clf_rf.predict(X_test)

# Print the classification report for the best model
class_report_rf = classification_report(y_test, grid_y_pred_rf)
print(f"\n Classification Report for the best Random Forest model is: \n{class_report_rf}")

# Print the best models score
# best_score_ comes from the grid search itself (see the scikit-learn docs),
# not from the held-out test split evaluated below.
print(f"\n Random Forest Model Best Score is: {grid_clf_rf.best_score_}")

# Calculate Accuracy Score for the best model
accuracy_rf_best = accuracy_score(y_test, grid_y_pred_rf)
print(f"\n Accuracy Score for the best Random Forest model: {accuracy_rf_best}")