In [1]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [2]:
# Step 1: Prepare the dataset
data = {
    'Symptoms': [
        'Fever, cough',
        'Headache, fever, fatigue',
        'Chest pain, shortness of breath',
        'Vomiting, nausea',
        'Cough, sore throat, fatigue',
        'Severe headache, nausea, vomiting',
        'Dizziness, nausea, blurred vision',
        'Joint pain, rash, fever',
        'Fatigue, swollen lymph nodes',
        'Itchy skin, rash, fever'
    ],
    'Disease': [
        'Common Cold',
        'Flu',
        'Heart Disease',
        'Food Poisoning',
        'Common Cold',
        'Migraine',
        'Vertigo',
        'Rheumatic Fever',
        'HIV/AIDS',
        'Chickenpox'
    ]
}

# Convert data into a pandas DataFrame
df = pd.DataFrame(data)

# Show the first few rows of the dataset
df.head()


Unnamed: 0,Symptoms,Disease
0,"Fever, cough",Common Cold
1,"Headache, fever, fatigue",Flu
2,"Chest pain, shortness of breath",Heart Disease
3,"Vomiting, nausea",Food Poisoning
4,"Cough, sore throat, fatigue",Common Cold


In [5]:
# Step 2: Preprocess the data
# Convert symptoms to lowercase and split by commas
df['Symptoms'] = df['Symptoms'].apply(lambda x: x.lower().split(', '))

# Encode the diseases into numerical labels
label_encoder = LabelEncoder()
df['Disease'] = label_encoder.fit_transform(df['Disease'])

# Show the processed data
df.head()


Unnamed: 0,Symptoms,Disease
0,"[fever, cough]",1
1,"[headache, fever, fatigue]",2
2,"[chest pain, shortness of breath]",5
3,"[vomiting, nausea]",3
4,"[cough, sore throat, fatigue]",1


In [7]:
# Step 3: Tokenize the symptoms
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['Symptoms'].apply(lambda x: ' '.join(x)))

# Convert symptoms to sequences of integers
X = tokenizer.texts_to_sequences(df['Symptoms'])
X = pad_sequences(X, padding='post')

# Labels (Disease)
y = df['Disease'].values

# Show the tokenized symptoms and diseases
X[:5], y[:5]


(array([[1, 4, 0],
        [5, 1, 2],
        [0, 0, 0],
        [7, 3, 0],
        [4, 2, 0]]),
 array([1, 2, 5, 3, 1]))

In [9]:
# Step 4: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Show the shapes of the train/test sets
X_train.shape, X_test.shape, y_train.shape, y_test.shape


((8, 3), (2, 3), (8,), (2,))

In [33]:
# Step 5: Create the neural network model
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))  # Input layer
model.add(Dense(64, activation='relu'))  # Hidden layer
model.add(Dense(len(label_encoder.classes_), activation='softmax'))  # Output layer

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Show the model summary to inspect the architecture
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [169]:
# Step 6: Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=4, validation_data=(X_test, y_test))

# Show training history (optional: can plot this later to visualize loss/accuracy over epochs)
history.history


Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 463ms/step - accuracy: 0.9167 - loss: 0.1779 - val_accuracy: 0.0000e+00 - val_loss: 9.2340
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 181ms/step - accuracy: 0.9167 - loss: 0.1766 - val_accuracy: 0.0000e+00 - val_loss: 9.2367
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step - accuracy: 0.9167 - loss: 0.1748 - val_accuracy: 0.0000e+00 - val_loss: 9.2393
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step - accuracy: 0.9167 - loss: 0.1756 - val_accuracy: 0.0000e+00 - val_loss: 9.2421
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step - accuracy: 0.7500 - loss: 0.1835 - val_accuracy: 0.0000e+00 - val_loss: 9.2460
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 185ms/step - accuracy: 0.9167 - loss: 0.1783 - val_accuracy: 0.0000e+00 - val_loss: 9.2517
Epoch 7/10
[1m2

{'accuracy': [0.875,
  0.875,
  0.875,
  0.875,
  0.75,
  0.875,
  0.875,
  0.875,
  0.875,
  0.875],
 'loss': [0.17949283123016357,
  0.17921561002731323,
  0.1788901686668396,
  0.17858070135116577,
  0.18190675973892212,
  0.181292325258255,
  0.17727896571159363,
  0.18043366074562073,
  0.17756488919258118,
  0.17929516732692719],
 'val_accuracy': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'val_loss': [9.233972549438477,
  9.23666000366211,
  9.23934555053711,
  9.242137908935547,
  9.246033668518066,
  9.251728057861328,
  9.255892753601074,
  9.263792037963867,
  9.269096374511719,
  9.272887229919434]}

In [171]:
# Step 7: Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step - accuracy: 0.0000e+00 - loss: 9.2729
Test Accuracy: 0.0


In [172]:
# Step 8: Making Predictions
def predict_disease(symptoms_input):
    symptoms_input = symptoms_input.lower().split(', ')  # Preprocess input
    seq = tokenizer.texts_to_sequences([symptoms_input])
    seq = pad_sequences(seq, padding='post', maxlen=X_train.shape[1])

    prediction = model.predict(seq)
    predicted_class = np.argmax(prediction, axis=1)

    # Decode the predicted label
    predicted_disease = label_encoder.inverse_transform(predicted_class)

    return predicted_disease[0]

# Example usage
user_input = "fever, cough"
disease = predict_disease(user_input)
print(f"The predicted disease is: {disease}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
The predicted disease is: Common Cold


In [175]:
# Example of creating a simple chatbot interface
def chat():
    print("Hello! I am a chatbot that can help you predict diseases based on your symptoms.")
    while True:
        user_input = input("Please enter your symptoms (comma separated): ")
        if user_input.lower() == 'exit':
            print("Goodbye!")
            break
        disease = predict_disease(user_input)
        print(f"The predicted disease is: {disease}")

# To start the chat, simply call the chat function (uncomment the line below to test it)
# chat()


In [177]:
def predict_disease(symptoms_input, history): 
    symptoms_input = symptoms_input.lower().split(', ')  # Preprocess input
    seq = tokenizer.texts_to_sequences([symptoms_input])
    seq = pad_sequences(seq, padding='post', maxlen=X_train.shape[1])

    prediction = model.predict(seq)
    predicted_class = np.argmax(prediction, axis=1)

    predicted_disease = label_encoder.inverse_transform(predicted_class)

    return predicted_disease[0]

Textbox=gr.Textbox(label="Enter Symptoms")

interface = gr.ChatInterface(
    fn=predict_disease,
    #inputs=Textbox,
    #outputs=Textbox,
    title="DocRoboto",
    description="sickness predictor based on symptoms entered by the user."
)


interface.launch(share=True, inline=True)



* Running on local URL:  http://127.0.0.1:7861

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


