# **Step 1: Import the necessary libraries**


In [7]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline

# **Step 2: Main dataset cleaning**


In [8]:
# Load the dataset
df = pd.read_csv('dataset.csv')
df.head()

# Fill missing values
df.isnull().sum()
df = df.fillna("")
df['Symptom'] = ""
for i in range(1, 18):
    df['s'] = df["Symptom_{}".format(i)]
    df['Symptom'] = df['Symptom'] + df['s']
df.head(10)

for i in range(1, 18):
    df = df.drop("Symptom_{}".format(i), axis=1)
df = df.drop("s", axis=1)

# **Step 3: Refining Description and Precaution Datasets**


In [9]:
# Load symptom description and precaution data
ds = pd.read_csv('symptom_description.csv')
ds.head(10)
ds.isnull().sum()

ds.index = ds['Disease']
ds = ds.drop('Disease', axis=1)
ds.head(10)

pr = pd.read_csv('symptom_precaution.csv')
pr.head(10)
pr.isnull().sum()
pr = pr.fillna("")
pr.head(10)

pr['precautions'] = ""
pr['punc'] = ', '
for i in range(1, 5):
    pr['s'] = pr["Precaution_{}".format(i)] + pr['punc']
    pr['precautions'] = pr['precautions'] + pr['s']
pr.head(10)

for i in range(1, 5):
    pr = pr.drop("Precaution_{}".format(i), axis=1)
pr = pr.drop(['s', 'punc'], axis=1)

pr.index = pr['Disease']
pr = pr.drop('Disease', axis=1)
pr.head()

Unnamed: 0_level_0,precautions
Disease,Unnamed: 1_level_1
Drug Reaction,"stop irritation, consult nearest hospital, sto..."
Malaria,"Consult nearest hospital, avoid oily food, avo..."
Allergy,"apply calamine, cover area with bandage, , use..."
Hypothyroidism,"reduce stress, exercise, eat healthy, get prop..."
Psoriasis,"wash hands with warm soapy water, stop bleedin..."


# **Step 4: Model Training**


In [10]:
# Prepare features and labels
X = df['Symptom']
y = df['Disease']
X.head(10)
y.head(10)

# Train-test split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=True, random_state=44)

print('Training Data Shape:', X_train.shape)
print('Testing Data Shape: ', X_test.shape)

# Feature extraction using TF-IDF

vectorizer = TfidfVectorizer()

X_train_tfidf = vectorizer.fit_transform(X_train)
X_train_tfidf.shape

# Train the model

text_clf = Pipeline([('tfidf', TfidfVectorizer()), ('clf', LinearSVC())])
text_clf.fit(X_train, y_train)

Training Data Shape: (3690,)
Testing Data Shape:  (1230,)




# **Step 5: Prediction**


In [12]:
# Prediction function for input symptoms
def predict_disease(input_symptoms):
    try:
        prediction = text_clf.predict([input_symptoms])[0]
        return prediction
    except Exception as e:
        return str(e)
    
symptom_severity_df = pd.read_csv('symptom_severity.csv')
# Fetch severity based on symptoms
def calculate_severity(input_symptoms):
    severity_score = 0
    symptoms = input_symptoms.split(', ')
    
    for symptom in symptoms:
        if symptom in symptom_severity_df['Symptom'].values:
            severity_score += symptom_severity_df[symptom_severity_df['Symptom'] == symptom]['weight'].values[0]

    # Categorize severity score into 4-5 different categories
    if severity_score <= 5:
        return "Low"
    elif 5 < severity_score <= 10:
        return "Medium"
    elif 10 < severity_score <= 15:
        return "High"
    else:
        return "Very High"

# Example usage for prediction
input_symptoms = "itching,vomiting,dark_urine"
predicted_disease = predict_disease(input_symptoms)
severity = calculate_severity(input_symptoms)

# Fetch description and precautions for the predicted disease
description = ds.loc[predicted_disease].values[0] if predicted_disease in ds.index else "No description available."
precautions = pr.loc[predicted_disease].values[0] if predicted_disease in pr.index else "No precautions available."

print("Predicted Disease:", predicted_disease)
print("Description:", description)
print("Precautions:", precautions)
print("Severity:", severity)

Predicted Disease: Jaundice
Description: Yellow staining of the skin and sclerae (the whites of the eyes) by abnormally high blood levels of the bile pigment bilirubin. The yellowing extends to other tissues and body fluids. Jaundice was once called the "morbus regius" (the regal disease) in the belief that only the touch of a king could cure it
Precautions: drink plenty of water, consume milk thistle, eat fruits and high fiberous food, medication, 
Severity: Low
