In [1]:
import pandas as pd
data= pd.read_csv("dataset.csv")
data.head()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,


In [2]:
data.columns

Index(['Disease', 'Symptom_1', 'Symptom_2', 'Symptom_3', 'Symptom_4',
       'Symptom_5', 'Symptom_6', 'Symptom_7', 'Symptom_8', 'Symptom_9',
       'Symptom_10', 'Symptom_11', 'Symptom_12', 'Symptom_13', 'Symptom_14',
       'Symptom_15', 'Symptom_16', 'Symptom_17'],
      dtype='object')

In [3]:
data.fillna("", inplace=True)

In [4]:
#merging all symptoms into one cell for easier processing
symptom_cols = [col for col in data.columns if col != 'Disease']

data['all_symptoms_text'] = data[symptom_cols].apply(
    lambda row: " ".join(row.values.astype(str)), axis=1
)
data['all_symptoms_text'].head()

0    itching  skin_rash  nodal_skin_eruptions  disc...
1     skin_rash  nodal_skin_eruptions  dischromic _...
2    itching  nodal_skin_eruptions  dischromic _pat...
3    itching  skin_rash  dischromic _patches       ...
4    itching  skin_rash  nodal_skin_eruptions      ...
Name: all_symptoms_text, dtype: object

In [5]:
import nltk
import string
from nltk.corpus import stopwords

def clean_text(text):
    text = text.lower()
    text = ''.join(c for c in text if c not in string.punctuation)
    words = text.split()
    words = [w for w in words if w not in stopwords.words('english')]
    return " ".join(words)

data['cleaned_symptoms'] = data['all_symptoms_text'].apply(clean_text)

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(max_features=5000)
X = tfidf.fit_transform(data['cleaned_symptoms']).toarray()
y = data['Disease']

In [7]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_encoded = le.fit_transform(y)

In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

In [9]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [10]:
from sklearn.metrics import accuracy_score

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 1.0


In [11]:
ss= pd.read_csv("Symptom-severity.csv")
sd= pd.read_csv("symptom_description.csv")
sp= pd.read_csv("symptom_precaution.csv")

build medical severity dictionary

In [12]:
severity_dict = dict(zip(ss['Symptom'].str.lower(), ss['weight']))

user symptom -> severity score fn

In [13]:
#cnvrt score to risk level using if-then logic
def severity_level(score):
    if score <= 6:
        return "Low"
    elif score <= 10:
        return "Moderate"
    elif score <= 14:
        return "High"
    else:
        return "Critical"

In [14]:
def calculate_severity(symptom_list):
    total_score = 0
    matched = []

    for symptom in symptom_list:
        s = symptom.lower().strip().replace(" ", "_")
        if s in severity_dict:
            total_score += severity_dict[s]
            matched.append((s, severity_dict[s]))

    level = severity_level(total_score)
    return total_score, level, matched

In [15]:
def symptom_severity_analyzer(user_symptoms):
    score,level, details = calculate_severity(user_symptoms)
    level = severity_level(score)

    print("Symptom Severity Analysis")
    print("-" * 40)
    for s, w in details:
        print(f"{s}  →  weight {w}")
    print("-" * 40)
    print("Total Risk Score :", score)
    print("Severity Level   :", level)
    print("Details : ",details)
    return score, level


In [16]:
user_input = ["mild_fever", "headache", "vomiting", "chest pain"]
symptom_severity_analyzer(user_input)

Symptom Severity Analysis
----------------------------------------
mild_fever  →  weight 5
headache  →  weight 3
vomiting  →  weight 5
chest_pain  →  weight 7
----------------------------------------
Total Risk Score : 20
Severity Level   : Critical
Details :  [('mild_fever', 5), ('headache', 3), ('vomiting', 5), ('chest_pain', 7)]


(20, 'Critical')

In [17]:
def predict_disease(user_symptoms_text):
    X = tfidf.transform([user_symptoms_text])
    probs = model.predict_proba(X)[0]
    class_index = probs.argmax()

    encoded_label = model.classes_[class_index]
    disease_name = le.inverse_transform([encoded_label])[0]
    confidence = probs[class_index]

    return disease_name, confidence


In [18]:
def get_description(disease):
    row = sd[sd['Disease'] == disease]
    if not row.empty:
        return row['Description'].values[0]
    return "No description available."


In [19]:
def get_precautions(disease):
    row = sp[sp['Disease'] == disease]
    if not row.empty:
        return row.iloc[0, 1:].values.tolist()
    return []


In [20]:
def health_report(user_symptoms_list):
    text_input = " ".join(user_symptoms_list)

    disease, confidence = predict_disease(text_input)
    score, level,details = calculate_severity(user_symptoms_list)
    description = get_description(disease)
    precautions = get_precautions(disease)

    print("\n AI HEALTH ANALYSIS REPORT\n")
    print("Predicted Disease :", disease)
    print("Confidence        :", round(confidence*100,2), "%")
    print("Severity Level    :", level)
    print("Risk Score        :", score)
    print("Details : ",details)

    print("\nDisease Description:")
    print(description)

    print("\nPrecautions:")
    for i, p in enumerate(precautions, 1):
        print(f"{i}. {p}")



In [21]:
user_symptoms = ["high_fever", "joint_pain", "headache", "vomiting"]
health_report(user_symptoms)
#malaria
#health_report(["high_fever", "chills", "headache", "vomiting"])


 AI HEALTH ANALYSIS REPORT

Predicted Disease : Paralysis (brain hemorrhage)
Confidence        : 18.85 %
Severity Level    : Critical
Risk Score        : 18
Details :  [('high_fever', 7), ('joint_pain', 3), ('headache', 3), ('vomiting', 5)]

Disease Description:
Intracerebral hemorrhage (ICH) is when blood suddenly bursts into brain tissue, causing damage to your brain. Symptoms usually appear suddenly during ICH. They include headache, weakness, confusion, and paralysis, particularly on one side of your body.

Precautions:
1. massage
2. eat healthy
3. exercise
4. consult doctor


In [22]:
med_df = pd.read_csv("medicine_dataset.csv", low_memory=False)

In [23]:
#convert txt cols to string
for col in med_df.columns:
    med_df[col] = med_df[col].astype(str)

In [24]:
#replace nan with empty
med_df.replace("nan", "", inplace=True)

In [25]:
med_df[['name','use0','use1','Therapeutic Class']].head()

Unnamed: 0,name,use0,use1,Therapeutic Class
0,augmentin 625 duo tablet,Treatment of Bacterial infections,,ANTI INFECTIVES
1,azithral 500 tablet,Treatment of Bacterial infections,,ANTI INFECTIVES
2,ascoril ls syrup,Treatment of Cough with mucus,,RESPIRATORY
3,allegra 120mg tablet,Treatment of Sneezing and runny nose due to al...,Treatment of Allergic conditions,RESPIRATORY
4,avil 25 tablet,Treatment of Allergic conditions,,RESPIRATORY


In [26]:
#mapping disease with medicine
def get_medicines_for_disease(disease):
    disease = disease.lower().replace(" ", "_")

    use_cols = [f'use{i}' for i in range(5)]
    
    mask = med_df[use_cols].apply(
        lambda row: any(disease in str(cell).lower() for cell in row),
        axis=1
    )
    
    return med_df[mask]

In [27]:
#testing
get_medicines_for_disease("malaria").head()

Unnamed: 0,id,name,substitute0,substitute1,substitute2,substitute3,substitute4,sideEffect0,sideEffect1,sideEffect2,...,sideEffect41,use0,use1,use2,use3,use4,Chemical Class,Habit Forming,Therapeutic Class,Action Class
1009,1010,azunate 60mg injection,Arteross 60mg Injection,Falciart 60mg Injection,Falsitis AR Injection,Leonate 60mg Injection,Neosunate 60mg Injection,Headache,Dizziness,Weakness,...,,Malaria,,,,,Sesquiterpene lactones,No,ANTI MALARIALS,Antimalarial- Artemisinin and derivatives
1779,1780,azunate l 480 mg/80 mg tablet,,,,,,Headache,Loss of appetite,Dizziness,...,,Treatment of Malaria,,,,,,No,ANTI MALARIALS,
1870,1871,azunate l 480 mg/80 mg tablet,,,,,,Headache,Loss of appetite,Dizziness,...,,Treatment of Malaria,,,,,,No,ANTI MALARIALS,
2004,2005,azunate 120mg injection,Tesunate 120mg Injection,Artigo 120mg Injection,Falsu 120mg Injection,Combither AT 120mg Injection,D-Sunate Injection,Headache,Dizziness,Weakness,...,,Malaria,,,,,Sesquiterpene lactones,No,ANTI MALARIALS,Antimalarial- Artemisinin and derivatives
2090,2091,artether l 80 mg/480 mg tablet,Combither Forte 80 mg/480 mg Tablet,Lumether Forte Tablet,Rezatrin 80 mg/480 mg Tablet,Actizo 80 mg/480 mg Tablet,Lumex Forte 80 mg/480 mg Tablet,Headache,Loss of appetite,Dizziness,...,,Treatment of Malaria,,,,,,No,ANTI MALARIALS,


In [28]:
#side effect risk scoring for print safe med first
side_cols = [f'sideEffect{i}' for i in range(42)]

def count_side_effects(row):
    return sum(1 for col in side_cols if row[col] != "")

In [29]:
#severity based filter
def severity_filter(df, severity):
    if severity == "Low":
        return df[df['Habit Forming'] == "No"]
    
    elif severity == "Moderate":
        return df
    
    elif severity == "High":
        return df[df['Therapeutic Class'].str.contains(
            "Antibiotic|Antiviral|Analgesic|Antipyretic", case=False, na=False)]
    
    else:  # Critical
        return df[df['Action Class'].str.contains(
            "Injection|IV|Emergency|Hospital", case=False, na=False)]


In [30]:
#med recm ngn
def recommend_medicines(disease, severity):
    candidates = get_medicines_for_disease(disease)
    
    if candidates.empty:
        return "No safe medicine found. Immediate doctor consultation required."
    
    filtered = severity_filter(candidates, severity)
    
    if filtered.empty:
        return "Disease detected but severity requires hospital care."
    
    filtered = filtered.copy()
    filtered.loc[:, 'side_effect_count'] = filtered.apply(count_side_effects, axis=1)
    best = filtered.sort_values('side_effect_count').head(3)
    
    return best[['name', 'Therapeutic Class', 'side_effect_count', 
                 'substitute0', 'substitute1', 'substitute2']]


In [31]:
def health_report(user_symptoms):
    text_input = " ".join(user_symptoms)

    disease, confidence = predict_disease(text_input)
    score, level,details = calculate_severity(user_symptoms)
    description = get_description(disease)
    precautions = get_precautions(disease)
    medicines = recommend_medicines(disease, level)

    print("\nAI CLINICAL REPORT \n")
    print("Predicted Disease :", disease)
    print("Confidence        :", round(confidence*100,2), "%")
    print("Severity Level    :", level)
    print("Risk Score        :", score)
    print("Details : ",details)
    print("\nDisease Info:")
    print(description)

    print("\nPrecautions:")
    for p in precautions:
        print("•", p)

    print("\nRecommended Medicines:")
    print(medicines)

    print("\nThis system is for educational use only. Consult a doctor.")


In [32]:
health_report(["high_fever", "chills", "vomiting", "headache"])


AI CLINICAL REPORT 

Predicted Disease : Malaria
Confidence        : 26.81 %
Severity Level    : Critical
Risk Score        : 18
Details :  [('high_fever', 7), ('chills', 3), ('vomiting', 5), ('headache', 3)]

Disease Info:
An infectious disease caused by protozoan parasites from the Plasmodium family that can be transmitted by the bite of the Anopheles mosquito or by a contaminated needle or transfusion. Falciparum malaria is the most deadly type.

Precautions:
• Consult nearest hospital
• avoid oily food
• avoid non veg food
• keep mosquitos out

Recommended Medicines:
                         name Therapeutic Class  side_effect_count  \
157400     malart 60mg tablet    ANTI MALARIALS                  3   
212163     sunate 50mg tablet    ANTI MALARIALS                  3   
233348  versaquin 50mg tablet    ANTI MALARIALS                  3   

                substitute0            substitute1        substitute2  
157400  Arsunex 60mg Tablet   Barinate 60mg Tablet  Nomal 60mg Table

In [33]:
health_report(["high_fever", "joint_pain", "muscle_pain", "headache", "skin_rash", "vomiting"])


AI CLINICAL REPORT 

Predicted Disease : Paralysis (brain hemorrhage)
Confidence        : 18.85 %
Severity Level    : Critical
Risk Score        : 23
Details :  [('high_fever', 7), ('joint_pain', 3), ('muscle_pain', 2), ('headache', 3), ('skin_rash', 3), ('vomiting', 5)]

Disease Info:
Intracerebral hemorrhage (ICH) is when blood suddenly bursts into brain tissue, causing damage to your brain. Symptoms usually appear suddenly during ICH. They include headache, weakness, confusion, and paralysis, particularly on one side of your body.

Precautions:
• massage
• eat healthy
• exercise
• consult doctor

Recommended Medicines:
No safe medicine found. Immediate doctor consultation required.

This system is for educational use only. Consult a doctor.


In [34]:
health_report(["high_fever", "abdominal_pain", "constipation", "fatigue", "headache"])


AI CLINICAL REPORT 

Predicted Disease : Typhoid
Confidence        : 30.02 %
Severity Level    : Critical
Risk Score        : 22
Details :  [('high_fever', 7), ('abdominal_pain', 4), ('constipation', 4), ('fatigue', 4), ('headache', 3)]

Disease Info:
An acute illness characterized by fever caused by infection with the bacterium Salmonella typhi. Typhoid fever has an insidious onset, with fever, headache, constipation, malaise, chills, and muscle pain. Diarrhea is uncommon, and vomiting is not usually severe.

Precautions:
• eat high calorie vegitables
• antiboitic therapy
• consult doctor
• medication

Recommended Medicines:
                              name Therapeutic Class  side_effect_count  \
2321    ampicillin 125mg dry syrup   ANTI INFECTIVES                  3   
202499        roscillin 100mg drop   ANTI INFECTIVES                  3   
199619   roscillin 125mg dry syrup   ANTI INFECTIVES                  3   

                    substitute0              substitute1       s

In [35]:
health_report(["chest_pain", "breathlessness", "cough", "high_fever", "fatigue"])


AI CLINICAL REPORT 

Predicted Disease : Bronchial Asthma
Confidence        : 39.9 %
Severity Level    : Critical
Risk Score        : 26
Details :  [('chest_pain', 7), ('breathlessness', 4), ('cough', 4), ('high_fever', 7), ('fatigue', 4)]

Disease Info:
Bronchial asthma is a medical condition which causes the airway path of the lungs to swell and narrow. Due to this swelling, the air path produces excess mucus making it hard to breathe, which results in coughing, short breath, and wheezing. The disease is chronic and interferes with daily working.

Precautions:
• switch to loose cloothing
• take deep breaths
• get away from trigger
• seek help

Recommended Medicines:
No safe medicine found. Immediate doctor consultation required.

This system is for educational use only. Consult a doctor.


In [36]:
health_report(["joint_pain", "swelling", "stiffness", "movement_pain"])


AI CLINICAL REPORT 

Predicted Disease : Urinary tract infection
Confidence        : 3.32 %
Severity Level    : Low
Risk Score        : 3
Details :  [('joint_pain', 3)]

Disease Info:
Urinary tract infection: An infection of the kidney, ureter, bladder, or urethra. Abbreviated UTI. Not everyone with a UTI has symptoms, but common symptoms include a frequent urge to urinate and pain or burning when urinating.

Precautions:
• drink plenty of water
• increase vitamin c intake
• drink cranberry juice
• take probiotics

Recommended Medicines:
No safe medicine found. Immediate doctor consultation required.

This system is for educational use only. Consult a doctor.


In [37]:
import pickle

pickle.dump(model, open("disease_model.pkl", "wb"))
pickle.dump(tfidf, open("vectorizer.pkl", "wb"))
pickle.dump(le, open("label_encoder.pkl", "wb"))

In [39]:

med = pd.read_csv("medicine_dataset.csv")
diseases = pd.read_csv("dataset.csv")['Disease'].str.lower().unique()

use_cols = ['use0','use1','use2','use3','use4']

mask = med[use_cols].apply(
    lambda row: any(d in " ".join(row.astype(str)).lower() for d in diseases),
    axis=1
)

filtered_med = med[mask]
filtered_med.to_csv("medicine_filtered.csv", index=False)

  med = pd.read_csv("medicine_dataset.csv")
