In [8]:
import pickle
import pandas as pd
import numpy as np 
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer
from flask import Flask, request, render_template, jsonify

Import all the packages needed for the model.

In [9]:
# Read the dataset CSV into a DataFrame; the file is decoded as latin-1.
df = pd.read_csv(filepath_or_buffer="./dataset.csv", encoding="latin-1")

# Plain-text preview of the first five rows.
print(df.head(n=5))

                       disease  \
0               Panic disorder   
1             Vocal cord polyp   
2              Turner syndrome   
3               Cryptorchidism   
4  Ethylene glycol poisoning-1   

                                            symptoms  \
0  Palpitations, Sweating, Trembling, Shortness o...   
1           Hoarseness, Vocal Changes, Vocal Fatigue   
2  Short stature, Gonadal dysgenesis, Webbed neck...   
3  Absence or undescended testicle(s), empty scro...   
4  Nausea, vomiting, abdominal pain, General mala...   

                                         precautions  
0  Antidepressant medications, Cognitive Behavior...  
1       Voice Rest, Speech Therapy, Surgical Removal  
2  Growth hormone therapy, Estrogen replacement t...  
3  Observation and monitoring (in cases of mild o...  
4  Supportive Measures, Gastric Decontamination, ...  


Loading the dataset file required for the project and printing its first rows.

In [10]:
# Build a fresh linear-regression estimator and serialise it to model.pkl.
# NOTE: fit() has not been called at this point, so the file stores an
# unfitted estimator (hyper-parameters only, no learned coefficients).
model = LinearRegression()

with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

Opening the file `model.pkl`.

In [11]:
# Deserialise the estimator stored in model.pkl.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load pickles that come from a trusted source.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Report what was loaded (the file handle is no longer needed here).
print("Model loaded successfully!")
print("Loaded Model:", model)

Model loaded successfully!
Loaded Model: LinearRegression()


Data splitting and fitting the model.

In [12]:
def split_symptoms(text):
    """Split a symptom string on the ', ' separator into a list of symptom tokens.

    Defined as a named function (rather than a lambda) so that the fitted
    vectorizer can be serialised with the standard ``pickle`` module, which
    cannot pickle lambdas.  Whoever unpickles the vectorizer must have a
    function with this name importable from the same module.
    """
    return text.split(', ')


# Step 1: Encode each disease name as an integer class code.
label_encoder = LabelEncoder()
df['disease_encoded'] = label_encoder.fit_transform(df['disease'])

# Step 2: Turn each symptom string into a bag-of-symptoms count vector.
# token_pattern=None states explicitly that the default regex is unused when a
# custom tokenizer is supplied; without it scikit-learn emits a UserWarning.
vectorizer = CountVectorizer(tokenizer=split_symptoms, token_pattern=None)
X = vectorizer.fit_transform(df['symptoms'])

# Step 3: Split data into features and target
y = df['disease_encoded']

# Step 4: Split data into training and testing sets (80/20, fixed seed so the
# split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Fit the linear regression model
# NOTE(review): the target is a nominal label code produced by LabelEncoder, and
# LinearRegression treats those codes as ordered numbers -- consider whether a
# classifier is the better fit for this task.
model = LinearRegression()
model.fit(X_train, y_train)




In [13]:
 # Sample input symptom for prediction
input_symptom = "Abdominal or back pain, unexplained weight loss, hormonal imbalances"

# Encode the input with the CountVectorizer fitted during training so the
# feature columns line up with what the model was trained on.
X_input = vectorizer.transform([input_symptom])

print(X_input)

# Predict the disease based on the input symptom.  The regressor returns an
# array holding one float per input row.
predicted_disease_code = model.predict(X_input)

# Convert the float prediction to an integer label code:
#  * take the single element explicitly -- calling int() on an array is
#    deprecated since NumPy 1.25;
#  * round to the nearest integer -- int() truncates toward zero, so a
#    prediction such as 17.9999 would otherwise decode as class 17, not 18.
disease_index = int(np.rint(np.ravel(predicted_disease_code)[0]))
predicted_disease = label_encoder.inverse_transform([disease_index])

print(predicted_disease_code, predicted_disease)

print(f"The predicted disease based on the input symptom is: {predicted_disease[0]}")

  (0, 2)	1
  (0, 522)	1
  (0, 1124)	1
[18.0006158] ['Adrenal Cancer']
The predicted disease based on the input symptom is: Adrenal Cancer


  predicted_disease = label_encoder.inverse_transform([int(predicted_disease_code)])


In [14]:

# constant_symptoms = "itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,muscle_wasting,vomiting,burning_micturition,spotting_ urination,fatigue,weight_gain,anxiety,cold_hands_and_feets,mood_swings,weight_loss,restlessness,lethargy,patches_in_throat,irregular_sugar_level,cough,high_fever,sunken_eyes,breathlessness,sweating,dehydration,indigestion,headache,yellowish_skin,dark_urine,nausea,loss_of_appetite,pain_behind_the_eyes,back_pain,constipation,abdominal_pain,diarrhoea,mild_fever,yellow_urine,yellowing_of_eyes,acute_liver_failure,fluid_overload,swelling_of_stomach,swelled_lymph_nodes,malaise,blurred_and_distorted_vision,phlegm,throat_irritation,redness_of_eyes,sinus_pressure,runny_nose,congestion,chest_pain,weakness_in_limbs,fast_heart_rate,pain_during_bowel_movements,pain_in_anal_region,bloody_stool,irritation_in_anus,neck_pain,dizziness,cramps,bruising,obesity,swollen_legs,swollen_blood_vessels,puffy_face_and_eyes,enlarged_thyroid,brittle_nails,swollen_extremeties,excessive_hunger,extra_marital_contacts,drying_and_tingling_lips,slurred_speech,knee_pain,hip_joint_pain,muscle_weakness,stiff_neck,swelling_joints,movement_stiffness,spinning_movements,loss_of_balance,unsteadiness,weakness_of_one_body_side,loss_of_smell,bladder_discomfort,foul_smell_of urine,continuous_feel_of_urine,passage_of_gases,internal_itching,toxic_look_(typhos),depression,irritability,muscle_pain,altered_sensorium,red_spots_over_body,belly_pain,abnormal_menstruation,dischromic 
# _patches,watering_from_eyes,increased_appetite,polyuria,family_history,mucoid_sputum,rusty_sputum,lack_of_concentration,visual_disturbances,receiving_blood_transfusion,receiving_unsterile_injections,coma,stomach_bleeding,distention_of_abdomen,history_of_alcohol_consumption,fluid_overload,blood_in_sputum,prominent_veins_on_calf,palpitations,painful_walking,pus_filled_pimples,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze".split(",")

# print("constant_symptoms",constant_symptoms)

# patients_symptoms =["cold_hands_and_feets","cough"]
# value_to_pass = list(range(len(constant_symptoms)))


# for symptom in constant_symptoms:
#     if symptom in patients_symptoms:
#         value_to_pass =[]
#     else:
#         value_to_pass =[]

# print("value_to_pass",value_to_pass)

# df = pd.read_csv("./dataset.csv", encoding="latin-1")
