# Title: Personalized Medical Recommendation System with Machine Learning

# Description:

Welcome to our cutting-edge Personalized Medical Recommendation System, a powerful platform designed to assist users in understanding and managing their health. Leveraging the capabilities of machine learning, our system analyzes user-input symptoms to predict potential diseases accurately.

# load dataset & tools

In [None]:
import  pandas as pd

In [26]:
dataset = pd.read_csv('Training.csv')

In [27]:
dataset

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
4916,0,1,0,0,0,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,Acne
4917,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Urinary tract infection
4918,0,1,0,0,0,0,1,0,0,0,...,0,0,1,1,1,1,0,0,0,Psoriasis


In [24]:
# vals = dataset.values.flatten()

In [28]:
dataset.shape

(4920, 133)

# train test split

In [29]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [30]:
X = dataset.drop('prognosis', axis=1)
y = dataset['prognosis']

# ecoding prognonsis
le = LabelEncoder()
le.fit(y)
Y = le.transform(y)
    
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=20)

# Training top models

SVC Accuracy: 0.8100
SVC Confusion Matrix:
[[87, 19],
 [19, 75]]


RandomForest Accuracy: 0.9000
RandomForest Confusion Matrix:
[[94, 12],
 [ 8, 86]]




# single prediction

In [63]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Create dataset
X, y = make_classification(n_samples=500, n_features=20, random_state=42)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Using SVC (your original code)
svc = SVC(kernel='linear')
svc.fit(X_train, y_train)
ypred_svc = svc.predict(X_test)
print("SVC Accuracy:", accuracy_score(y_test, ypred_svc))

# Now using Random Forest instead of SVC
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
ypred_rf = rf.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, ypred_rf))


SVC Accuracy: 0.88
Random Forest Accuracy: 0.9333333333333333


In [64]:
import pickle
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Create dataset
X, y = make_classification(n_samples=500, n_features=20, random_state=42)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Save the trained Random Forest model to a file
pickle.dump(rf, open('random_forest.pkl', 'wb'))

# Optional: Test and print accuracy
ypred_rf = rf.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, ypred_rf))


Random Forest Accuracy: 0.9333333333333333


In [65]:
rf = pickle.load(open('random_forest.pkl', 'rb'))


In [None]:
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# 1. Create dataset
X, y = make_classification(
    n_samples=500, n_features=20, n_informative=15, n_redundant=5,
    n_classes=2, random_state=42
)

# 2. Split dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 3. Convert test sets to pandas DataFrame/Series for iloc usage
X_test = pd.DataFrame(X_test)
y_test = pd.Series(y_test)

# 4. Train Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# 5. Predict and print for test indices 0 and 100
print("predicted disease :", rf.predict(X_test.iloc[[0]])[0])
print("Actual Disease    :", y_test.iloc[0])

print("predicted disease :", rf.predict(X_test.iloc[[100]])[0])
print("Actual Disease    :", y_test.iloc[100])



predicted disease : 0
Actual Disease    : 0
predicted disease : 0
Actual Disease    : 0


In [71]:
# test 2:
print("predicted disease :",rf.predict(X_test.iloc[[100]].values.reshape(1,-1)))
print("Actual Disease :", y_test[100])

predicted disease : [0]
Actual Disease : 0


# Recommendation System and Prediction

# load database and use logic for recommendations

In [72]:
sym_des = pd.read_csv("symtoms_df.csv")
precautions = pd.read_csv("precautions_df.csv")
workout = pd.read_csv("workout_df.csv")
description = pd.read_csv("description.csv")
medications = pd.read_csv('medications.csv')
diets = pd.read_csv("diets.csv")

In [73]:
def get_predicted_value(patient_symptoms, rf=rf, symptoms_index=symptoms_index, cols_order=cols_order, diseases_list=diseases_list):
    normalized_input = [normalize_sym(s) for s in patient_symptoms]
    mapped = []
    unknowns = []
    for ns in normalized_input:
        orig = normalized_to_orig.get(ns)
        if orig is not None:
            mapped.append(orig)
        else:
            unknowns.append(ns)
    if unknowns:
        print("Warning: unknown symptoms (ignored):", unknowns)
    input_vector = np.zeros(len(cols_order), dtype=int)
    for orig_sym in mapped:
        idx = symptoms_index.get(orig_sym)
        if idx is not None and 0 <= idx < len(input_vector):
            input_vector[idx] = 1
        else:
            print(f"Warning: symptom '{orig_sym}' has invalid index in symptoms_dict")
    X_input = pd.DataFrame([input_vector], columns=cols_order)
    pred_idx = rf.predict(X_input)[0]  # <-- use rf here
    disease_name = diseases_list.get(pred_idx, f"Unknown disease id {pred_idx}")
    return disease_name



In [74]:
# Test 1
# Split the user's input into a list of symptoms (assuming they are comma-separated) # itching,skin_rash,nodal_skin_eruptions
symptoms = input("Enter your symptoms.......")
user_symptoms = [s.strip() for s in symptoms.split(',')]
# Remove any extra characters, if any
user_symptoms = [symptom.strip("[]' ") for symptom in user_symptoms]
predicted_disease = get_predicted_value(user_symptoms)

desc, pre, med, die, wrkout = helper(predicted_disease)

print("=================predicted disease============")
print(predicted_disease)
print("=================description==================")
print(desc)
print("=================precautions==================")
i = 1
for p_i in pre[0]:
    print(i, ": ", p_i)
    i += 1

print("=================medications==================")
for m_i in med:
    print(i, ": ", m_i)
    i += 1

print("=================workout==================")
for w_i in wrkout:
    print(i, ": ", w_i)
    i += 1

print("=================diets==================")
for d_i in die:
    print(i, ": ", d_i)
    i += 1


Enter your symptoms....... high_fever,cough




ValueError: X has 132 features, but RandomForestClassifier is expecting 20 features as input.

In [48]:
# Test 1
# Split the user's input into a list of symptoms (assuming they are comma-separated) # yellow_crust_ooze,red_sore_around_nose,small_dents_in_nails,inflammatory_nails,blister
symptoms = input("Enter your symptoms.......")
user_symptoms = [s.strip() for s in symptoms.split(',')]
# Remove any extra characters, if any
user_symptoms = [symptom.strip("[]' ") for symptom in user_symptoms]
predicted_disease = get_predicted_value(user_symptoms)

desc, pre, med, die, wrkout = helper(predicted_disease)

print("=================predicted disease============")
print(predicted_disease)
print("=================description==================")
print(desc)
print("=================precautions==================")
i = 1
for p_i in pre[0]:
    print(i, ": ", p_i)
    i += 1

print("=================medications==================")
for m_i in med:
    print(i, ": ", m_i)
    i += 1

print("=================workout==================")
for w_i in wrkout:
    print(i, ": ", w_i)
    i += 1

print("=================diets==================")
for d_i in die:
    print(i, ": ", d_i)
    i += 1


Enter your symptoms....... fatigue,weight_gain,anxiety


Urinary tract infection
Urinary tract infection is an infection in any part of the urinary system.
1 :  drink plenty of water
2 :  increase vitamin c intake
3 :  drink cranberry juice
4 :  take probiotics
5 :  ['Antibiotics', 'Urinary analgesics', 'Phenazopyridine', 'Antispasmodics', 'Probiotics']
6 :  Stay hydrated
7 :  Consume cranberry products
8 :  Include vitamin C-rich foods
9 :  Limit caffeine and alcohol
10 :  Consume probiotics
11 :  Avoid spicy and acidic foods
12 :  Consult a healthcare professional
13 :  Follow medical recommendations
14 :  Maintain good hygiene
15 :  Limit sugary foods and beverages
16 :  ['UTI Diet', 'Hydration', 'Cranberry juice', 'Probiotics', 'Vitamin C-rich foods']




In [123]:
# let's use pycharm flask app
# but install this version in pycharm
import sklearn
print(sklearn.__version__)

1.3.2
