# Ayurvedic Data

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt

In [2]:
# Load the data into a Pandas dataframe
data_2 = pd.read_csv("final_Ayurvedic Dataset.csv")
symptoms_list = [symptom.lower().replace(' ', '') for symptom in data_2.columns.tolist() if symptom != 'Remedy']



In [3]:
# Split the data into input features (symptoms) and target variable (Medicines)
X = data_2.drop("Remedy", axis=1)
y = data_2["Remedy"]

In [4]:
data_2.head()

Unnamed: 0,Remedy,Stress,Anxiety,Depression,Insomnia,Fatigue,Constipation,Indigestion/Digestive Issues,Bloating,Gas,...,Fertility Issues,Bad Breath,Muscle Pain,High Blood Pressure,High Cholesterol,Sinus Congestion,Weight Loss,Cold,Fever,Skin Health
0,Ashwagandha,1,1,1,1,1,0,0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
1,Triphala,0,0,0,0,0,1,1,1.0,1,...,0,0,0,0,0,0,0,0,0,0
2,Turmeric,0,0,0,0,0,0,0,0.0,0,...,0,0,0,0,1,0,0,0,0,0
3,Ginger,0,0,0,0,0,0,1,0.0,0,...,0,1,1,1,0,0,0,0,0,0
4,Neem,0,0,0,0,0,0,0,0.0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Encode the categorical target variable `Medicines` into numerical labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [6]:
label_map = {label: i for i, label in enumerate(label_encoder.classes_)}
print('Label Map:', label_map)

# print a table of labels and their corresponding encoded values
print('Label\tEncoded Value')
for label, value in zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)):
    print('{}\t{}'.format(label, value))

Label Map: {'Amla': 0, 'Ashwagandha': 1, 'Brahmi': 2, 'Cardamom': 3, 'Cinnamon': 4, 'Coriander': 5, 'Cumin': 6, 'Fennel': 7, 'Fenugreek': 8, 'Garlic': 9, 'Ginger': 10, 'Gotu Kola': 11, 'Guduchi': 12, 'Hing': 13, 'Holy Basil': 14, 'Lemon': 15, 'Licorice': 16, 'Mint': 17, 'Neem': 18, 'Peppermint': 19, 'Rose': 20, 'Saffron': 21, 'Sandalwood': 22, 'Sesame Oil': 23, 'Shatavari': 24, 'Trikatu': 25, 'Triphala': 26, 'Triphala Guggul': 27, 'Tulsi': 28, 'Turmeric': 29, 'Vacha': 30}
Label	Encoded Value
Amla	0
Ashwagandha	1
Brahmi	2
Cardamom	3
Cinnamon	4
Coriander	5
Cumin	6
Fennel	7
Fenugreek	8
Garlic	9
Ginger	10
Gotu Kola	11
Guduchi	12
Hing	13
Holy Basil	14
Lemon	15
Licorice	16
Mint	17
Neem	18
Peppermint	19
Rose	20
Saffron	21
Sandalwood	22
Sesame Oil	23
Shatavari	24
Trikatu	25
Triphala	26
Triphala Guggul	27
Tulsi	28
Turmeric	29
Vacha	30


In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
y_train

array([20, 11,  1, 18,  4,  2, 12, 24, 17, 26, 29, 21, 10, 15, 22,  7, 30,
       13, 14, 25,  6, 27,  9, 16])

In [9]:
y_train.shape

(24,)

In [10]:
imputer = SimpleImputer(strategy="median")
X_train = imputer.fit_transform(X_train)
y_train = imputer.fit_transform(y_train.reshape(-1, 1)).flatten()
X_test = imputer.fit_transform(X_test)
y_test = imputer.fit_transform(y_test.reshape(-1, 1)).flatten()

In [11]:
# Instantiate a DecisionTreeClassifier algorithm
model_DT = DecisionTreeClassifier(random_state=42)
model_RF = RandomForestClassifier(random_state=42)
model_KNN = KNeighborsClassifier(n_neighbors=3)

# Train the model on the training set
model_DT.fit(X_train, y_train.ravel())
model_RF.fit(X_train, y_train.ravel())
model_KNN.fit(X_train, y_train.ravel())

# Test the model on the testing set
y_pred_DT = model_DT.predict(X_test)
y_pred_RF = model_RF.predict(X_test)
y_pred_KNN = model_KNN.predict(X_test)

In [11]:
# y_train.reshape(,

In [12]:
# y_train = y_train.flatten()
# y_test = y_test.flatten()
y_train.shape

(24,)

In [13]:
X_train.shape

(24, 42)

In [14]:
models = [model_DT, model_RF, model_KNN]
model_data = {model_DT : y_pred_DT, model_RF : y_pred_RF, model_KNN : y_pred_KNN}

for model in models:
    print("\nTrain Accuracy of Model {} = {}".format(model, accuracy_score(y_train, model.predict(X_train))))
    print("Test Accuracy of Model {} = {}".format(model, accuracy_score(y_test, model_data[model])))


Train Accuracy of Model DecisionTreeClassifier(random_state=42) = 0.9583333333333334
Test Accuracy of Model DecisionTreeClassifier(random_state=42) = 0.0

Train Accuracy of Model RandomForestClassifier(random_state=42) = 0.9583333333333334
Test Accuracy of Model RandomForestClassifier(random_state=42) = 0.0

Train Accuracy of Model KNeighborsClassifier(n_neighbors=3) = 0.2916666666666667
Test Accuracy of Model KNeighborsClassifier(n_neighbors=3) = 0.0


In [15]:
# Use the trained model to make predictions on new input symptoms
new = input("Enter symptoms separated by comma: ")
new = new.lower().replace(" ", "")
new_symptoms = [1 if symptom.strip() in new.split(',') else 0 for symptom in symptoms_list]

print(new_symptoms)
# print(len(new_symptoms))

# Predict the probabilities of each class using the `predict_proba()` method
predicted_probabilities_DT = model_DT.predict_proba([new_symptoms])[0]
predicted_probabilities_RF = model_RF.predict_proba([new_symptoms])[0]
predicted_probabilities_KNN = model_KNN.predict_proba([new_symptoms])[0]

# Sort the probabilities in descending order and get the top 3 predictions
top_3_predictions_DT = label_encoder.inverse_transform(predicted_probabilities_DT.argsort()[::-1][:3])
top_3_predictions_RF = label_encoder.inverse_transform(predicted_probabilities_RF.argsort()[::-1][:3])
top_3_predictions_KNN = label_encoder.inverse_transform(predicted_probabilities_KNN.argsort()[::-1][:3])


print("Top 3 predicted medicines - Decision Tree:", top_3_predictions_DT)
print("Top 3 predicted medicines - Random Forest:", top_3_predictions_RF)
print("Top 3 predicted medicines - KNN:", top_3_predictions_KNN)



[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Top 3 predicted medicines - Decision Tree: ['Peppermint' 'Sesame Oil' 'Ginger']
Top 3 predicted medicines - Random Forest: ['Hing' 'Guduchi' 'Fenugreek']
Top 3 predicted medicines - KNN: ['Sesame Oil' 'Peppermint' 'Hing']


In [17]:
import joblib

joblib.dump(model, 'ayurvedic.joblib')


['ayurvedic.joblib']