<h3>Training and Saving a Model for Predicting Treatment Using a Random Forest Classifier</h3>

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
import joblib

# Train a multi-output random forest that maps a disease name to its four
# precaution labels, then persist the fitted model and the TF-IDF vectorizer.

# The four label columns this cell reads from the CSV, in output order.
PRECAUTION_COLUMNS = ['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']

# 1. Load CSV data
df = pd.read_csv('symptom_precaution.csv')

# 2. Replace missing cells with a placeholder so every label is non-null.
#    NOTE: 'No Data' therefore becomes a regular class label the model can
#    predict; anything consuming predictions should treat it as "no entry".
df = df.fillna('No Data')

# 3. Labels: take the four precaution columns directly.
#    The previous version joined them with '$' and split on '$' again, which
#    broke (wrong column count) as soon as a precaution contained '$' and
#    raised TypeError on any non-string cell. astype(str) guards the latter.
precautions_split = df[PRECAUTION_COLUMNS].astype(str)

# 4. Keep the '$'-joined summary column on df for inspection / later cells.
#    It is no longer used to derive the labels.
df['Precautions'] = precautions_split.apply('$'.join, axis=1)

# 5. Define Features (X) and Labels (y)
X = df['Disease']      # Disease names (free text)
y = precautions_split  # One label column per precaution slot

# 6. Vectorize the disease names with TF-IDF
vectorizer = TfidfVectorizer(
    max_features=10000,        # Upper bound on vocabulary size
    ngram_range=(1, 3),        # Unigrams, bigrams, and trigrams
    stop_words='english',      # Drop common English stopwords
    sublinear_tf=True          # Use 1 + log(tf) instead of raw tf
)

X_tfidf = vectorizer.fit_transform(X)

# 7. Multi-output classifier: one RandomForest is fitted per precaution column
rf_model = RandomForestClassifier(n_estimators=200, random_state=42)
multi_rf_model = MultiOutputClassifier(rf_model, n_jobs=-1)

# 8. Train on the entire dataset (no hold-out split is made in this cell)
multi_rf_model.fit(X_tfidf, y)

# 9. Save the model and vectorizer for future use
joblib.dump(multi_rf_model, 'enhanced_treatment_prediction_model.joblib')
joblib.dump(vectorizer, 'enhanced_vectorizer.joblib')

print("Enhanced model and vectorizer saved successfully!")


Enhanced model and vectorizer saved successfully!


<h3>Testing Model</h3>

In [2]:
import joblib

# Load the persisted model + vectorizer and print the predicted precautions
# for a single disease name.

dis = "Dengue"  # Input disease name

# Filler value the training cell writes into empty cells via fillna(); the
# model can predict it as a label, but it is not a treatment to show.
MISSING_PLACEHOLDER = 'No Data'

# Load the saved model and vectorizer
# NOTE(review): joblib.load unpickles arbitrary objects — only load artefacts
# produced by a trusted training run.
loaded_model = joblib.load('enhanced_treatment_prediction_model.joblib')
loaded_vectorizer = joblib.load('enhanced_vectorizer.joblib')

# Transform the input disease using the loaded vectorizer
sample_disease_tfidf = loaded_vectorizer.transform([dis])

# Predict the treatment using the loaded model (one row, one value per precaution slot)
predicted_precautions = loaded_model.predict(sample_disease_tfidf)

if sample_disease_tfidf.nnz == 0:
    # None of the input's terms are in the vectorizer vocabulary, so the
    # feature vector is all zeros and the prediction does not depend on the
    # input at all. Say so instead of printing an unrelated treatment.
    print(f"No treatment prediction available for '{dis}': disease name not recognised.")
else:
    # Output the result
    print(f"Predicted treatment for '{dis}':")

    # Drop blanks and the training-time placeholder, then number what remains
    # consecutively so skipped entries do not leave gaps in the list.
    cleaned = (precaution.strip() for precaution in predicted_precautions[0])
    shown_precautions = [p for p in cleaned if p and p != MISSING_PLACEHOLDER]

    for precaution_index, precaution in enumerate(shown_precautions, start=1):
        print(f" {precaution_index}. {precaution}")



Predicted treatment for 'Dengue':
 1. drink papaya leaf juice
 2. avoid fatty spicy food
 3. keep mosquitos away
 4. keep hydrated
