In [45]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [46]:
file_path = "C:/Users/bharg/Desktop/Hackathon/Medicine.csv"  # Update with your file path
df = pd.read_csv(file_path)


In [47]:
print("Dataset Shape:", df.shape)

Dataset Shape: (4920, 133)


In [48]:
df_cleaned = df.drop_duplicates().reset_index(drop=True)
print("After Removing Duplicates:", df_cleaned.shape)

After Removing Duplicates: (304, 133)


In [49]:
label_encoder = LabelEncoder()
df_cleaned.loc[:, 'prognosis_encoded'] = label_encoder.fit_transform(df_cleaned['prognosis'])


In [50]:
correlation_matrix = df_cleaned.drop(columns=['prognosis']).corr()

In [51]:
correlation_threshold = 0.1
important_features = correlation_matrix[
    abs(correlation_matrix['prognosis_encoded']) > correlation_threshold
].index.tolist()
important_features.remove('prognosis_encoded')

In [52]:
df_selected = df_cleaned[important_features + ['prognosis_encoded']]


In [53]:
X = df_selected.drop(columns=['prognosis_encoded'])
y = df_selected['prognosis_encoded']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [54]:
model = RandomForestClassifier(n_estimators=100, random_state=42)


In [55]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(model, X, y, cv=skf, scoring='accuracy')


In [56]:
model.fit(X_train, y_train)


In [57]:
y_pred = model.predict(X_test)

In [58]:
accuracy = accuracy_score(y_test, y_pred)
print("\nModel Accuracy:", accuracy)
print("\nCross-Validation Scores:", cv_scores)
print("\nMean CV Accuracy:", np.mean(cv_scores))
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=1))



Model Accuracy: 0.8688524590163934

Cross-Validation Scores: [0.8852459  0.86885246 0.90163934 0.91803279 0.85      ]

Mean CV Accuracy: 0.8847540983606557

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1
           2       1.00      1.00      1.00         1
           3       1.00      1.00      1.00         2
           4       1.00      1.00      1.00         1
           5       1.00      0.00      0.00         1
           6       1.00      1.00      1.00         1
           7       1.00      1.00      1.00         1
           8       1.00      1.00      1.00         2
           9       1.00      1.00      1.00         2
          10       1.00      1.00      1.00         2
          11       1.00      1.00      1.00         2
          12       1.00      1.00      1.00         2
          13       0.50      1.00      0.67         1
       

In [59]:
medication_file = "C:/Users/bharg/Downloads/MEDICINE RECOMMENDATION/MEDICINE RECOMMENDATION/medications.csv"  # Update with your file path
medications_df = pd.read_csv(medication_file)

In [60]:
medications_df.columns = medications_df.columns.str.strip().str.lower()

In [61]:

disease_col = medications_df.columns[0]  # First column = Disease Name
medication_col = medications_df.columns[1]  # Second column = Medication Name


In [62]:
def recommend_disease(symptoms):
    input_data = np.zeros(len(important_features))
    
    for symptom in symptoms:
        if symptom in important_features:
            input_data[important_features.index(symptom)] = 1
    
    input_df = pd.DataFrame([input_data], columns=important_features)
    predicted_label = model.predict(input_df)[0]
    
    return label_encoder.inverse_transform([predicted_label])[0]

In [63]:
def get_medication(disease):
    meds = medications_df.loc[medications_df[disease_col].str.lower() == disease.lower()]
    return meds[medication_col].tolist() if not meds.empty else ["No medication found"]


In [64]:
def recommend_disease_and_medication(symptoms):
    predicted_disease = recommend_disease(symptoms)
    recommended_meds = get_medication(predicted_disease)
    return predicted_disease, recommended_meds


In [65]:
user_symptoms = ["itching", "skin_rash", "nodal_skin_eruptions"]
predicted_disease, medications = recommend_disease_and_medication(user_symptoms)

print("\nPredicted Disease:", predicted_disease)
print("Recommended Medications:", medications)


Predicted Disease: Drug Reaction
Recommended Medications: ["['Antihistamines', 'Epinephrine', 'Corticosteroids', 'Antibiotics', 'Antifungal Cream']"]
