In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

In [None]:
data = pd.read_csv('/content/Symptom2Disease.csv')

# Peek at the first rows to confirm the file loaded with the expected columns
print(data.head())

# Model input is the free-text symptom description; the target is the disease label
X = data['text']
y = data['label']

# Hold out 30% of the rows for testing.
# stratify=y keeps each label's share roughly the same in the train and test
# splits, so per-class metrics are computed on a comparable number of examples
# rather than on whatever a purely random draw happens to produce.
# NOTE: stratification needs at least two rows per label; train_test_split
# raises ValueError otherwise.
# random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

   Unnamed: 0      label                                               text
0           0  Psoriasis  I have been experiencing a skin rash on my arm...
1           1  Psoriasis  My skin has been peeling, especially on my kne...
2           2  Psoriasis  I have been experiencing joint pain in my fing...
3           3  Psoriasis  There is a silver like dusting on my skin, esp...
4           4  Psoriasis  My nails have small dents or pits in them, and...


In [None]:

# Build TF-IDF features (English stop words removed). The vocabulary and IDF
# weights are learned from the training split only and then applied unchanged
# to the test split.
vectorizer = TfidfVectorizer(stop_words='english')
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Fit the classifier; fit() returns the estimator itself, so construction and
# training can be chained.
model = LogisticRegression(max_iter=1000).fit(X_train_vec, y_train)

# Predict labels for the held-out rows
y_pred = model.predict(X_test_vec)

# Compute the metrics first, then report them
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)


Accuracy: 0.9611111111111111
Classification Report:
                                 precision    recall  f1-score   support

                           Acne       1.00      1.00      1.00        13
                      Arthritis       1.00      1.00      1.00        14
               Bronchial Asthma       0.94      1.00      0.97        15
           Cervical spondylosis       1.00      1.00      1.00        14
                    Chicken pox       0.67      1.00      0.80        12
                    Common Cold       1.00      0.94      0.97        17
                         Dengue       1.00      0.72      0.84        18
          Dimorphic Hemorrhoids       1.00      1.00      1.00        13
               Fungal infection       1.00      1.00      1.00        17
                   Hypertension       1.00      1.00      1.00        15
                       Impetigo       1.00      1.00      1.00        17
                       Jaundice       1.00      1.00      1.00        1

In [None]:
# Single-description prediction helper
def evaluate_model(text=None):
    """Predict the disease for one symptom description and print the result.

    Args:
        text: Symptom description to classify. When omitted (``None``), the
            description is read interactively via ``input()``. Passing it
            explicitly lets the cell run unattended (e.g. during "Run All").

    Returns:
        None. The outcome is reported with ``print``.

    Uses the module-level ``vectorizer`` and ``model`` objects, which must
    already be fitted.
    """
    if text is None:
        # Take custom input from the user
        text = input("Enter the symptom description: ")

    # A blank description contains no tokens, so it would vectorize to an
    # all-zero row and the "prediction" would not depend on the input at all.
    # Say so instead of printing a misleading disease name.
    if not text.strip():
        print("No symptom description provided; nothing to predict.")
        return

    # Transform the text with the same fitted TF-IDF vectorizer used in training
    input_vec = vectorizer.transform([text])

    # predict() returns one label per input row; a single row was passed
    predicted_label = model.predict(input_vec)

    print(f"Predicted Disease: {predicted_label[0]}")

# Example run: predict the disease for one symptom description and print it
evaluate_model()

Enter the symptom description: My skin has been peeling, especially on my knee
Predicted Disease: Psoriasis


In [None]:
import joblib

# Persist both fitted objects: predicting on new text needs the TF-IDF
# vectorizer to build features and the classifier to label them.
model_path, vectorizer_path = 'disease_prediction_model.pkl', 'vectorizer.pkl'
joblib.dump(model, model_path)
joblib.dump(vectorizer, vectorizer_path)


['vectorizer.pkl']