In [None]:
import pandas as pd
import string
from sklearn.feature_extraction.text import TfidfVectorizer

dataset = {
    "symptom_text": [
        "mild headache and slight fever",
        "cough and mild fatigue",
        "high fever and chest pain",
        "difficulty breathing and dizziness",
        "runny nose and sneezing",
        "persistent cough and high fever",
        "mild sore throat",
        "vomiting and severe abdominal pain",
        "fatigue and body ache",
        "chest pain and shortness of breath",
        "slight cough and sore throat",
        "nausea and light dizziness",
        "severe headache and high temperature",
        "loss of consciousness and heavy sweating",
        "sneezing and mild throat irritation",
        "continuous vomiting and dehydration",
        "low energy and mild fever",
        "sharp chest pain and rapid heartbeat",
        "itchy eyes and runny nose",
        "severe breathing difficulty and bluish lips"
    ],
    "severity": [
        "mild",
        "mild",
        "severe",
        "severe",
        "mild",
        "severe",
        "mild",
        "severe",
        "mild",
        "severe",
        "mild",
        "mild",
        "severe",
        "severe",
        "mild",
        "severe",
        "mild",
        "severe",
        "mild",
        "severe"
    ]
}


df=pd.DataFrame(dataset)
#print(df)
#print("New line")
print(df.head)
#print(df.tail)

df.to_csv("symptoms_dataset.csv", index=False)

<bound method NDFrame.head of                                    symptom_text severity
0                mild headache and slight fever     mild
1                        cough and mild fatigue     mild
2                     high fever and chest pain   severe
3            difficulty breathing and dizziness   severe
4                       runny nose and sneezing     mild
5               persistent cough and high fever   severe
6                              mild sore throat     mild
7            vomiting and severe abdominal pain   severe
8                         fatigue and body ache     mild
9            chest pain and shortness of breath   severe
10                 slight cough and sore throat     mild
11                   nausea and light dizziness     mild
12         severe headache and high temperature   severe
13     loss of consciousness and heavy sweating   severe
14          sneezing and mild throat irritation     mild
15          continuous vomiting and dehydration   severe
1

In [None]:
def preprocess_text(text):
  text=text.lower()
  text = text.translate(str.maketrans('', '', string.punctuation))
  return text

df['clean_text']=df['symptom_text'].apply(preprocess_text)

print(df[['symptom_text', 'clean_text', 'severity']])

                                   symptom_text  \
0                mild headache and slight fever   
1                        cough and mild fatigue   
2                     high fever and chest pain   
3            difficulty breathing and dizziness   
4                       runny nose and sneezing   
5               persistent cough and high fever   
6                              mild sore throat   
7            vomiting and severe abdominal pain   
8                         fatigue and body ache   
9            chest pain and shortness of breath   
10                 slight cough and sore throat   
11                   nausea and light dizziness   
12         severe headache and high temperature   
13     loss of consciousness and heavy sweating   
14          sneezing and mild throat irritation   
15          continuous vomiting and dehydration   
16                    low energy and mild fever   
17         sharp chest pain and rapid heartbeat   
18                    itchy eye

In [None]:
vectorize=TfidfVectorizer()
X=vectorize.fit_transform(df['clean_text'])

print(X.shape)
print("New line")
print(vectorize.get_feature_names_out()) #list will be sorted
print(X)

(20, 46)
New line
['abdominal' 'ache' 'and' 'bluish' 'body' 'breath' 'breathing' 'chest'
 'consciousness' 'continuous' 'cough' 'dehydration' 'difficulty'
 'dizziness' 'energy' 'eyes' 'fatigue' 'fever' 'headache' 'heartbeat'
 'heavy' 'high' 'irritation' 'itchy' 'light' 'lips' 'loss' 'low' 'mild'
 'nausea' 'nose' 'of' 'pain' 'persistent' 'rapid' 'runny' 'severe' 'sharp'
 'shortness' 'slight' 'sneezing' 'sore' 'sweating' 'temperature' 'throat'
 'vomiting']
<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 96 stored elements and shape (20, 46)>
  Coords	Values
  (0, 28)	0.4150395268552875
  (0, 18)	0.5427420334589101
  (0, 2)	0.19322466659601228
  (0, 39)	0.5427420334589101
  (0, 17)	0.4486296798032576
  (1, 28)	0.4811721962349707
  (1, 2)	0.22401321121684567
  (1, 10)	0.5677763082935787
  (1, 16)	0.6292229036766278
  (2, 2)	0.20149829779675335
  (2, 17)	0.4678394245102271
  (2, 21)	0.5107107702667382
  (2, 7)	0.5107107702667382
  (2, 32)	0.4678394245102271
  (3, 2)	0.201336598

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,classification_report

y=df['severity']

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42,stratify=y)
model=LogisticRegression()
model.fit(X_train,y_train)
y_pred=model.predict(X_test)

print("Accuracy:",accuracy_score(y_test,y_pred))
print("\nClassification Report:\n",classification_report(y_test,y_pred))

Accuracy: 0.5

Classification Report:
               precision    recall  f1-score   support

        mild       0.50      0.50      0.50         2
      severe       0.50      0.50      0.50         2

    accuracy                           0.50         4
   macro avg       0.50      0.50      0.50         4
weighted avg       0.50      0.50      0.50         4



In [17]:
import numpy as np

def predict_severity(symptom_text):
  clean_text=preprocess_text(symptom_text)
  text_vector=vectorize.transform([clean_text])
  prediction=model.predict(text_vector)[0]
  confidence=np.max(model.predict_proba(text_vector))

  print(f"Input Symtoms:{symptom_text}")
  print(f"Predicted Severity: {prediction.capitalize()} (Confidence:{confidence:.2f})")

predict_severity("high fever and chest pain")
predict_severity("mild sore throat and fatique")
predict_severity("mild sore throat")


Input Symtoms:high fever and chest pain
Predicted Severity: Severe (Confidence:0.65)
Input Symtoms:mild sore throat and fatique
Predicted Severity: Mild (Confidence:0.64)
Input Symtoms:mild sore throat
Predicted Severity: Mild (Confidence:0.64)
