## Loading Necessary Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from nltk.corpus import stopwords 
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import CountVectorizer,TfidfTransformer,TfidfVectorizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression,SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import shuffle
from sklearn.naive_bayes import GaussianNB,MultinomialNB
from sklearn.feature_selection import SelectKBest, chi2
import re,joblib
from skmultilearn.problem_transform import LabelPowerset

In [2]:
# Label binarizer: converts lists of disease names into a 0/1 indicator matrix and back.
lb = MultiLabelBinarizer()
# English stop-word list from the NLTK corpus.
# NOTE(review): `sw` is not referenced by any cell shown in this notebook — confirm whether it is still needed.
sw = stopwords.words("english")

In [3]:
# Read a text file into a list of its lines (trailing whitespace stripped)

def load_txt(filename):
    """Read a text file and return its lines as a list of strings.

    Every line has its trailing whitespace (including the newline) removed.
    Blank lines are kept as empty strings and leading whitespace is preserved.

    Parameters
    ----------
    filename : str or path-like
        Path of the file to read; it is opened in text mode ('r').

    Returns
    -------
    list of str
        One entry per line of the file, in file order.
    """
    with open(filename, 'r') as handle:
        # Iterating the handle yields the same lines readlines() would.
        return [line.rstrip() for line in handle]

In [4]:
# Load the input texts from data_x.txt (path relative to the working directory) into a NumPy array.
# Presumably one question per line, to be paired row-by-row with `targets` — verify against the data file.
train = np.array(load_txt("data_x.txt"))

In [5]:
targets = [
    ["Viral Bronchitis"],
    ["Viral Bronchitis"],
    ["Viral Bronchitis"],
    
    
    ["Bacterial Pneumonia","Asthma","Chronic Heart Failure","Chronic Lung Disease"],
    ["Bacterial Pneumonia","Asthma","Chronic Heart Failure","Chronic Lung Disease"],
    ["Bacterial Pneumonia","Asthma","Chronic Heart Failure","Chronic Lung Disease"],
    
    
    ["Bacterial Pneumonia","Chronic Lung Disease"],
    ["Bacterial Pneumonia","Chronic Lung Disease"],
    ["Bacterial Pneumonia","Chronic Lung Disease"],
    
    
    ["Bacterial Pneumonia","Chronic Lung Disease","Lung Cancer"],
    ["Bacterial Pneumonia","Chronic Lung Disease","Lung Cancer"],
    ["Bacterial Pneumonia","Chronic Lung Disease","Lung Cancer"],
    
    
    ["Viral Bronchitis"],
    ["Viral Bronchitis"],
    ["Viral Bronchitis"],
    
    
    ["Asthma"],
    ["Asthma"],
    ["Asthma"],
    
    
    ["Chronic Heart Failure","Chronic Lung Disease"],
    ["Chronic Heart Failure","Chronic Lung Disease"],
    ["Chronic Heart Failure","Chronic Lung Disease"],
    
    
    ["Viral Bronchitis","Bacterial Pneumonia"],
    ["Viral Bronchitis","Bacterial Pneumonia"],
    ["Viral Bronchitis","Bacterial Pneumonia"],
    
    
    ["Viral Bronchitis","Asthma"],
    ["Viral Bronchitis","Asthma"],
    ["Viral Bronchitis","Asthma"],
    
    ["Asthma","Chronic Lung Disease"],
    ["Asthma","Chronic Lung Disease"],
    ["Asthma","Chronic Lung Disease"],
    
    ["Bacterial Pneumonia"],
    ["Bacterial Pneumonia"],
    ["Bacterial Pneumonia"],
    
    
    ["Chronic Heart Failure","Chronic Lung Disease"],
    ["Chronic Heart Failure","Chronic Lung Disease"],
    ["Chronic Heart Failure","Chronic Lung Disease"],
    
    
    ["Viral Bronchitis"],
    ["Viral Bronchitis"],
    ["Viral Bronchitis"],
    
    
    ["Lung Cancer"],
    ["Lung Cancer"],
    ["Lung Cancer"],
    
    
    ["Chronic Lung Disease"],
    ["Chronic Lung Disease"],
    ["Chronic Lung Disease"],


    
    
]




In [6]:
# Pair every text with its label list; `train` and `targets` must have the same
# length and be in the same order, otherwise the rows are mislabeled (or pandas raises).
df  = pd.DataFrame({"data":train,"target":targets})
# Shuffle with a fixed seed: without it the row order differs on every run, so the
# seeded train_test_split further down would still produce a different split each time.
df  = shuffle(df, random_state=42).reset_index(drop=True)


In [7]:
df.head()

Unnamed: 0,data,target
0,Do you breathe quicker than usual?,"[Viral Bronchitis, Asthma]"
1,Do you have noisy breathing when you breathe?,"[Viral Bronchitis, Bacterial Pneumonia]"
2,Do you breathe noisily when you breathe?,"[Viral Bronchitis, Bacterial Pneumonia]"
3,Do you easily get breathless during activities...,[Chronic Lung Disease]
4,Do you have any allergies?,[Asthma]


In [9]:
# Hold out 20% of the rows for evaluation; random_state fixes the split for a given row order of df.
# NOTE(review): the displayed rows suggest the data contains paraphrases of the same question, so
# near-duplicates may end up on both sides of the split — confirm whether a grouped split is wanted.
x_train,x_test,y_train,y_test =  train_test_split(df['data'],df['target'],test_size=0.2,random_state=42)

In [10]:
# Fit the binarizer on the training labels and encode them as a 0/1 indicator matrix.
Y= lb.fit_transform(y_train)
# Encode the test labels with the classes learned from the training labels.
# NOTE(review): a label that occurs only in y_test would not get a column — verify all labels appear in y_train.
Y_test  =  lb.transform(y_test)

In [11]:
# Persist the fitted label binarizer so predictions can be decoded back to
# disease names in a later session. joblib is already imported in the import
# cell at the top of the notebook, so it is not re-imported here.
joblib.dump(lb,"lb.pkl")

['lb.pkl']

In [12]:
x_test

39                    Do you have difficulty breathing?
25                                 Does the chest hurt?
26    Do you have a sore throat, runny nose and snee...
43              Have you lost your appetite and weight?
35                   Do you have difficulty to swallow?
41               Did you lose your appetite and weight?
4                            Do you have any allergies?
12                          Is it difficult to swallow?
8                               Two weeks and counting.
Name: data, dtype: object

In [13]:
# Text pipeline: token counts -> TF-IDF weights -> BinaryRelevance wrapping GaussianNB
# (BinaryRelevance treats each label as its own binary classification problem).
clf  = make_pipeline(CountVectorizer(),TfidfTransformer(),BinaryRelevance(GaussianNB()))
# To skip training, a previously saved pipeline can be loaded instead, e.g. clf = joblib.load("_model_.pkl")

In [32]:
# Train the whole pipeline on the raw training texts and their binarized labels.
clf.fit(x_train,Y)

Pipeline(steps=[('countvectorizer', CountVectorizer()),
                ('tfidftransformer', TfidfTransformer()),
                ('binaryrelevance',
                 BinaryRelevance(classifier=GaussianNB(),
                                 require_dense=[True, True]))])

In [33]:
# Accuracy on the held-out texts. With multi-label targets a sample counts as correct only
# when its entire predicted label set matches the true set (exact-match accuracy).
clf.score(x_test,Y_test)

0.6666666666666666

In [19]:
# Persist the fitted pipeline to disk for reuse without retraining.
# NOTE(review): the execution counts show this cell ran (In [19]) before the last fit (In [32]);
# re-run it after fitting so the file holds the latest model.
joblib.dump(clf,"_model_.pkl")

['_model_.pkl']

In [20]:
# Predict the held-out texts and decode the indicator rows back into tuples of disease names.
lb.inverse_transform(clf.predict(x_test))

[('Chronic Heart Failure', 'Chronic Lung Disease'),
 ('Chronic Heart Failure', 'Chronic Lung Disease'),
 ('Viral Bronchitis',),
 ('Lung Cancer',),
 ('Chronic Heart Failure', 'Chronic Lung Disease'),
 ('Lung Cancer',),
 ('Asthma',),
 ('Chronic Heart Failure', 'Chronic Lung Disease'),
 ('Viral Bronchitis',)]

In [21]:
y_test

39        [Chronic Heart Failure, Chronic Lung Disease]
25        [Chronic Heart Failure, Chronic Lung Disease]
26                                   [Viral Bronchitis]
43                                        [Lung Cancer]
35                                   [Viral Bronchitis]
41                                        [Lung Cancer]
4                                              [Asthma]
12                                   [Viral Bronchitis]
8     [Bacterial Pneumonia, Asthma, Chronic Heart Fa...
Name: target, dtype: object

In [22]:
sample  = "Are you breathing loudly while breathing?"


In [23]:
# predict() expects a sequence of texts, so the single sample is wrapped in a list;
# the result is converted to a dense array before being decoded to disease names.
lb.inverse_transform(clf.predict([sample]).toarray())

[('Bacterial Pneumonia', 'Viral Bronchitis')]

# Model Usage 

In [27]:
def make_prediction(sample):
    """Return the decoded label tuple(s) for one text, using the in-memory `clf`.

    Parameters
    ----------
    sample : str
        A single question/text to classify.

    Returns
    -------
    list of tuple
        One tuple of label names per input text (a one-element list here).
    """
    batch = [sample]  # predict() works on a sequence of texts
    indicator = clf.predict(batch).toarray()
    return lb.inverse_transform(indicator)
    

In [28]:
sample = "Are you breathing loudly while breathing?"

make_prediction(sample)   # MAKE PREDICTIONS 

[('Bacterial Pneumonia', 'Viral Bronchitis')]

In [29]:
sample2  = "Are you coughing up mucus?"

make_prediction(sample2)

[('Bacterial Pneumonia', 'Chronic Lung Disease')]

## Load the Saved Model, Then Use It

In [34]:
# Reload the persisted pipeline and label binarizer from disk.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code — only load files you trust.
model = joblib.load("_model_.pkl")
# This rebinds the notebook-level `lb` to the reloaded binarizer.
lb = joblib.load("lb.pkl")

In [38]:
def make_prediction(sample):
    """Return the decoded label tuple(s) for one text, using the reloaded `model`.

    Same contract as the earlier definition of this name, but the prediction
    comes from the pipeline loaded from disk rather than the in-memory `clf`.

    Parameters
    ----------
    sample : str
        A single question/text to classify.

    Returns
    -------
    list of tuple
        One tuple of label names per input text (a one-element list here).
    """
    batch = [sample]  # predict() works on a sequence of texts
    indicator = model.predict(batch).toarray()
    return lb.inverse_transform(indicator)

# Note: this version calls model.predict (the reloaded pipeline) instead of clf.predict

In [39]:
make_prediction(sample)

[('Bacterial Pneumonia', 'Viral Bronchitis')]

In [40]:
make_prediction(sample2)

[('Bacterial Pneumonia', 'Chronic Lung Disease')]