# Importing Libraries

In [2]:
import pandas as pd # data preprocessing
import itertools # confusion matrix
import string
import numpy as np
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
import matplotlib.pyplot as plt
%matplotlib inline
# To show all the rows of pandas dataframe
pd.set_option('display.max_rows', None)

In [4]:
import nltk
import sklearn
import bs4
print('nltk version {}'.format(nltk.__version__))
print('scikit-learn version {}'.format(sklearn.__version__))
print('scikit-learn version {}'.format(bs4.__version__))

nltk version 3.8.1
scikit-learn version 1.2.0
scikit-learn version 4.11.2


In [5]:
df=pd.read_csv('data/drugsComTrain_raw.tsv', sep='\t')

In [6]:
df.to_csv('data/drugsComTrain.csv',index=False)

In [8]:
df.head()

Unnamed: 0.1,Unnamed: 0,drugName,condition,review,rating,date,usefulCount
0,206461,Valsartan,Left Ventricular Dysfunction,"""It has no side effect, I take it in combinati...",9.0,"May 20, 2012",27
1,95260,Guanfacine,ADHD,"""My son is halfway through his fourth week of ...",8.0,"April 27, 2010",192
2,92703,Lybrel,Birth Control,"""I used to take another oral contraceptive, wh...",5.0,"December 14, 2009",17
3,138000,Ortho Evra,Birth Control,"""This is my first time using any form of birth...",8.0,"November 3, 2015",10
4,35696,Buprenorphine / naloxone,Opiate Dependence,"""Suboxone has completely turned my life around...",9.0,"November 27, 2016",37


In [9]:
df_cond = df.condition.value_counts()
df_cond

condition
Birth Control                                                          28788
Depression                                                              9069
Pain                                                                    6145
Anxiety                                                                 5904
Acne                                                                    5588
Bipolar Disorde                                                         4224
Insomnia                                                                3673
Weight Loss                                                             3609
Obesity                                                                 3568
ADHD                                                                    3383
Diabetes, Type 2                                                        2554
Emergency Contraception                                                 2463
High Blood Pressure                                               

In [10]:
df_train = df_cond.iloc[:200]

In [11]:
df.shape

(161297, 7)

In [12]:
df_train.shape

(200,)

In [17]:
df_train

Birth Control                                                 28788
Depression                                                     9069
Pain                                                           6145
Anxiety                                                        5904
Acne                                                           5588
Bipolar Disorde                                                4224
Insomnia                                                       3673
Weight Loss                                                    3609
Obesity                                                        3568
ADHD                                                           3383
Diabetes, Type 2                                               2554
Emergency Contraception                                        2463
High Blood Pressure                                            2321
Vaginal Yeast Infection                                        2274
Abnormal Uterine Bleeding                       

In [14]:
my_array = list(df_train.keys())
len(my_array)

200

In [15]:
#df_cond_train = df[i for i in my_array]
#df_cond_train

matched_rows = pd.DataFrame(columns=df.columns)
for i in my_array:
    matched_rows = pd.concat([matched_rows, df[df['condition'] == i]])

# Reset the index of the matched_rows DataFrame
matched_rows.reset_index(drop=True, inplace=True)

# Print the rows where items are present
print("Rows where items are present:")
print(matched_rows.head())

matched_rows.value_counts()

Rows where items are present:
  Unnamed: 0                            drugName      condition   
0      92703                              Lybrel  Birth Control  \
1     138000                          Ortho Evra  Birth Control   
2      48928  Ethinyl estradiol / levonorgestrel  Birth Control   
3      98494                           Nexplanon  Birth Control   
4     227020                        Etonogestrel  Birth Control   

                                              review  rating   
0  "I used to take another oral contraceptive, wh...     5.0  \
1  "This is my first time using any form of birth...     8.0   
2  "I had been on the pill for many years. When m...     8.0   
3  "Started Nexplanon 2 months ago because I have...     3.0   
4  "Nexplanon does its job. I can have worry free...     9.0   

                date usefulCount  
0  December 14, 2009          17  
1   November 3, 2015          10  
2   December 8, 2016           1  
3     August 7, 2014          10  
4    Au

MemoryError: 

In [16]:
X = matched_rows.drop(['Unnamed: 0','drugName','rating','date','usefulCount'], axis=1)
X.condition.value_counts()

condition
Birth Control                                                 28788
Depression                                                     9069
Pain                                                           6145
Anxiety                                                        5904
Acne                                                           5588
Bipolar Disorde                                                4224
Insomnia                                                       3673
Weight Loss                                                    3609
Obesity                                                        3568
ADHD                                                           3383
Diabetes, Type 2                                               2554
Emergency Contraception                                        2463
High Blood Pressure                                            2321
Vaginal Yeast Infection                                        2274
Abnormal Uterine Bleeding             

In [17]:
X.head()

Unnamed: 0,condition,review
0,Birth Control,"""I used to take another oral contraceptive, wh..."
1,Birth Control,"""This is my first time using any form of birth..."
2,Birth Control,"""I had been on the pill for many years. When m..."
3,Birth Control,"""Started Nexplanon 2 months ago because I have..."
4,Birth Control,"""Nexplanon does its job. I can have worry free..."


# Data processing

In [18]:
for i, col in enumerate(X.columns):
    X.iloc[:, i] = X.iloc[:, i].str.replace('"', '')

In [19]:
# To set the width of the column to maximum
pd.set_option('max_colwidth', None)

In [20]:
X.head()

Unnamed: 0,condition,review
0,Birth Control,"I used to take another oral contraceptive, which had 21 pill cycle, and was very happy- very light periods, max 5 days, no other side effects. But it contained hormone gestodene, which is not available in US, so I switched to Lybrel, because the ingredients are similar. When my other pills ended, I started Lybrel immediately, on my first day of period, as the instructions said. And the period lasted for two weeks. When taking the second pack- same two weeks. And now, with third pack things got even worse- my third period lasted for two weeks and now it&#039;s the end of the third week- I still have daily brown discharge.\r\nThe positive side is that I didn&#039;t have any other side effects. The idea of being period free was so tempting... Alas."
1,Birth Control,"This is my first time using any form of birth control. I&#039;m glad I went with the patch, I have been on it for 8 months. At first It decreased my libido but that subsided. The only downside is that it made my periods longer (5-6 days to be exact) I used to only have periods for 3-4 days max also made my cramps intense for the first two days of my period, I never had cramps before using birth control. Other than that in happy with the patch"
2,Birth Control,"I had been on the pill for many years. When my doctor changed my RX to chateal, it was as effective. It really did help me by completely clearing my acne, this takes about 6 months though. I did not gain extra weight, or develop any emotional health issues. I stopped taking it bc I started using a more natural method of birth control, but started to take it bc I hate that my acne came back at age 28. I really hope symptoms like depression, or weight gain do not begin to affect me as I am older now. I&#039;m also naturally moody, so this may worsen things. I was in a negative mental rut today. Also I hope this doesn&#039;t push me over the edge, as I believe I am depressed. Hopefully it&#039;ll be just like when I was younger."
3,Birth Control,"Started Nexplanon 2 months ago because I have a minimal amount of contraception&#039;s I can take due to my inability to take the hormone that is used in most birth controls. I&#039;m trying to give it time because it is one of my only options right now. But honestly if I had options I&#039;d get it removed.\r\nI&#039;ve never had acne problems in my life, and immediately broke out after getting it implanted. Sex drive is completely gone, and I used to have sex with my boyfriend a few days a week, now its completely forced and not even fun for me anymore. I mean I&#039;m on birth control because I like having sex but don&#039;t want to get pregnant, why take a birth control that takes away sex? Very unhappy and hope that I get it back with time or I&#039;m getting it removed."
4,Birth Control,"Nexplanon does its job. I can have worry free sex. The only thing is that my periods are sometimes light and sometimes heavy. Sometimes they go away and sometimes they show up unexpected. I also feel somewhat depressed. Not sure if its Nexplanon or not. I&#039;ve had Nexplanont for about 2 months now, but despite the side effects its the most effective birth control I&#039;ve ever used and I do not plan on taking it out."


# Stopwords

In [22]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to C:\Users\Krishnendu
[nltk_data]     Pal\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


True

In [23]:
from nltk.corpus import stopwords

stop = stopwords.words('english')

In [19]:
stop

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

# Lemmitization

In [24]:
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer

porter = PorterStemmer()

lemmatizer = WordNetLemmatizer()

In [31]:
print(lemmatizer.lemmatize("sportingly"))
print(lemmatizer.lemmatize("very"))
print(lemmatizer.lemmatize("troubled"))

sportingly
very
troubled


In [33]:
from bs4 import BeautifulSoup
import re

In [35]:
def review_to_words(raw_review):
    # 1. Delete HTML
    review_text = BeautifulSoup(raw_review, 'html.parser').get_text()
    # 2. Make a space
    letters_only = re.sub('[^a-zA-Z]', ' ', review_text)
    # 3. Lowercase letters
    words = letters_only.lower().split()
    # 4. Stopwords
    meaningful_words = [w for w in words if not w in stop]
    # 5. Lemmitization
    lemmitize_words = [lemmatizer.lemmatize(w) for w in meaningful_words]
    # 6. space join words
    return(' '.join(lemmitize_words))

In [36]:
X['review_clean'] = X['review'].apply(review_to_words)

  review_text = BeautifulSoup(raw_review, 'html.parser').get_text()


In [37]:
X.head()

Unnamed: 0,condition,review,review_clean
0,Birth Control,"I used to take another oral contraceptive, which had 21 pill cycle, and was very happy- very light periods, max 5 days, no other side effects. But it contained hormone gestodene, which is not available in US, so I switched to Lybrel, because the ingredients are similar. When my other pills ended, I started Lybrel immediately, on my first day of period, as the instructions said. And the period lasted for two weeks. When taking the second pack- same two weeks. And now, with third pack things got even worse- my third period lasted for two weeks and now it&#039;s the end of the third week- I still have daily brown discharge.\r\nThe positive side is that I didn&#039;t have any other side effects. The idea of being period free was so tempting... Alas.",used take another oral contraceptive pill cycle happy light period max day side effect contained hormone gestodene available u switched lybrel ingredient similar pill ended started lybrel immediately first day period instruction said period lasted two week taking second pack two week third pack thing got even worse third period lasted two week end third week still daily brown discharge positive side side effect idea period free tempting ala
1,Birth Control,"This is my first time using any form of birth control. I&#039;m glad I went with the patch, I have been on it for 8 months. At first It decreased my libido but that subsided. The only downside is that it made my periods longer (5-6 days to be exact) I used to only have periods for 3-4 days max also made my cramps intense for the first two days of my period, I never had cramps before using birth control. Other than that in happy with the patch",first time using form birth control glad went patch month first decreased libido subsided downside made period longer day exact used period day max also made cramp intense first two day period never cramp using birth control happy patch
2,Birth Control,"I had been on the pill for many years. When my doctor changed my RX to chateal, it was as effective. It really did help me by completely clearing my acne, this takes about 6 months though. I did not gain extra weight, or develop any emotional health issues. I stopped taking it bc I started using a more natural method of birth control, but started to take it bc I hate that my acne came back at age 28. I really hope symptoms like depression, or weight gain do not begin to affect me as I am older now. I&#039;m also naturally moody, so this may worsen things. I was in a negative mental rut today. Also I hope this doesn&#039;t push me over the edge, as I believe I am depressed. Hopefully it&#039;ll be just like when I was younger.",pill many year doctor changed rx chateal effective really help completely clearing acne take month though gain extra weight develop emotional health issue stopped taking bc started using natural method birth control started take bc hate acne came back age really hope symptom like depression weight gain begin affect older also naturally moody may worsen thing negative mental rut today also hope push edge believe depressed hopefully like younger
3,Birth Control,"Started Nexplanon 2 months ago because I have a minimal amount of contraception&#039;s I can take due to my inability to take the hormone that is used in most birth controls. I&#039;m trying to give it time because it is one of my only options right now. But honestly if I had options I&#039;d get it removed.\r\nI&#039;ve never had acne problems in my life, and immediately broke out after getting it implanted. Sex drive is completely gone, and I used to have sex with my boyfriend a few days a week, now its completely forced and not even fun for me anymore. I mean I&#039;m on birth control because I like having sex but don&#039;t want to get pregnant, why take a birth control that takes away sex? Very unhappy and hope that I get it back with time or I&#039;m getting it removed.",started nexplanon month ago minimal amount contraception take due inability take hormone used birth control trying give time one option right honestly option get removed never acne problem life immediately broke getting implanted sex drive completely gone used sex boyfriend day week completely forced even fun anymore mean birth control like sex want get pregnant take birth control take away sex unhappy hope get back time getting removed
4,Birth Control,"Nexplanon does its job. I can have worry free sex. The only thing is that my periods are sometimes light and sometimes heavy. Sometimes they go away and sometimes they show up unexpected. I also feel somewhat depressed. Not sure if its Nexplanon or not. I&#039;ve had Nexplanont for about 2 months now, but despite the side effects its the most effective birth control I&#039;ve ever used and I do not plan on taking it out.",nexplanon job worry free sex thing period sometimes light sometimes heavy sometimes go away sometimes show unexpected also feel somewhat depressed sure nexplanon nexplanont month despite side effect effective birth control ever used plan taking


# Creating features and Target Variable

In [38]:
X_feat = X['review_clean']
y = X['condition']

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X_feat, y, stratify=y, test_size=0.2, random_state=0)

In [29]:
def plot_confusion_matrix(cm, classes, 
                          normalize=False, 
                          title='Confusion matrix', 
                          cmap=plt.cm.Blues):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    
    if normalize:
        cm = cm.astype('float') / cm.sum(axis = 1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    
    thresh = cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

#  Bag of Words

In [40]:
count_vectorizer = CountVectorizer( stop_words='english')

count_train = count_vectorizer.fit_transform(X_train)

count_test = count_vectorizer.transform(X_test)

In [41]:
count_train

<122049x37086 sparse matrix of type '<class 'numpy.int64'>'
	with 3621005 stored elements in Compressed Sparse Row format>

# Machine Learning Model: Naive Bayes

In [42]:
mnb = MultinomialNB()
mnb.fit(count_train, y_train)
pred = mnb.predict(count_test)
score = metrics.accuracy_score(y_test, pred)
print("accuracy:   %0.3f" % score)

# cm = metrics.confusion_matrix(y_test, pred, labels=my_array)
# plot_confusion_matrix(cm, classes=my_array)

accuracy:   0.591


# Passive Aggressive Classifier

In [43]:
from sklearn.linear_model import PassiveAggressiveClassifier,LogisticRegression

passive = PassiveAggressiveClassifier()
passive.fit(count_train, y_train)
pred = passive.predict(count_test)
score = metrics.accuracy_score(y_test, pred)
print("accuracy:   %0.3f" % score)
# cm = metrics.confusion_matrix(y_test, pred, labels=my_array)
# plot_confusion_matrix(cm, classes=my_array)

accuracy:   0.725


# Importing TFIDFVectorizer

In [44]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.8)
tfidf_train_2 = tfidf_vectorizer.fit_transform(X_train)
tfidf_test_2 = tfidf_vectorizer.transform(X_test)

# Machine Learning Model: Naive Bayes using TFIDFVectorizer

In [45]:
mnb_tf = MultinomialNB()
mnb_tf.fit(tfidf_train_2, y_train)
pred = mnb_tf.predict(tfidf_test_2)
score = metrics.accuracy_score(y_test, pred)
print("accuracy:   %0.3f" % score)
# cm = metrics.confusion_matrix(y_test, pred, labels=['Birth Control', 'Depression','Diabetes, Type 2','High Blood Pressure'])
# plot_confusion_matrix(cm, classes=['Birth Control', 'Depression','Diabetes, Type 2','High Blood Pressure'])

accuracy:   0.371


# Passive Aggressive Classifier using TFIDFVectorizer

In [46]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.8)
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

pass_tf = PassiveAggressiveClassifier()
pass_tf.fit(tfidf_train, y_train)
pred = pass_tf.predict(tfidf_test)
score = metrics.accuracy_score(y_test, pred)
print("accuracy:   %0.3f" % score)
# cm = metrics.confusion_matrix(y_test, pred, labels=['Birth Control', 'Depression','Diabetes, Type 2','High Blood Pressure'])
# plot_confusion_matrix(cm, classes=['Birth Control', 'Depression','Diabetes, Type 2','High Blood Pressure'])

accuracy:   0.767


# TFIDF: Bigrams

In [47]:
tfidf_vectorizer2 = TfidfVectorizer(stop_words='english', max_df=0.8, ngram_range=(1,2))
tfidf_train_2 = tfidf_vectorizer2.fit_transform(X_train)
tfidf_test_2 = tfidf_vectorizer2.transform(X_test)

# Passive Aggressive Classifier using TFIDF Bigrams

In [48]:
pass_tf = PassiveAggressiveClassifier()
pass_tf.fit(tfidf_train_2, y_train)
pred = pass_tf.predict(tfidf_test_2)
score = metrics.accuracy_score(y_test, pred)
print("accuracy:   %0.3f" % score)
# cm = metrics.confusion_matrix(y_test, pred, labels=['Birth Control', 'Depression','Diabetes, Type 2','High Blood Pressure'])
# plot_confusion_matrix(cm, classes=['Birth Control', 'Depression','Diabetes, Type 2','High Blood Pressure'])

accuracy:   0.821


# TFIDF: Trigrams

In [63]:
tfidf_vectorizer3 = TfidfVectorizer(stop_words='english', max_df=0.8, ngram_range=(1,3))
tfidf_train_3 = tfidf_vectorizer3.fit_transform(X_train)
tfidf_test_3 = tfidf_vectorizer3.transform(X_test)

In [64]:
pass_tf = PassiveAggressiveClassifier()
pass_tf.fit(tfidf_train_3, y_train)
pred = pass_tf.predict(tfidf_test_3)
score = metrics.accuracy_score(y_test, pred)
print("accuracy:   %0.3f" % score)
# cm = metrics.confusion_matrix(y_test, pred, labels=['Birth Control', 'Depression','Diabetes, Type 2','High Blood Pressure'])
# plot_confusion_matrix(cm, classes=['Birth Control', 'Depression','Diabetes, Type 2','High Blood Pressure'])

MemoryError: Unable to allocate 5.33 GiB for an array with shape (715197200,) and data type float64

# Most Important Features

In [49]:
def most_informative_feature_for_class(vectorizer, classifier, classlabel, n=10):
    labelid = list(classifier.classes_).index(classlabel)
    feature_names = vectorizer.get_feature_names_out()
    topn = sorted(zip(classifier.coef_[labelid], feature_names))[-n:]

    for coef, feat in topn:
        print (classlabel, feat, coef)



most_informative_feature_for_class(tfidf_vectorizer2, pass_tf, 'Birth Control')

Birth Control ring 4.290782351544041
Birth Control zarah 4.29934832192671
Birth Control pregnant 4.673330022076409
Birth Control lutera 4.804481335612524
Birth Control insertion 4.841336439316019
Birth Control skyla 5.455225071885894
Birth Control nuvaring 5.818197353159621
Birth Control implanon 6.2505303605427365
Birth Control implant 7.185589376596565
Birth Control nexplanon 9.267121879449354


In [51]:
most_informative_feature_for_class(tfidf_vectorizer2, pass_tf, 'Depression')

Depression wonder vyvanse 3.7666346693085164
Depression day positive 4.0414095928906795
Depression wellbutrin 4.081869523327257
Depression parnate 4.569088190063134
Depression viibryd 4.779397358834598
Depression brintellix 4.9168103200883335
Depression nardil 5.100794381287207
Depression pristiq 5.346562628961239
Depression deplin 5.3667798342346025
Depression depression 7.689478338537653


In [39]:
most_informative_feature_for_class(tfidf_vectorizer2, pass_tf, 'High Blood Pressure')

High Blood Pressure norvasc 3.983948032278024
High Blood Pressure pressure 4.169995935955796
High Blood Pressure azor 4.590559465432264
High Blood Pressure benicar 4.731867484337865
High Blood Pressure diovan 4.816575810051169
High Blood Pressure amlodipine 4.838998399257763
High Blood Pressure bystolic 5.39168429556869
High Blood Pressure lisinopril 5.786084919515093
High Blood Pressure losartan 5.941956325674568
High Blood Pressure bp 6.027031183236749


In [40]:
most_informative_feature_for_class(tfidf_vectorizer2, pass_tf, 'Diabetes, Type 2')

Diabetes, Type 2 glucose 4.1409995472144185
Diabetes, Type 2 actos 4.382076256222514
Diabetes, Type 2 victoza 5.1582587351336
Diabetes, Type 2 metformin 5.281759573435402
Diabetes, Type 2 januvia 5.5182334223394625
Diabetes, Type 2 byetta 5.95488577735035
Diabetes, Type 2 bydureon 6.097999686990457
Diabetes, Type 2 sugar 6.353679494748429
Diabetes, Type 2 invokana 6.655957261923151
Diabetes, Type 2 trulicity 8.550641129135533


In [52]:
X.tail()

Unnamed: 0,condition,review,review_clean
152557,Hypersomnia,"I have been on this medicine for 7 years and cannot imagine life without it. It doesn&#039;t seem to work for everyone, but is a lifesaver for me. I had a headache for 3 days when I started it and lost my appetite for a week. After that it was fine. I can get by on 200mg if I need to, but feel tired still most of the day (but won&#039;t fall asleep unless I want to). My usual dose is 300mg which keeps me alert and I can&#039;t even take a nap if I want to.",medicine year cannot imagine life without seem work everyone lifesaver headache day started lost appetite week fine get mg need feel tired still day fall asleep unless want usual dose mg keep alert even take nap want
152558,Hypersomnia,"I have found it to be very smooth, have needed to take two per day, one early am and second one about noon to make it all day. No more trouble sleeping than normal. No shakiness or jitters, appetite seems normal. One tablet (150mg) was no help. I seem to be more productive during the day. No nausea or headaches, dizziness. I feel like I get along with it just fine.",found smooth needed take two per day one early second one noon make day trouble sleeping normal shakiness jitter appetite seems normal one tablet mg help seem productive day nausea headache dizziness feel like get along fine
152559,Hypersomnia,"Diagnosed with Hypersomnia. Prescribed 150 mg Nuvigil 1 x day. This medication made me extremely talkative for the first few days to the point of annoyance to others and even myself. Though it curbed my extreme need for napping and did make me feel somewhat more &#039;alive&#039; during the day, the side effects, for me, far outweighed the benefits. Chills, daily headaches, feeling feverish with no temperature throughout the two months I took Nuvigil. Daily stomach pain and irritability and changes in my behavior to include sadness and crying episodes, feeling overwhelmed, feelings as if I were &#039;losing it&#039;. I stopped taking Nuvigil two wks ago and no more side effects!!!!",diagnosed hypersomnia prescribed mg nuvigil x day medication made extremely talkative first day point annoyance others even though curbed extreme need napping make feel somewhat alive day side effect far outweighed benefit chill daily headache feeling feverish temperature throughout two month took nuvigil daily stomach pain irritability change behavior include sadness cry episode feeling overwhelmed feeling losing stopped taking nuvigil two wks ago side effect
152560,Hypersomnia,"There no material in side effects list that you may have a hard time with Novocaine. My dentist spent hours trying to figure out why he could not get me numb before he made the connection. Be forewarned that this is a potential problem. This drug all in all has improved the quality of my life drastically. 7 years and topped at 90 mg. I hit tolerance a long time ago, but it still does what it was designed to do. I am not looking for a high, I am looking for a life with alertness and this medicine has given me that. I personally trust this older proven medication.",material side effect list may hard time novocaine dentist spent hour trying figure could get numb made connection forewarned potential problem drug improved quality life drastically year topped mg hit tolerance long time ago still designed looking high looking life alertness medicine given personally trust older proven medication
152561,Hypersomnia,"In the 8 hours of taking one tablet,i had about as many experiences of feeling i was going to have a stroke or heart attack.And you can buy in the supermarket,never again.",hour taking one tablet many experience feeling going stroke heart attack buy supermarket never


In [53]:
## Function for Extracting Top drugs

def top_drugs_extractor(condition):
    df_top = df[(df['rating']>=8) | (df['usefulCount']>=100)].sort_values(by = ['rating', 'usefulCount'], ascending = [False, False])
    drug_lst = df_top[df_top['condition']==condition]['drugName'].head(3).tolist()
    return drug_lst

In [54]:
def predict_text(lst_text):
    df_test = pd.DataFrame(lst_text, columns = ['test_sent'])
    df_test["test_sent"] = df_test["test_sent"].apply(review_to_words)
    tfidf_bigram = tfidf_vectorizer2.transform(lst_text)
    prediction = pass_tf.predict(tfidf_bigram)
    df_test['prediction']=prediction
    return df_test

In [55]:
sentences = [
  "I have only been on Tekturna for 9 days. The effect was immediate. I am also on a calcium channel blocker (Tiazac) and hydrochlorothiazide. I was put on Tekturna because of palpitations experienced with Diovan (ugly drug in my opinion, same company produces both however). The palpitations were pretty bad on Diovan, 24 hour monitor by EKG etc. After a few days of substituting Tekturna for Diovan, there are no more palpitations.",
    "This is the third med I&#039;ve tried for anxiety and mild depression. Been on it for a week and I hate it so much. I am so dizzy, I have major diarrhea and feel worse than I started. Contacting my doc in the am and changing asap.",
    "I just got diagnosed with type 2. My doctor prescribed Invokana and metformin from the beginning. My sugars went down to normal by the second week. I am losing so much weight. No side effects yet. Miracle medicine for me",
    "I am suffering from throat pain from last two days. Today I am having headache and my body temperature is high.",
    "A week after using Lastacaft, like with some of the other reviewers, I thought I had conjunctivitis and went in to see the doctor, indicating that it started after using the drops. He said it was due to dry eye issues and being around a lot of dust/debris, which made sense at the time. The antibiotic drops did the trick, but I was told to keep using Lastacaft, and sure enough, the redness returned. I continued with the just the antibiotics and I was back to normal. Well, this morning, I thought I&#039;d start with Lastacaft again, since the doctor indicated that they weren&#039;t the problem, and I&#039;m back to having pink eye. ",
    "Stomach pain for 3 days, took Neopeptine. Doctor suggests it is food poisoning."
  ]

In [56]:
tfidf_bigram = tfidf_vectorizer2.transform(sentences)


predictions = pass_tf.predict(tfidf_bigram)

for text, label in zip(sentences, predictions):
        target = label
        top_drugs = top_drugs_extractor(label)
        print("text:", text, "\nCondition:", target)
        print("Top Suggested Drugs:")
        print(top_drugs)


text: I have only been on Tekturna for 9 days. The effect was immediate. I am also on a calcium channel blocker (Tiazac) and hydrochlorothiazide. I was put on Tekturna because of palpitations experienced with Diovan (ugly drug in my opinion, same company produces both however). The palpitations were pretty bad on Diovan, 24 hour monitor by EKG etc. After a few days of substituting Tekturna for Diovan, there are no more palpitations. 
Condition: High Blood Pressure
Top Suggested Drugs:
['Losartan', 'Aldactone', 'Spironolactone']
text: This is the third med I&#039;ve tried for anxiety and mild depression. Been on it for a week and I hate it so much. I am so dizzy, I have major diarrhea and feel worse than I started. Contacting my doc in the am and changing asap. 
Condition: Depression
Top Suggested Drugs:
['Sertraline', 'Zoloft', 'Viibryd']
text: I just got diagnosed with type 2. My doctor prescribed Invokana and metformin from the beginning. My sugars went down to normal by the second w

In [57]:
df_testsent = predict_text(sentences)
df_testsent

Unnamed: 0,test_sent,prediction
0,tekturna day effect immediate also calcium channel blocker tiazac hydrochlorothiazide put tekturna palpitation experienced diovan ugly drug opinion company produce however palpitation pretty bad diovan hour monitor ekg etc day substituting tekturna diovan palpitation,High Blood Pressure
1,third med tried anxiety mild depression week hate much dizzy major diarrhea feel worse started contacting doc changing asap,Depression
2,got diagnosed type doctor prescribed invokana metformin beginning sugar went normal second week losing much weight side effect yet miracle medicine,"Diabetes, Type 2"
3,suffering throat pain last two day today headache body temperature high,Tonsillitis/Pharyngitis
4,week using lastacaft like reviewer thought conjunctivitis went see doctor indicating started using drop said due dry eye issue around lot dust debris made sense time antibiotic drop trick told keep using lastacaft sure enough redness returned continued antibiotic back normal well morning thought start lastacaft since doctor indicated problem back pink eye,"Conjunctivitis, Allergic"
5,stomach pain day took neopeptine doctor suggests food poisoning,Constipation


In [59]:
import joblib
joblib.dump(tfidf_vectorizer2, 'model/tfidfvectorizer.pkl')
joblib.dump(pass_tf, 'model/passmodel.pkl')

['model/passmodel.pkl']

In [60]:
vectorizer = joblib.load('model/tfidfvectorizer.pkl')
model = joblib.load('model/passmodel.pkl')

test = model.predict(vectorizer.transform(["stomach pain day took neopeptine doctor suggests food poisoning"]))
test[0]

'Constipation'