### NLP on Drug Reviews with Python
+ Sentiment Analysis
+ Named Entity Recognition

In [1]:
# Load Pkgs
import pandas as pd
import numpy as np

In [33]:
# Load NLP Pkgs
import spacy
from wordcloud import WordCloud, STOPWORDS
from spacy.util import minibatch, compounding

In [34]:
import matplotlib.pyplot as plt
import re
import random

In [35]:
# Load Dataset
df = pd.read_csv("drug_review_dataset_with_sentiment.csv")

In [43]:
df.head()

Unnamed: 0.1,Unnamed: 0,drugName,condition,review,rating,date,usefulCount,drug_class,sentiment,sentiment_label
0,206461,Valsartan,Left Ventricular Dysfunction,"""It has no side effect, I take it in combinati...",9.0,"May 20, 2012",27,arb blocker,0.0,neutral
1,95260,Guanfacine,ADHD,"""My son is halfway through his fourth week of ...",8.0,"April 27, 2010",192,,0.168333,positive
2,92703,Lybrel,Birth Control,"""I used to take another oral contraceptive, wh...",5.0,"December 14, 2009",17,,0.06721,positive
3,138000,Ortho Evra,Birth Control,"""This is my first time using any form of birth...",8.0,"November 3, 2015",10,,0.179545,positive
4,35696,Buprenorphine / naloxone,Opiate Dependence,"""Suboxone has completely turned my life around...",9.0,"November 27, 2016",37,,0.194444,positive


In [44]:
# NER
nlp0 = spacy.load('en_core_web_sm')

In [45]:
# Get All Components of this NLP Object
nlp0.pipe_names

['tagger', 'parser', 'ner']

In [46]:
ner0 = nlp0.get_pipe('ner')

In [None]:
ner0.add_label()

In [47]:
# Example
ex1 = "James went to London to buy Ibuprofen last year 2019"

In [48]:
docx = nlp0(ex1)

In [49]:
type(docx)

spacy.tokens.doc.Doc

In [50]:
# Check for entities
for entity in docx.ents:
  print(entity,entity.label_)

James PERSON
London GPE
Ibuprofen ORG
last year 2019 DATE


#### Preparing the data
+ Training data must be a tuple

TRAIN_DATA = [
    ("Who is Shaka Khan?", {"entities": [(START, STOP, "LABEL")]})
]

TRAIN_DATA = [
    ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
    ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
]

In [52]:
def process_review(review):
    processed_token = []
    for token in review.split():
        token = ''.join(e.lower() for e in token if e.isalnum())
        processed_token.append(token)
    return ' '.join(processed_token)

In [54]:
# Drugs Names
all_drugs = df['drugName'].unique().tolist()

In [56]:
all_drugs = [x.lower() for x in all_drugs]

In [57]:
all_drugs

['valsartan',
 'guanfacine',
 'lybrel',
 'ortho evra',
 'buprenorphine / naloxone',
 'cialis',
 'levonorgestrel',
 'aripiprazole',
 'keppra',
 'ethinyl estradiol / levonorgestrel',
 'topiramate',
 'l-methylfolate',
 'pentasa',
 'dextromethorphan',
 'nexplanon',
 'liraglutide',
 'trimethoprim',
 'amitriptyline',
 'lamotrigine',
 'nilotinib',
 'atripla',
 'trazodone',
 'etonogestrel',
 'etanercept',
 'tioconazole',
 'azithromycin',
 'eflornithine',
 'daytrana',
 'ativan',
 'imitrex',
 'sertraline',
 'toradol',
 'viberzi',
 'mobic',
 'dulcolax',
 'morphine',
 'moviprep',
 'trilafon',
 'fluconazole',
 'contrave',
 'clonazepam',
 'metaxalone',
 'venlafaxine',
 'ledipasvir / sofosbuvir',
 'symbyax',
 'tamsulosin',
 'doxycycline',
 'dulaglutide',
 'intuniv',
 'buprenorphine',
 'qvar',
 'opdivo',
 'pyridium',
 'latuda',
 'bupropion',
 'implanon',
 'effexor xr',
 'drospirenone / ethinyl estradiol',
 'nuvaring',
 'prepopik',
 'tretinoin',
 'gildess fe 1 / 20',
 'ethinyl estradiol / norgestimate'

In [58]:
df['review']

0         "It has no side effect, I take it in combinati...
1         "My son is halfway through his fourth week of ...
2         "I used to take another oral contraceptive, wh...
3         "This is my first time using any form of birth...
4         "Suboxone has completely turned my life around...
                                ...                        
161292    "I wrote my first report in Mid-October of 201...
161293    "I was given this in IV before surgey. I immed...
161294    "Limited improvement after 4 months, developed...
161295    "I&#039;ve been on thyroid medication 49 years...
161296    "I&#039;ve had chronic constipation all my adu...
Name: review, Length: 161297, dtype: object

In [59]:
count = 0
TRAIN_DATA = []
for _, item in df.iterrows():
    ent_dict = {}
    if count < 1000:
        review = process_review(item['review'])
        #Locate drugs and their positions once and add to the visited items.
        visited_items = []
        entities = []
        for token in review.split():
            if token in all_drugs:
                for i in re.finditer(token, review):
                    if token not in visited_items:
                        entity = (i.span()[0], i.span()[1], 'DRUG')
                        visited_items.append(token)
                        entities.append(entity)
        if len(entities) > 0:
            ent_dict['entities'] = entities
            train_item = (review, ent_dict)
            TRAIN_DATA.append(train_item)
            count+=1

In [60]:
TRAIN_DATA

[('it has no side effect i take it in combination of bystolic 5 mg and fish oil',
  {'entities': [(50, 58, 'DRUG')]}),
 ('my son is halfway through his fourth week of intuniv we became concerned when he began this last week when he started taking the highest dose he will be on for two days he could hardly get out of bed was very cranky and slept for nearly 8 hours on a drive home from school vacation very unusual for him i called his doctor on monday morning and she said to stick it out a few days see how he did at school and with getting up in the morning the last two days have been problem free he is much more agreeable than ever he is less emotional a good thing less cranky he is remembering all the things he should overall his behavior is better we have tried many different medications and so far this is the most effective',
  {'entities': [(45, 52, 'DRUG')]}),
 ('i used to take another oral contraceptive which had 21 pill cycle and was very happy very light periods max 5 days no o

### Training the NER Model

In [61]:
n_iter = 10
def train_ner(training_data):
    """Steps
    Create a Blank NLP  model object
    Create and add NER to the NLP model
    Add Labels from your training data
    Train  
    """
    TRAIN_DATA = training_data
    nlp = spacy.blank("en")  # create blank Language class
    print("Created blank 'en' model")
    
    if "ner" not in nlp.pipe_names:
        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner, last=True)
    # otherwise, get it so we can add labels
    else:
        ner = nlp.get_pipe("ner")
        
    # add labels
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get("entities"):
            ner.add_label(ent[2])
            
    nlp.begin_training()
    for itn in range(n_iter):
        random.shuffle(TRAIN_DATA)
        losses = {}
        # batch up the examples using spaCy's minibatch
        batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
        for batch in batches:
            texts, annotations = zip(*batch)
            nlp.update(
                texts,  # batch of texts
                annotations,  # batch of annotations
                drop=0.5,  # dropout - make it harder to memorise data
                losses=losses,
            )
        print("Losses", losses)
    return nlp

In [62]:
# Let training
nlp2 = train_ner(TRAIN_DATA)

Created blank 'en' model
Losses {'ner': 4470.239881779828}
Losses {'ner': 1375.166944752246}
Losses {'ner': 1002.1384818403991}
Losses {'ner': 875.8723420466125}
Losses {'ner': 730.9513784748066}
Losses {'ner': 647.3403123510484}
Losses {'ner': 574.9313971189416}
Losses {'ner': 549.9483773217411}
Losses {'ner': 530.4141523685903}
Losses {'ner': 468.15319591115053}


In [63]:
ex1

'James went to London to buy Ibuprofen last year 2019'

In [64]:
docx2 = nlp2(ex1)

In [65]:
for entity in docx2.ents:
  print(entity,entity.label_)

Ibuprofen DRUG


In [66]:
# Test
for text,_ in TRAIN_DATA[:5]:
  doc = nlp2(text)
  result = [(ent,ent.label_) for ent in doc.ents]
  print(result)

[(saxenda, 'DRUG')]
[(nexplanon, 'DRUG')]
[(inderal, 'DRUG')]
[(inderal, 'DRUG'), (inderal, 'DRUG')]
[(aviane, 'DRUG')]


In [67]:
def extract_drug_entity(text):
  docx =  nlp2(text)
  result = [(ent,ent.label_) for ent in docx.ents]
  return result

In [69]:
df['review'][0:10]

0    "It has no side effect, I take it in combinati...
1    "My son is halfway through his fourth week of ...
2    "I used to take another oral contraceptive, wh...
3    "This is my first time using any form of birth...
4    "Suboxone has completely turned my life around...
5    "2nd day on 5mg started to work with rock hard...
6    "He pulled out, but he cummed a bit in me. I t...
7    "Abilify changed my life. There is hope. I was...
8    " I Ve had  nothing but problems with the Kepp...
9    "I had been on the pill for many years. When m...
Name: review, dtype: object

In [70]:
df['review'][0:10].apply(extract_drug_entity)

0                                           []
1                          [((Intuniv), DRUG)]
2         [((Lybrel), DRUG), ((Lybrel), DRUG)]
3                                [(((), DRUG)]
4    [((Suboxone), DRUG), ((oxycontin), DRUG)]
5                                           []
6                                           []
7      [((Zoloft), DRUG), ((Clonidine), DRUG)]
8                                           []
9                          [((chateal), DRUG)]
Name: review, dtype: object

In [None]:
# Credits
# spacy_docs
# curiousprogrammer


In [None]:
# Thanks For Your Time
# Jesus Saves @JCharisTech
# Jesse E.Agbe(JCharis)