In [None]:
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00462/drugsCom_raw.zip

--2023-01-30 10:49:14--  https://archive.ics.uci.edu/ml/machine-learning-databases/00462/drugsCom_raw.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 42989872 (41M) [application/x-httpd-php]
Saving to: ‘drugsCom_raw.zip’


2023-01-30 10:49:19 (11.6 MB/s) - ‘drugsCom_raw.zip’ saved [42989872/42989872]



# Dataset and Library
The dataset used as a sample in this notebook is the [Drug Review Dataset](https://archive.ics.uci.edu/dataset/462/drug+review+dataset+drugs+com) from the open-source UCI Machine Learning Repository. Sentiment is predicted with pretrained [transformer](https://en.wikipedia.org/wiki/Transformer_(machine_learning_model)) models loaded through the Python library [huggingface](https://huggingface.co/transformers).

In [None]:
!unzip drugsCom_raw.zip

Archive:  drugsCom_raw.zip
  inflating: drugsComTest_raw.tsv    
  inflating: drugsComTrain_raw.tsv   


In [None]:
%%capture
!pip install transformers

In [None]:
from transformers import pipeline
import pandas as pd
from tqdm import tqdm

## Sentiment analysis using pretrained transformer models

In [None]:
# Load the test split of the drug reviews (tab-separated).
# The path is relative because the unzip step extracts into the working
# directory; a hard-coded '/content/...' path only resolves on Colab.
medication_review_df = pd.read_csv('drugsComTest_raw.tsv', sep="\t")
medication_review_df

Unnamed: 0.1,Unnamed: 0,drugName,condition,review,rating,date,usefulCount
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10.0,"February 28, 2012",22
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8.0,"May 17, 2009",17
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9.0,"September 29, 2017",3
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9.0,"March 5, 2017",35
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9.0,"October 22, 2015",4
...,...,...,...,...,...,...,...
53761,159999,Tamoxifen,"Breast Cancer, Prevention","""I have taken Tamoxifen for 5 years. Side effe...",10.0,"September 13, 2014",43
53762,140714,Escitalopram,Anxiety,"""I&#039;ve been taking Lexapro (escitaploprgra...",9.0,"October 8, 2016",11
53763,130945,Levonorgestrel,Birth Control,"""I&#039;m married, 34 years old and I have no ...",8.0,"November 15, 2010",7
53764,47656,Tapentadol,Pain,"""I was prescribed Nucynta for severe neck/shou...",1.0,"November 28, 2011",20


In [None]:
# Give the unnamed first column of the TSV a meaningful name.
column_renames = {"Unnamed: 0": "uniqueId"}
medication_review_df = medication_review_df.rename(columns=column_renames)

In [None]:
# Keep only the first 500 reviews so per-review inference stays manageable.
# .copy() makes the subset an independent frame; a bare slice is flagged by
# pandas as a possible view of the full frame, and adding columns to it later
# raises SettingWithCopyWarning.
medication_review_df = medication_review_df.iloc[:500].copy()

In [None]:
# Sentiment pipeline backed by the
# `okho0653/Bio_ClinicalBERT-zero-shot-sentiment-model` checkpoint.
# truncation/max_length cut reviews longer than 512 tokens instead of failing.
bio_clinibert_classifier = pipeline(
    "sentiment-analysis",
    model="okho0653/Bio_ClinicalBERT-zero-shot-sentiment-model",
    truncation=True,
    max_length=512,
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/706 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/433M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/358 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/669k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [None]:
# Sentiment pipeline backed by the
# `blanchefort/rubert-base-cased-sentiment-med` checkpoint.
# truncation/max_length cut reviews longer than 512 tokens instead of failing.
# NOTE(review): the checkpoint name suggests a Russian-language BERT (RuBERT),
# while the reviews in this notebook are English - confirm it is suitable.
rubert_classifier = pipeline(
    "sentiment-analysis",
    model="blanchefort/rubert-base-cased-sentiment-med",
    truncation=True,
    max_length=512,
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/944 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/712M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/495 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/1.40M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [None]:
# Sentiment pipeline backed by the
# `cardiffnlp/twitter-roberta-base-sentiment` checkpoint.
# It reports generic LABEL_0 / LABEL_1 / LABEL_2 class names; a later cell
# maps them to Negative / Neutral / Positive.
# truncation/max_length cut reviews longer than 512 tokens instead of failing.
roberta_classifier = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment",
    truncation=True,
    max_length=512,
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/499M [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

In [None]:
# Show the raw text of the review at row label 0.
medication_review_df.loc[0, 'review']

'"I&#039;ve tried a few antidepressants over the years (citalopram, fluoxetine, amitriptyline), but none of those helped with my depression, insomnia &amp; anxiety. My doctor suggested and changed me onto 45mg mirtazapine and this medicine has saved my life. Thankfully I have had no side effects especially the most common - weight gain, I&#039;ve actually lost alot of weight. I still have suicidal thoughts but mirtazapine has saved me."'

In [None]:
# Bio_ClinicalBERT prediction for the review at row label 0.
bio_clinibert_classifier(medication_review_df.loc[0, 'review'])

[{'label': 'LABEL_0', 'score': 0.5981781482696533}]

In [None]:
# RuBERT prediction for the review at row label 0.
rubert_classifier(medication_review_df.loc[0, 'review'])

[{'label': 'POSITIVE', 'score': 0.7634797692298889}]

In [None]:
# RoBERTa prediction for the review at row label 0.
roberta_classifier(medication_review_df.loc[0, 'review'])

[{'label': 'LABEL_2', 'score': 0.7081274390220642}]

In [None]:
# Show the raw text of the review at row label 10.
medication_review_df.loc[10, 'review']

'"Holy Hell is exactly how I feel. I had been taking Brisdelle for 1.5 years. The hot flashes did indeed subside - however, the side affects of this medicine coupled with the fact Noven was acquired by YET another pharmaceutical company - YOU CAN&#039;T PLACE A REP IN THE AREA, DISTRIBUTE YOUR DRUGS, AND THEN FIRE HER-AND NOT REPLACE THEREFORE there is NO medicine or support here. You dumped this drug in the Dr&#039;s hands and walked away. After calling Sebula - you act like you don&#039;t even care. You have made it impossible to obtain this. I happen to think this is illegal.  I just decided to wean myself off this and Premarin. It has been nothing short of a nightmare. If you don&#039;t need this drug- DON&#039;T START. Seriously."'

In [None]:
# Bio_ClinicalBERT prediction for the review at row label 10.
bio_clinibert_classifier(medication_review_df.loc[10, 'review'])

[{'label': 'LABEL_0', 'score': 0.5748332142829895}]

In [None]:
# RuBERT prediction for the review at row label 10.
rubert_classifier(medication_review_df.loc[10, 'review'])

[{'label': 'NEUTRAL', 'score': 0.41431960463523865}]

In [None]:
# RoBERTa prediction for the review at row label 10.
roberta_classifier(medication_review_df.loc[10, 'review'])

[{'label': 'LABEL_0', 'score': 0.9392552375793457}]

In [None]:
# Show the raw text of the review at row label 100.
medication_review_df.loc[100, 'review']

'"My 5 year old son was diagnosed with ADHD just yesterday, the Behavior Specialist said his was one of the worst cases that she had seen in a while, she had suggested putting him on a stimulant medication, I told her i would like to  a non-stimulant medication first and she prescribed him Kapvay. My son took it for the first time last night before bed, he went right to sleep and when he woke up this morning he was the calmest most pleasant, helpful and nicest he had ever been in his life. I could not believe the overnight change. I&#039;m so glad it worked so fast, he has not gotten in trouble once today which is a new record! His teachers are going to be thrilled on Monday! Thank you to the makers of Clonidine!"'

In [None]:
# Bio_ClinicalBERT prediction for the review at row label 100.
bio_clinibert_classifier(medication_review_df.loc[100, 'review'])

[{'label': 'LABEL_0', 'score': 0.582754909992218}]

In [None]:
# RuBERT prediction for the review at row label 100.
rubert_classifier(medication_review_df.loc[100, 'review'])

[{'label': 'POSITIVE', 'score': 0.914295494556427}]

In [None]:
# RoBERTa prediction for the review at row label 100.
roberta_classifier(medication_review_df.loc[100, 'review'])

[{'label': 'LABEL_2', 'score': 0.9514226913452148}]

In [None]:
# Register `progress_apply` on pandas objects so the per-review inference
# shows a progress bar.
tqdm.pandas(desc='My bar!')
# Run the RuBERT pipeline on every review and keep the raw prediction
# (a one-element list holding a dict with 'label' and 'score') in a new column.
# `assign` returns a new frame, so this does not trigger the
# SettingWithCopyWarning that item assignment raises on a sliced frame.
medication_review_df = medication_review_df.assign(
    rubert_sentiment=medication_review_df['review'].progress_apply(
        lambda review_text: rubert_classifier.predict(review_text)
    )
)

My bar!: 100%|██████████| 500/500 [03:35<00:00,  2.32it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  medication_review_df['sentiment'] = medication_review_df['review'].progress_apply(lambda x : rubert_classifier.predict(x))


In [None]:
# Display the frame to inspect the newly added RuBERT prediction column.
medication_review_df

Unnamed: 0,uniqueId,drugName,condition,review,rating,date,usefulCount,sentiment
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10.0,"February 28, 2012",22,"[{'label': 'POSITIVE', 'score': 0.763479769229..."
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8.0,"May 17, 2009",17,"[{'label': 'POSITIVE', 'score': 0.978934466838..."
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9.0,"September 29, 2017",3,"[{'label': 'POSITIVE', 'score': 0.722799718379..."
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9.0,"March 5, 2017",35,"[{'label': 'POSITIVE', 'score': 0.694053471088..."
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9.0,"October 22, 2015",4,"[{'label': 'POSITIVE', 'score': 0.449853628873..."
...,...,...,...,...,...,...,...,...
495,12695,Etanercept,Rheumatoid Arthritis,"""My husband has RA. His doctor sent him to ...",3.0,"May 31, 2015",35,"[{'label': 'NEGATIVE', 'score': 0.571891605854..."
496,54827,Bupropion / naltrexone,Weight Loss,"""Well just have start by saying so far so good...",9.0,"February 16, 2017",30,"[{'label': 'POSITIVE', 'score': 0.963343679904..."
497,230865,Milnacipran,ibromyalgia,"""Very effective for fibromyalgia pain. Does no...",9.0,"September 7, 2015",38,"[{'label': 'POSITIVE', 'score': 0.381169945001..."
498,30138,Klonopin,Bipolar Disorde,"""Great.""",10.0,"September 27, 2013",23,"[{'label': 'NEGATIVE', 'score': 0.927029550075..."


In [None]:
# Show the raw text of the review at row label 498.
medication_review_df.loc[498, 'review']

'"Great."'

In [None]:
# Show the stored RuBERT prediction for the review at row label 498.
medication_review_df.loc[498, 'rubert_sentiment']

[{'label': 'NEGATIVE', 'score': 0.927029550075531}]

In [None]:
# Pull the predicted class name out of each raw RuBERT prediction
# (first list element, key 'label') into its own column.
# `assign` returns a new frame, so this does not trigger the
# SettingWithCopyWarning that item assignment raises on a sliced frame.
medication_review_df = medication_review_df.assign(
    rubert_label=medication_review_df['rubert_sentiment'].apply(
        lambda prediction: prediction[0]['label']
    )
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  medication_review_df['label'] = medication_review_df['sentiment'].apply(lambda x : x[0]['label'])


In [None]:
# Display the frame to inspect the extracted RuBERT label column.
medication_review_df

Unnamed: 0,uniqueId,drugName,condition,review,rating,date,usefulCount,sentiment,label
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10.0,"February 28, 2012",22,"[{'label': 'POSITIVE', 'score': 0.763479769229...",POSITIVE
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8.0,"May 17, 2009",17,"[{'label': 'POSITIVE', 'score': 0.978934466838...",POSITIVE
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9.0,"September 29, 2017",3,"[{'label': 'POSITIVE', 'score': 0.722799718379...",POSITIVE
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9.0,"March 5, 2017",35,"[{'label': 'POSITIVE', 'score': 0.694053471088...",POSITIVE
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9.0,"October 22, 2015",4,"[{'label': 'POSITIVE', 'score': 0.449853628873...",POSITIVE
...,...,...,...,...,...,...,...,...,...
495,12695,Etanercept,Rheumatoid Arthritis,"""My husband has RA. His doctor sent him to ...",3.0,"May 31, 2015",35,"[{'label': 'NEGATIVE', 'score': 0.571891605854...",NEGATIVE
496,54827,Bupropion / naltrexone,Weight Loss,"""Well just have start by saying so far so good...",9.0,"February 16, 2017",30,"[{'label': 'POSITIVE', 'score': 0.963343679904...",POSITIVE
497,230865,Milnacipran,ibromyalgia,"""Very effective for fibromyalgia pain. Does no...",9.0,"September 7, 2015",38,"[{'label': 'POSITIVE', 'score': 0.381169945001...",POSITIVE
498,30138,Klonopin,Bipolar Disorde,"""Great.""",10.0,"September 27, 2013",23,"[{'label': 'NEGATIVE', 'score': 0.927029550075...",NEGATIVE


In [None]:
# Look at the first 50 rows.
medication_review_df.head(50)

Unnamed: 0,uniqueId,drugName,condition,review,rating,date,usefulCount,sentiment,label
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10.0,"February 28, 2012",22,"[{'label': 'POSITIVE', 'score': 0.763479769229...",POSITIVE
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8.0,"May 17, 2009",17,"[{'label': 'POSITIVE', 'score': 0.978934466838...",POSITIVE
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9.0,"September 29, 2017",3,"[{'label': 'POSITIVE', 'score': 0.722799718379...",POSITIVE
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9.0,"March 5, 2017",35,"[{'label': 'POSITIVE', 'score': 0.694053471088...",POSITIVE
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9.0,"October 22, 2015",4,"[{'label': 'POSITIVE', 'score': 0.449853628873...",POSITIVE
5,208087,Zyclara,Keratosis,"""4 days in on first 2 weeks. Using on arms an...",4.0,"July 3, 2014",13,"[{'label': 'NEGATIVE', 'score': 0.373242884874...",NEGATIVE
6,215892,Copper,Birth Control,"""I&#039;ve had the copper coil for about 3 mon...",6.0,"June 6, 2016",1,"[{'label': 'POSITIVE', 'score': 0.516627132892...",POSITIVE
7,169852,Amitriptyline,Migraine Prevention,"""This has been great for me. I&#039;ve been on...",9.0,"April 21, 2009",32,"[{'label': 'POSITIVE', 'score': 0.954068064689...",POSITIVE
8,23295,Methadone,Opiate Withdrawal,"""Ive been on Methadone for over ten years and ...",7.0,"October 18, 2016",21,"[{'label': 'NEGATIVE', 'score': 0.709616601467...",NEGATIVE
9,71428,Levora,Birth Control,"""I was on this pill for almost two years. It d...",2.0,"April 16, 2011",3,"[{'label': 'POSITIVE', 'score': 0.701550185680...",POSITIVE


In [None]:
# Register `progress_apply` on pandas objects so the per-review inference
# shows a progress bar.
tqdm.pandas(desc='My bar!')
# Run the RoBERTa pipeline on every review and keep the raw prediction
# (a one-element list holding a dict with 'label' and 'score') in a new column.
# `assign` returns a new frame, so this does not trigger the
# SettingWithCopyWarning that item assignment raises on a sliced frame.
medication_review_df = medication_review_df.assign(
    roberta_sentiment=medication_review_df['review'].progress_apply(
        lambda review_text: roberta_classifier.predict(review_text)
    )
)

My bar!: 100%|██████████| 500/500 [02:49<00:00,  2.95it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  medication_review_df['roberta_sentiment'] = medication_review_df['review'].progress_apply(lambda x : roberta_classifier.predict(x))


In [None]:
# Display the frame to inspect the newly added RoBERTa prediction column.
medication_review_df

Unnamed: 0,uniqueId,drugName,condition,review,rating,date,usefulCount,roberta_sentiment
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10.0,"February 28, 2012",22,"[{'label': 'LABEL_2', 'score': 0.7081274390220..."
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8.0,"May 17, 2009",17,"[{'label': 'LABEL_2', 'score': 0.9489813446998..."
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9.0,"September 29, 2017",3,"[{'label': 'LABEL_1', 'score': 0.7108300924301..."
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9.0,"March 5, 2017",35,"[{'label': 'LABEL_2', 'score': 0.4569104015827..."
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9.0,"October 22, 2015",4,"[{'label': 'LABEL_2', 'score': 0.9641554951667..."
...,...,...,...,...,...,...,...,...
495,12695,Etanercept,Rheumatoid Arthritis,"""My husband has RA. His doctor sent him to ...",3.0,"May 31, 2015",35,"[{'label': 'LABEL_0', 'score': 0.6601070165634..."
496,54827,Bupropion / naltrexone,Weight Loss,"""Well just have start by saying so far so good...",9.0,"February 16, 2017",30,"[{'label': 'LABEL_2', 'score': 0.8409104943275..."
497,230865,Milnacipran,ibromyalgia,"""Very effective for fibromyalgia pain. Does no...",9.0,"September 7, 2015",38,"[{'label': 'LABEL_1', 'score': 0.4641806185245..."
498,30138,Klonopin,Bipolar Disorde,"""Great.""",10.0,"September 27, 2013",23,"[{'label': 'LABEL_2', 'score': 0.7443180680274..."


In [None]:
# Pull the predicted class name out of each raw RoBERTa prediction
# (first list element, key 'label') into its own column.
# `assign` returns a new frame, so this does not trigger the
# SettingWithCopyWarning that item assignment raises on a sliced frame.
medication_review_df = medication_review_df.assign(
    roberta_label=medication_review_df['roberta_sentiment'].apply(
        lambda prediction: prediction[0]['label']
    )
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  medication_review_df['roberta_label'] = medication_review_df['roberta_sentiment'].apply(lambda x : x[0]['label'])


In [None]:
# Display the frame to inspect the extracted RoBERTa label column.
medication_review_df

Unnamed: 0,uniqueId,drugName,condition,review,rating,date,usefulCount,roberta_sentiment,roberta_label
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10.0,"February 28, 2012",22,"[{'label': 'LABEL_2', 'score': 0.7081274390220...",LABEL_2
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8.0,"May 17, 2009",17,"[{'label': 'LABEL_2', 'score': 0.9489813446998...",LABEL_2
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9.0,"September 29, 2017",3,"[{'label': 'LABEL_1', 'score': 0.7108300924301...",LABEL_1
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9.0,"March 5, 2017",35,"[{'label': 'LABEL_2', 'score': 0.4569104015827...",LABEL_2
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9.0,"October 22, 2015",4,"[{'label': 'LABEL_2', 'score': 0.9641554951667...",LABEL_2
...,...,...,...,...,...,...,...,...,...
495,12695,Etanercept,Rheumatoid Arthritis,"""My husband has RA. His doctor sent him to ...",3.0,"May 31, 2015",35,"[{'label': 'LABEL_0', 'score': 0.6601070165634...",LABEL_0
496,54827,Bupropion / naltrexone,Weight Loss,"""Well just have start by saying so far so good...",9.0,"February 16, 2017",30,"[{'label': 'LABEL_2', 'score': 0.8409104943275...",LABEL_2
497,230865,Milnacipran,ibromyalgia,"""Very effective for fibromyalgia pain. Does no...",9.0,"September 7, 2015",38,"[{'label': 'LABEL_1', 'score': 0.4641806185245...",LABEL_1
498,30138,Klonopin,Bipolar Disorde,"""Great.""",10.0,"September 27, 2013",23,"[{'label': 'LABEL_2', 'score': 0.7443180680274...",LABEL_2


In [None]:
# Translate RoBERTa's generic class ids into readable sentiment names.
# The replacement is limited to the `roberta_label` column (a frame-wide
# replace would scan every column) and the result is kept in its own variable
# instead of being discarded after display. `medication_review_df` itself is
# left untouched, as before.
roberta_label_names = {'LABEL_2': 'Positive', 'LABEL_0': 'Negative', 'LABEL_1': 'Neutral'}
medication_review_named_df = medication_review_df.assign(
    roberta_label=medication_review_df['roberta_label'].replace(roberta_label_names)
)
medication_review_named_df

Unnamed: 0,uniqueId,drugName,condition,review,rating,date,usefulCount,roberta_sentiment,roberta_label
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10.0,"February 28, 2012",22,"[{'label': 'LABEL_2', 'score': 0.7081274390220...",Positive
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8.0,"May 17, 2009",17,"[{'label': 'LABEL_2', 'score': 0.9489813446998...",Positive
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9.0,"September 29, 2017",3,"[{'label': 'LABEL_1', 'score': 0.7108300924301...",Neutral
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9.0,"March 5, 2017",35,"[{'label': 'LABEL_2', 'score': 0.4569104015827...",Positive
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9.0,"October 22, 2015",4,"[{'label': 'LABEL_2', 'score': 0.9641554951667...",Positive
...,...,...,...,...,...,...,...,...,...
495,12695,Etanercept,Rheumatoid Arthritis,"""My husband has RA. His doctor sent him to ...",3.0,"May 31, 2015",35,"[{'label': 'LABEL_0', 'score': 0.6601070165634...",Negative
496,54827,Bupropion / naltrexone,Weight Loss,"""Well just have start by saying so far so good...",9.0,"February 16, 2017",30,"[{'label': 'LABEL_2', 'score': 0.8409104943275...",Positive
497,230865,Milnacipran,ibromyalgia,"""Very effective for fibromyalgia pain. Does no...",9.0,"September 7, 2015",38,"[{'label': 'LABEL_1', 'score': 0.4641806185245...",Neutral
498,30138,Klonopin,Bipolar Disorde,"""Great.""",10.0,"September 27, 2013",23,"[{'label': 'LABEL_2', 'score': 0.7443180680274...",Positive


In [None]:
# Show the stored RoBERTa prediction for the review at row label 498.
medication_review_df.loc[498, 'roberta_sentiment']

[{'label': 'LABEL_2', 'score': 0.7443180680274963}]

In [None]:
# Print the review text at row label 497, then show the RoBERTa prediction
# stored for that same row.
row_label = 497
print(medication_review_df.loc[row_label, 'review'])
medication_review_df.loc[row_label, 'roberta_sentiment']

"Very effective for fibromyalgia pain. Does not help with fatigue."


[{'label': 'LABEL_1', 'score': 0.4641806185245514}]