## Scraping [drugs.com](https://www.drugs.com/condition/contraception.html) for the drug names, rating, and number of reviews/link to reviews

In [53]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from collections import defaultdict
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import random

In [2]:
# Base URL of the paginated drug listing and the page numbers to scrape
url = 'https://www.drugs.com/condition/contraception.html?page_number='
pages = [1,2,3,4,5,6]

# One requests.Response per listing page, in page order
html = []
for i in pages:
    # timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get(url + str(i), timeout=30)
    # fail loudly on a blocked/failed request instead of storing an error page
    # that would later parse into empty or misleading results
    response.raise_for_status()
    html.append(response)
    #wait 1.5 seconds after each page since this website doesn't like scrapers
    time.sleep(1.5)
    

In [3]:
# Sanity check: number of responses collected (one per entry in `pages`)
len(html)

6

In [4]:
# Peek at the first 1000 characters of the first page's raw HTML
html[0].text[:1000]

'<!DOCTYPE html>\n<html lang=\'en\'>\n\n<head>\n<meta charset="utf-8">\n<title>List of Birth Control (Contraception) Medications (251 Compared) - Drugs.com</title>\n<meta name="viewport" content="width=device-width, initial-scale=1">\n<meta name="description" content="Contraception is the deliberate use of a medicine, device, or a technique to prevent pregnancy that has the potential to happen during sexual intercourse. C">\n<meta name="referrer" content="origin-when-cross-origin">\n<link rel=\'canonical\' href=\'https://www.drugs.com/condition/contraception.html\'>\n<link rel=\'search\' type=\'application/opensearchdescription+xml\' href=\'/opensearch.xml\' title=\'Drugs.com\'>\n<!-- Facebook Open Graph Summary -->\n<meta property="fb:app_id" content="1470988726468809">\n<meta property="article:author" content="https://www.facebook.com/Drugscom">\n<meta property="article:publisher" content="https://www.facebook.com/Drugscom">\n<meta property="og:title" content="List of Birth Control (

### I grabbed the html for the first 6 pages

#### Next I'm going to parse it using beautiful soup

In [5]:
# Parse the first listing page with the lxml parser
soup1 = BeautifulSoup(html[0].text, 'lxml')

In [6]:
# Pretty-print the parsed document of the first page
print(soup1.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of Birth Control (Contraception) Medications (251 Compared) - Drugs.com
  </title>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <meta content="Contraception is the deliberate use of a medicine, device, or a technique to prevent pregnancy that has the potential to happen during sexual intercourse. C" name="description"/>
  <meta content="origin-when-cross-origin" name="referrer"/>
  <link href="https://www.drugs.com/condition/contraception.html" rel="canonical"/>
  <link href="/opensearch.xml" rel="search" title="Drugs.com" type="application/opensearchdescription+xml"/>
  <!-- Facebook Open Graph Summary -->
  <meta content="1470988726468809" property="fb:app_id"/>
  <meta content="https://www.facebook.com/Drugscom" property="article:author"/>
  <meta content="https://www.facebook.com/Drugscom" property="article:publisher"/>
  <meta content="List of Birth Control (Contracept

In [7]:
# Show the raw <a> tags carrying the drug-name CSS classes
name_tags = soup1.find_all(class_ = 'condition-table__drug-name__link ddc-text-wordbreak')
for element in name_tags:
    print(element, '\n')

<a class="condition-table__drug-name__link ddc-text-wordbreak" data-brand_name_id="865" data-condition_id="567" data-ddc_id="1461" data-send-data="/js/async/drug-condition-log.php" data-type="condition-drug" href="/mirena.html">Mirena</a> 

<a class="condition-table__drug-name__link ddc-text-wordbreak" data-brand_name_id="14850" data-condition_id="567" data-ddc_id="1044" data-send-data="/js/async/drug-condition-log.php" data-type="condition-drug" href="/lo-loestrin-fe.html">Lo Loestrin Fe</a> 

<a class="condition-table__drug-name__link ddc-text-wordbreak" data-brand_name_id="589" data-condition_id="567" data-ddc_id="1041" data-send-data="/js/async/drug-condition-log.php" data-type="condition-drug" href="/nuvaring.html">NuvaRing</a> 

<a class="condition-table__drug-name__link ddc-text-wordbreak" data-brand_name_id="9720" data-condition_id="567" data-ddc_id="1045" data-send-data="/js/async/drug-condition-log.php" data-type="condition-drug" href="/mtm/sprintec.html">Sprintec</a> 

<a cl

### Get the drug title

In [8]:
# Print only the visible text of each drug-name link
for element in soup1.find_all(class_ = 'condition-table__drug-name__link ddc-text-wordbreak'):
    name_text = element.text
    print(name_text, '\n')

Mirena 

Lo Loestrin Fe 

NuvaRing 

Sprintec 

Depo-Provera 

Nexplanon 

Kyleena 

Xulane 

Yaz 

Liletta 

ParaGard 

Twirla 

Annovera 

Phexxi 

levonorgestrel 

norethindrone 

Tri-Sprintec 

Lutera 

Yasmin 

Aviane 

ethinyl estradiol / norgestimate 

Skyla 

depo-subQ provera 104 

Microgestin Fe 1 / 20 

Seasonique 



In [9]:
# Same drug names as a list, with surrounding whitespace stripped
name_tags = soup1.find_all(class_ = 'condition-table__drug-name__link ddc-text-wordbreak')
[tag.text.strip() for tag in name_tags]

['Mirena',
 'Lo Loestrin Fe',
 'NuvaRing',
 'Sprintec',
 'Depo-Provera',
 'Nexplanon',
 'Kyleena',
 'Xulane',
 'Yaz',
 'Liletta',
 'ParaGard',
 'Twirla',
 'Annovera',
 'Phexxi',
 'levonorgestrel',
 'norethindrone',
 'Tri-Sprintec',
 'Lutera',
 'Yasmin',
 'Aviane',
 'ethinyl estradiol / norgestimate',
 'Skyla',
 'depo-subQ provera 104',
 'Microgestin Fe 1 / 20',
 'Seasonique']

## Get the drug link

In [10]:
# Print the (site-relative) href of every drug-name link
for elt in soup1.find_all(class_ = 'condition-table__drug-name__link ddc-text-wordbreak'):
    drug_href = elt['href']
    print(drug_href)

/mirena.html
/lo-loestrin-fe.html
/nuvaring.html
/mtm/sprintec.html
/depo-provera.html
/nexplanon.html
/mtm/kyleena.html
/mtm/xulane-transdermal.html
/yaz.html
/mtm/liletta.html
/paragard.html
/twirla.html
/mtm/annovera-vaginal-ring.html
/phexxi.html
/plan-b.html
/mtm/norethindrone.html
/cdi/tri-sprintec.html
/pro/lutera.html
/yasmin.html
/aviane.html
/mtm/ethinyl-estradiol-and-norgestimate.html
/skyla.html
/mtm/depo-provera-contraceptive-injection.html
/cdi/microgestin-fe-1-20.html
/seasonique.html


In [11]:
# Prefix each relative href with the domain and collect the results in a list.
# NOTE(review): the prefix has no scheme, so these strings presumably need
# 'https://www.' prepended before they can be passed to requests.get.
name_tags = soup1.find_all(class_ = 'condition-table__drug-name__link ddc-text-wordbreak')
['drugs.com' + tag['href'] for tag in name_tags]


['drugs.com/mirena.html',
 'drugs.com/lo-loestrin-fe.html',
 'drugs.com/nuvaring.html',
 'drugs.com/mtm/sprintec.html',
 'drugs.com/depo-provera.html',
 'drugs.com/nexplanon.html',
 'drugs.com/mtm/kyleena.html',
 'drugs.com/mtm/xulane-transdermal.html',
 'drugs.com/yaz.html',
 'drugs.com/mtm/liletta.html',
 'drugs.com/paragard.html',
 'drugs.com/twirla.html',
 'drugs.com/mtm/annovera-vaginal-ring.html',
 'drugs.com/phexxi.html',
 'drugs.com/plan-b.html',
 'drugs.com/mtm/norethindrone.html',
 'drugs.com/cdi/tri-sprintec.html',
 'drugs.com/pro/lutera.html',
 'drugs.com/yasmin.html',
 'drugs.com/aviane.html',
 'drugs.com/mtm/ethinyl-estradiol-and-norgestimate.html',
 'drugs.com/skyla.html',
 'drugs.com/mtm/depo-provera-contraceptive-injection.html',
 'drugs.com/cdi/microgestin-fe-1-20.html',
 'drugs.com/seasonique.html']

## Get the drug rating

In [12]:
# Print the stripped text of every element with the 'ddc-text-nowrap' class
nowrap_tags = soup1.find_all(class_ = "ddc-text-nowrap")
for rating in nowrap_tags:
    print(rating.text.strip(), '\n')

6.6 

5.7 

6.1 

5.8 

5.3 

5.2 

6.8 

5.9 

5.2 

5.7 

5.8 

0.0 

5.3 

0.0 

6.7 

5.1 

5.5 

6.2 

4.7 

6.3 

5.3 

7.0 

4.8 

5.7 

5.6 

verify here 



In [13]:
# Same values as a list; the output shows a trailing non-rating entry ('verify here')
nowrap_tags = soup1.find_all(class_ = "ddc-text-nowrap")
[tag.text.strip() for tag in nowrap_tags]


['6.6',
 '5.7',
 '6.1',
 '5.8',
 '5.3',
 '5.2',
 '6.8',
 '5.9',
 '5.2',
 '5.7',
 '5.8',
 '0.0',
 '5.3',
 '0.0',
 '6.7',
 '5.1',
 '5.5',
 '6.2',
 '4.7',
 '6.3',
 '5.3',
 '7.0',
 '4.8',
 '5.7',
 '5.6',
 'verify here']

In [14]:
# Parse the second listing page to check whether 'verify here' shows up there too
second_page_html = html[1].text
soup2 = BeautifulSoup(second_page_html, 'lxml')

In [15]:
# Rating texts from the second page, for comparison with the first page
nowrap_tags_page2 = soup2.find_all(class_ = "ddc-text-nowrap")
[tag.text.strip() for tag in nowrap_tags_page2]


['5.3',
 '5.4',
 '4.4',
 '6.3',
 '5.3',
 '5.2',
 '5.1',
 '5.9',
 '5.2',
 '6.1',
 '6.3',
 '4.5',
 '5.9',
 '5.4',
 '5.3',
 '5.1',
 '5.0',
 '7.3',
 '5.2',
 '5.5',
 '4.4',
 '5.8',
 '6.4',
 '6.3',
 '5.3',
 'verify here']

It does, so the rating parser should return all of the elements except the last one.

## Get the number of reviews

In [16]:
# Print the text of every review-count cell ("<N> reviews" or "Add review")
review_cells = soup1.find_all(class_ = "condition-table__reviews ddc-valign-middle")
for num_ratings in review_cells:
    print(num_ratings.text.strip(), '\n')

1767 reviews 

1354 reviews 

1126 reviews 

983 reviews 

923 reviews 

4432 reviews 

1338 reviews 

416 reviews 

422 reviews 

540 reviews 

1129 reviews 

Add review 

17 reviews 

Add review 

5142 reviews 

870 reviews 

528 reviews 

344 reviews 

245 reviews 

442 reviews 

3638 reviews 

1475 reviews 

16 reviews 

533 reviews 

236 reviews 



In [17]:
# Same cell texts as a list; the repr reveals the non-breaking space (\xa0) before "reviews"
review_cells = soup1.find_all(class_ = "condition-table__reviews ddc-valign-middle")
[cell.text.strip() for cell in review_cells]


['1767\xa0reviews',
 '1354\xa0reviews',
 '1126\xa0reviews',
 '983\xa0reviews',
 '923\xa0reviews',
 '4432\xa0reviews',
 '1338\xa0reviews',
 '416\xa0reviews',
 '422\xa0reviews',
 '540\xa0reviews',
 '1129\xa0reviews',
 'Add\xa0review',
 '17\xa0reviews',
 'Add\xa0review',
 '5142\xa0reviews',
 '870\xa0reviews',
 '528\xa0reviews',
 '344\xa0reviews',
 '245\xa0reviews',
 '442\xa0reviews',
 '3638\xa0reviews',
 '1475\xa0reviews',
 '16\xa0reviews',
 '533\xa0reviews',
 '236\xa0reviews']

In [18]:
# Convert every review-count cell into an integer number of reviews
temp = []
for num_ratings in soup1.find_all(class_ = "condition-table__reviews ddc-valign-middle"):
    # Cell text looks like '1767\xa0reviews'. str.split() with no argument splits on
    # any Unicode whitespace (including the non-breaking space), so the count is the
    # first token. This also handles a singular '1 review', which a fixed-width
    # slice of the suffix would turn into an empty string.
    tokens = num_ratings.text.split()
    #some have no ratings ('Add review'), so replace those with 0
    try:
        temp.append(int(tokens[0]))
    except (IndexError, ValueError):
        # IndexError: empty cell; ValueError: first token is not a number
        temp.append(0)
    

In [19]:
# Inspect the parsed review counts (0 where the cell had no number)
temp

[1767,
 1354,
 1126,
 983,
 923,
 4432,
 1338,
 416,
 422,
 540,
 1129,
 0,
 17,
 0,
 5142,
 870,
 528,
 344,
 245,
 442,
 3638,
 1475,
 16,
 533,
 236]

## Get the links to the reviews

In [20]:
# Print the href of every <a> found inside the review-count cells
review_cells = soup1.find_all(class_ = "condition-table__reviews ddc-valign-middle")
for num_ratings in review_cells:
    for link in num_ratings.find_all('a'):
        review_href = link['href']
        print(review_href, '\n')

/comments/levonorgestrel/mirena-for-contraception.html 

/comments/ethinyl-estradiol-norethindrone/lo-loestrin-fe-for-contraception.html 

/comments/ethinyl-estradiol-etonogestrel/nuvaring-for-contraception.html 

/comments/ethinyl-estradiol-norgestimate/sprintec-for-contraception.html 

/comments/medroxyprogesterone/depo-provera-for-contraception.html 

/comments/etonogestrel/nexplanon-for-contraception.html 

/comments/levonorgestrel/kyleena-for-contraception.html 

/comments/ethinyl-estradiol-norelgestromin/xulane-for-contraception.html 

/comments/drospirenone-ethinyl-estradiol/yaz-for-contraception.html 

/comments/levonorgestrel/liletta-for-contraception.html 

/comments/copper-topical/paragard-for-contraception.html 

/comments/ethinyl-estradiol-levonorgestrel/twirla-for-contraception.html 

/comments/ethinyl-estradiol-segesterone/annovera-for-contraception.html 

/comments/citric-acid-lactic-acid-potassium-bitartrate-topical/phexxi-for-contraception.html 

/comments/levonorgest

In [21]:
# Collect the domain-prefixed review links into a list and display it
temp2 = [
    'drugs.com' + anchor['href']
    for cell in soup1.find_all(class_ = "condition-table__reviews ddc-valign-middle")
    for anchor in cell.find_all('a')
]
temp2


['drugs.com/comments/levonorgestrel/mirena-for-contraception.html',
 'drugs.com/comments/ethinyl-estradiol-norethindrone/lo-loestrin-fe-for-contraception.html',
 'drugs.com/comments/ethinyl-estradiol-etonogestrel/nuvaring-for-contraception.html',
 'drugs.com/comments/ethinyl-estradiol-norgestimate/sprintec-for-contraception.html',
 'drugs.com/comments/medroxyprogesterone/depo-provera-for-contraception.html',
 'drugs.com/comments/etonogestrel/nexplanon-for-contraception.html',
 'drugs.com/comments/levonorgestrel/kyleena-for-contraception.html',
 'drugs.com/comments/ethinyl-estradiol-norelgestromin/xulane-for-contraception.html',
 'drugs.com/comments/drospirenone-ethinyl-estradiol/yaz-for-contraception.html',
 'drugs.com/comments/levonorgestrel/liletta-for-contraception.html',
 'drugs.com/comments/copper-topical/paragard-for-contraception.html',
 'drugs.com/comments/ethinyl-estradiol-levonorgestrel/twirla-for-contraception.html',
 'drugs.com/comments/ethinyl-estradiol-segesterone/annover

## Next, get all this info and put it into a df. 

In [22]:
from soup_parser import get_titles, get_title_links, get_avg_ratings, get_num_reviews, get_review_links


In [28]:
# Parse all six fetched listing pages; soupN is built from html[N].
# NOTE: this rebinds soup1 and soup2, which earlier cells built from html[0] and html[1].
soup0, soup1, soup2, soup3, soup4, soup5 = (
    BeautifulSoup(html[page_idx].text, 'lxml') for page_idx in range(6)
)

In [29]:
# Start from an empty DataFrame; its columns are assigned afterwards
df = pd.DataFrame()

In [37]:
# The six parsed listing pages, in page order
page_soups = (soup0, soup1, soup2, soup3, soup4, soup5)

# Column name -> soup_parser helper that extracts that column's values from one page
column_getters = {
    'Drug': get_titles,
    'Drug_links': get_title_links,
    'Avg_Rating': get_avg_ratings,
    'Num_Reviews': get_num_reviews,
    'Review_Links': get_review_links,
}

for column_name, getter in column_getters.items():
    # Concatenate the per-page results left to right with `+`, one page at a time
    combined = getter(page_soups[0])
    for page_soup in page_soups[1:]:
        combined = combined + getter(page_soup)
    df[column_name] = combined



In [39]:
# Preview the first rows of the assembled table
df.head()

Unnamed: 0,Drug,Drug_links,Avg_Rating,Num_Reviews,Review_Links
0,Mirena,drugs.com/mirena.html,6.6,1767,drugs.com/comments/levonorgestrel/mirena-for-c...
1,Lo Loestrin Fe,drugs.com/lo-loestrin-fe.html,5.7,1354,drugs.com/comments/ethinyl-estradiol-norethind...
2,NuvaRing,drugs.com/nuvaring.html,6.1,1126,drugs.com/comments/ethinyl-estradiol-etonogest...
3,Sprintec,drugs.com/mtm/sprintec.html,5.8,983,drugs.com/comments/ethinyl-estradiol-norgestim...
4,Depo-Provera,drugs.com/depo-provera.html,5.3,923,drugs.com/comments/medroxyprogesterone/depo-pr...


In [45]:
# (rows, columns) of the assembled table
df.shape

(150, 5)

In [46]:
#keep only the rows with more than 15 reviews
# NOTE(review): the strict `>` also drops drugs with exactly 15 reviews; switch to
# `>=` if "fewer than 15" is the intended cutoff.

df2 = df[df['Num_Reviews'] > 15]
df2.shape

(113, 5)

In [47]:
# Save the filtered table to a pickle file in the working directory
df2.to_pickle('Drugs_no_revs.pkl')

In [54]:
# Look up the review link stored under index label 0
df2['Review_Links'][0]

'drugs.com/comments/levonorgestrel/mirena-for-contraception.html'

## Next go into each review link 

#### Want to scrape reviews, review date, number of each review rating (i.e. 488 people rated it a 10, 282 rated it a 9, etc.), number of "helpful hearts", length of time the bc was taken for (if available) 

I think I'm actually gonna try using Selenium instead of beautiful soup for this part

In [65]:
# open up a Chrome window controlled by Selenium (no page is loaded in this cell)
# NOTE(review): the path below is absolute and machine-specific; adjust it locally

chromedriver = "/Applications/chromedriver" # path to the chromedriver executable

driver = webdriver.Chrome(chromedriver)


In [68]:
# Load the Mirena review page in the Selenium-controlled browser
review_page_url = 'https://www.drugs.com/comments/levonorgestrel/mirena-for-contraception.html'
driver.get(review_page_url)
#make sure page has loaded
time.sleep(1)


### scroll to the bottom and click next

In [77]:
# Locate every link whose text is exactly "Next"
find_next = driver.find_elements_by_link_text("Next")
# Scroll to the bottom of the page before clicking
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# Click the first match; this raises IndexError when no "Next" link was found
find_next[0].click()

### grab the amount of each rating from the table

xpaths:  
//\*[@id="content"]/div[2]/div[1]/div/div[1]/table/tbody/tr[1]/th  
//\*[@id="content"]/div[2]/div[1]/div/div[1]/table/tbody/tr[2]  
//\*[@id="content"]/div[2]/div[1]/div/div[1]/table/tbody/tr[10]  

In [90]:
# Elements matching the first row (tr[1]) of the rating table
table1 = driver.find_elements_by_xpath("//*[@id='content']/div[2]/div[1]/div/div[1]/table/tbody/tr[1]")


In [91]:
# Rendered text of that row: rating, then percentage and count on the next line
table1[0].text

'10\n27% (448)'

In [92]:
# Elements matching the second row (tr[2]) of the rating table
table2 = driver.find_elements_by_xpath("//*[@id='content']/div[2]/div[1]/div/div[1]/table/tbody/tr[2]")


In [93]:
# Rendered text of the second row
table2[0].text

'9\n17% (282)'

In [94]:
# Elements matching the third row (tr[3]) of the rating table
table3 = driver.find_elements_by_xpath("//*[@id='content']/div[2]/div[1]/div/div[1]/table/tbody/tr[3]")


In [95]:
# Rendered text of the third row
table3[0].text

'8\n10% (160)'

In [117]:
import re

# Split a row such as '8\n10% (160)' at the newline and at the '% (' separator.
# The pattern is a raw string so that '\(' reaches the regex engine as an escaped
# parenthesis instead of being an invalid string escape sequence.
text = table3[0].text
print(re.split(r'\n|% \(', text))

['8', '10', '160)']


In [125]:
# Collect [rating, percent, count] for each of the 10 rows of the rating table
table = []
for i in range(1,11):
    scores = driver.find_elements_by_xpath("//*[@id='content']/div[2]/div[1]/div/div[1]/table/tbody/tr[" + str(i) + "]")
    text = scores[0].text
    # drop the closing parenthesis so the last field is just the count
    text = text.replace(')', '')
    # raw string: '\(' is a regex escape here, not an (invalid) string escape
    table.append(re.split(r'\n|% \(', text))


In [126]:
# Inspect the parsed rows: [rating, percent, count] as strings
table

[['10', '27', '448'],
 ['9', '17', '282'],
 ['8', '10', '160'],
 ['7', '6', '96'],
 ['6', '5', '85'],
 ['5', '6', '108'],
 ['4', '4', '64'],
 ['3', '6', '93'],
 ['2', '6', '100'],
 ['1', '14', '234']]

In [127]:
def table_scores(browser=None):
    """Read the rating-distribution table of the review page currently loaded.

    Each of the 10 table rows renders as the rating, a newline, and then text
    such as ``27% (448)``. Every row is split into its rating, its percentage
    and its raw review count.

    Parameters
    ----------
    browser : optional
        Object exposing ``find_elements_by_xpath`` (e.g. the Selenium driver).
        When omitted, the notebook-level ``driver`` is used.

    Returns
    -------
    tuple of three lists of str
        ``(rating, percent, raw_amount)``, one entry per table row in page order.

    Raises
    ------
    IndexError
        If a row is not found or its text lacks one of the three parts.
    """
    if browser is None:
        browser = driver

    # raw string: '\(' is a regex escape here, not an (invalid) string escape
    row_splitter = re.compile(r'\n|% \(')

    table = []
    for i in range(1, 11):
        scores = browser.find_elements_by_xpath("//*[@id='content']/div[2]/div[1]/div/div[1]/table/tbody/tr[" + str(i) + "]")
        # drop the closing parenthesis so the last field is just the count
        text = scores[0].text.replace(')', '')
        table.append(row_splitter.split(text))

    rating = [row[0] for row in table]
    percent = [row[1] for row in table]
    raw_amount = [row[2] for row in table]
    return rating, percent, raw_amount

In [128]:
# Unpack the three parallel lists returned for the current page
p_rating, p_percent, p_raw_amount = table_scores()

In [131]:
# Only the last bare expression of a cell is displayed, so just p_raw_amount shows
p_rating
p_percent
p_raw_amount

['448', '282', '160', '96', '85', '108', '64', '93', '100', '234']

### Try to grab the posts now

xpath:  
//\*[@id="content"]/div[2]/div[2]/p

//\*[@id="content"]/div[2]/div[3]

In [171]:
# All elements on the page whose class attribute is exactly 'ddc-comment-content'
post = driver.find_elements_by_xpath("//*[@class='ddc-comment-content']")


In [174]:
# Number of matching elements found on this page
len(post)

25

In [176]:
# Print the text of every matched review body, separated by blank lines
for review in post:
    print(review.text)
    print()

“Decided to try Mirena after baby #2. Inserted February 2019. Insertion was painful but expected. Period bleeding reduced to nothing after 3 months. Give it a 4 for doing that but the rest of the side affects are overwhelming. The first 3 months were awful with mood swings. Extreme sensitivity, crying spells, extreme anger, couldn’t handle the children or spouse anymore, patience disappeared. Extreme fatigue to this day. At 3 month check up my OB convinced me in keeping it longer saying symptoms should improve with time. Now 8 months later still very emotional, and it increases with intensity a week before my period. Also have horrible bloating and developed hormonal and cystic acne. Just now having weight gain. Gained 11lbs these past 2 months, as much as 5 lbs in a week because of bloating. I was 119lbs before the IUD and now 130lbs. I have not changed diet or activity. Clothes I just bought 2 months ago no longer fit me. I’m considering having Mirena removed.”

“I LOVE my Mirena. I 

“Mirena has worked for what is has promised in regards to ceasing the ability to become pregnant, but it's just not worth it for me. In the three years I've had it placed, just after my daughter was born, I've had random and severe cramping shooting from my abdomen to my thighs and knees. I've had really bad acne, which has left some severe scars, and hair growing where it absolutely SHOULDN'T for a young woman! Unpredictable periods, lasting mood swings, debilitating anxiety, and for a few months, an increasingly crippling depression so dark and hopeless that life was just a chore. Screwing with your hormones is a very bad idea ladies, if it involves these risks. Every birth control I've ever been on has caused problems similar to this. I think maybe it's time we take back our bodies and stop using these types of bc (pills, injection, IUD, implants)”

“It’s been 5 months since I had Mirena inserted. I’m 40, just had my 2nd and last child a year ago, the procedure is nowhere near givin

In [178]:
# All elements whose class attribute is exactly 'ddc-comment' (review text plus its metadata)
with_metadata = driver.find_elements_by_xpath("//*[@class='ddc-comment']")


In [179]:
# Number of comment elements found on this page
len(with_metadata)

25

In [183]:
# Print each comment's position followed by its full rendered text
for position, comment in enumerate(with_metadata):
    print(position, '\n')
    print(comment.text)
    print()

0 

Gio
October 14, 2019
“Decided to try Mirena after baby #2. Inserted February 2019. Insertion was painful but expected. Period bleeding reduced to nothing after 3 months. Give it a 4 for doing that but the rest of the side affects are overwhelming. The first 3 months were awful with mood swings. Extreme sensitivity, crying spells, extreme anger, couldn’t handle the children or spouse anymore, patience disappeared. Extreme fatigue to this day. At 3 month check up my OB convinced me in keeping it longer saying symptoms should improve with time. Now 8 months later still very emotional, and it increases with intensity a week before my period. Also have horrible bloating and developed hormonal and cystic acne. Just now having weight gain. Gained 11lbs these past 2 months, as much as 5 lbs in a week because of bloating. I was 119lbs before the IUD and now 130lbs. I have not changed diet or activity. Clothes I just bought 2 months ago no longer fit me. I’m considering having Mirena removed

Mel
·
Taken for 6 months to 1 year
September 29, 2019
“I had the Mirena placed in March. It was a little uncomfortable but only during the procedure. My period was light almost immediately. Fast forward until today and I’ve had increased heart rate, difficulty breathing, went to the ER and told it was anxiety and dehydration. Nothing in my life has changed since March! Everything is dry. My eyes, my skin, my hair is brittle and I feel like I shed like a dog, my hands and toes always feel like they are falling asleep. I bruise like crazy. The mood swings and dark depression suck. It may be that I cannot tolerate this amount of hormones but I can’t feel the strings anymore and will see the doctor on Monday.”
2 / 10
Was this helpful?   Yes   No
7 · Report

11 

Loz
·
Taken for 1 to 6 months
September 28, 2019
“The worst 6 months of my life was on Mirena. Insertion was horrific but no where near as bad as the next 6 months after insertion. 3 days after insertion I began bleeding and it nev

The Day of Insertion
·
Taken for less than 1 month
September 17, 2019
“I have lived on this website the past few days reading reviews about all the IUDs, freaking out about the insertion. But I had to keep in mind each experience is different. I have had Severe PMS in the past and have been on birth control pills to help with the serious cramps and heavy flow. I also have IBS which gives severe cramps. So seriously how bad could this be? The first few things they did didn't hurt, but once they started measuring my uterus, I was in excruciating pain. A procedure that should've taken 2-3 minutes, took about 10+ agonizing minutes. After almost passing out and hyperventilating, it was finally over. I have been home for the past few hours switching between walking, laying with a heating pad, and taking baths. The cramps are awful at times and then gone at other times. I haven't experienced bleeding yet. I'll update here in a few months”
5 / 10
Was this helpful?   Yes   No
10 · Report

23 



In [185]:
# Take the first comment's text as a worked example and display it
example = with_metadata[0].text
example

'Gio\nOctober 14, 2019\n“Decided to try Mirena after baby #2. Inserted February 2019. Insertion was painful but expected. Period bleeding reduced to nothing after 3 months. Give it a 4 for doing that but the rest of the side affects are overwhelming. The first 3 months were awful with mood swings. Extreme sensitivity, crying spells, extreme anger, couldn’t handle the children or spouse anymore, patience disappeared. Extreme fatigue to this day. At 3 month check up my OB convinced me in keeping it longer saying symptoms should improve with time. Now 8 months later still very emotional, and it increases with intensity a week before my period. Also have horrible bloating and developed hormonal and cystic acne. Just now having weight gain. Gained 11lbs these past 2 months, as much as 5 lbs in a week because of bloating. I was 119lbs before the IUD and now 130lbs. I have not changed diet or activity. Clothes I just bought 2 months ago no longer fit me. I’m considering having Mirena removed.

In [188]:
# Break the comment text into its individual lines
example_list = example.split('\n')

In [190]:
# Inspect the lines of the first example
example_list

['Gio',
 'October 14, 2019',
 '“Decided to try Mirena after baby #2. Inserted February 2019. Insertion was painful but expected. Period bleeding reduced to nothing after 3 months. Give it a 4 for doing that but the rest of the side affects are overwhelming. The first 3 months were awful with mood swings. Extreme sensitivity, crying spells, extreme anger, couldn’t handle the children or spouse anymore, patience disappeared. Extreme fatigue to this day. At 3 month check up my OB convinced me in keeping it longer saying symptoms should improve with time. Now 8 months later still very emotional, and it increases with intensity a week before my period. Also have horrible bloating and developed hormonal and cystic acne. Just now having weight gain. Gained 11lbs these past 2 months, as much as 5 lbs in a week because of bloating. I was 119lbs before the IUD and now 130lbs. I have not changed diet or activity. Clothes I just bought 2 months ago no longer fit me. I’m considering having Mirena r

In [189]:
# Number of lines in the first example
len(example_list)

6

In [193]:
# Third-from-last comment; its text includes a "Taken for ..." line
example2 = with_metadata[-3].text
example2

"The Day of Insertion\n·\nTaken for less than 1 month\nSeptember 17, 2019\n“I have lived on this website the past few days reading reviews about all the IUDs, freaking out about the insertion. But I had to keep in mind each experience is different. I have had Severe PMS in the past and have been on birth control pills to help with the serious cramps and heavy flow. I also have IBS which gives severe cramps. So seriously how bad could this be? The first few things they did didn't hurt, but once they started measuring my uterus, I was in excruciating pain. A procedure that should've taken 2-3 minutes, took about 10+ agonizing minutes. After almost passing out and hyperventilating, it was finally over. I have been home for the past few hours switching between walking, laying with a heating pad, and taking baths. The cramps are awful at times and then gone at other times. I haven't experienced bleeding yet. I'll update here in a few months”\n5 / 10\nWas this helpful?   Yes   No\n10 · Repor

In [194]:
# Break the second example into its individual lines
example2_list = example2.split('\n')

In [195]:
# Inspect the lines of the second example
example2_list

['The Day of Insertion',
 '·',
 'Taken for less than 1 month',
 'September 17, 2019',
 "“I have lived on this website the past few days reading reviews about all the IUDs, freaking out about the insertion. But I had to keep in mind each experience is different. I have had Severe PMS in the past and have been on birth control pills to help with the serious cramps and heavy flow. I also have IBS which gives severe cramps. So seriously how bad could this be? The first few things they did didn't hurt, but once they started measuring my uterus, I was in excruciating pain. A procedure that should've taken 2-3 minutes, took about 10+ agonizing minutes. After almost passing out and hyperventilating, it was finally over. I have been home for the past few hours switching between walking, laying with a heating pad, and taking baths. The cramps are awful at times and then gone at other times. I haven't experienced bleeding yet. I'll update here in a few months”",
 '5 / 10',
 'Was this helpful?   Y

In [196]:
# Number of lines in the second example (two more than the first example)
len(example2_list)

8

In [64]:
# Shut down the Selenium browser session
driver.quit()

In [50]:
# Seed a defaultdict with the first five review links, each mapped to an empty list
test = defaultdict(list, {review_link: [] for review_link in df2['Review_Links'][:5]})
test

defaultdict(list,
            {'drugs.com/comments/levonorgestrel/mirena-for-contraception.html': [],
             'drugs.com/comments/ethinyl-estradiol-norethindrone/lo-loestrin-fe-for-contraception.html': [],
             'drugs.com/comments/ethinyl-estradiol-etonogestrel/nuvaring-for-contraception.html': [],
             'drugs.com/comments/ethinyl-estradiol-norgestimate/sprintec-for-contraception.html': [],
             'drugs.com/comments/medroxyprogesterone/depo-provera-for-contraception.html': []})

In [51]:
# Append a value to the list stored under the first review link
test[df2['Review_Links'][0]].append(1)

In [52]:
# Confirm that only the first key's list changed
test

defaultdict(list,
            {'drugs.com/comments/levonorgestrel/mirena-for-contraception.html': [1],
             'drugs.com/comments/ethinyl-estradiol-norethindrone/lo-loestrin-fe-for-contraception.html': [],
             'drugs.com/comments/ethinyl-estradiol-etonogestrel/nuvaring-for-contraception.html': [],
             'drugs.com/comments/ethinyl-estradiol-norgestimate/sprintec-for-contraception.html': [],
             'drugs.com/comments/medroxyprogesterone/depo-provera-for-contraception.html': []})

Build a defaultdict that has every review link as a key and an empty list as each value. Then populate each list with the HTML of every review page for that link.

In [None]:
#make a default dict that holds the responses fetched for each review link
links_and_review_response = defaultdict(list)

for link in df2['Review_Links']:
    # the stored links have no scheme ('drugs.com/...'), so add one before requesting
    response = requests.get('https://www.' + link, timeout=30)
    # fail loudly on a blocked/failed request instead of storing an error page
    response.raise_for_status()
    links_and_review_response[link].append(response)
    # TODO: only the first page of each drug's reviews is fetched here; following
    # the "Next" pagination still has to be added
    #wait 1.5 seconds after each page since this website doesn't like scrapers
    time.sleep(1.5)