## Web-Scraping

In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
import requests
import pymongo
from datetime import datetime as dt
import pandas as pd
from splinter import Browser

In [2]:
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News

In [4]:
# URL of page to be scraped.  Pass URL into splinter browser
mars_url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(mars_url)

In [5]:
# Scrape URL and create BeautifulSoup object
html = browser.html
soup = bs(html, 'html.parser')

# Print formatted version of the soup
# print(soup.prettify())

# Find the current news titles 
titles = soup.find_all('div', class_="content_title")

# Find the news paragraphs
paras = soup.find_all('div', class_="article_teaser_body")

# Assign the text to variables with text only
news_title = titles[1].text
news_p = paras[1].text

print(news_title)
print(news_p)

MOXIE Could Help Future Rockets Launch Off Mars
The first to be rigged with microphones, the agency's latest Mars rover picked up the subtle sounds of its own inner workings during interplanetary flight.


### JPL Mars Space Images

In [6]:
# URL of page to be scraped.  Pass URL into splinter browser
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(jpl_url)

In [10]:
# Create BeautifulSoup object
jpl_html = browser.html
soup = bs(jpl_html, 'html.parser')

# Find the featured image from home page and click Full Image button
browser.links.find_by_partial_text('Full Image')

# Find the more info button and click
browser.links.find_by_partial_text('more info')

# Use BeautifulSoup to find the full size .jpg image URL
# for link in soup.find_all('figure', class_='lede'):
#     jpl_img = link.find('a').get('href')

#     jpl_base = 'https://www.jpl.nasa.gov'

# feature_image_url = jpl_base + jpl_img
# print(feature_image_url)

for link in soup.find_all('div', class_='download_tiff'):
    jpl_img = link.find('a').get('href')

    jpl_base = 'https://www.jpl.nasa.gov'

feature_image_url = jpl_base + jpl_img
print(feature_image_url)

https://www.jpl.nasa.gov//photojournal.jpl.nasa.gov/jpeg/PIA16711.jpg


### Mars Facts

In [11]:
# Use Pandas to scrape the table of facts
facts_url = 'https://space-facts.com/mars/'
mars_table = pd.read_html(facts_url)
mars_table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [20]:
df_mars = mars_table[0]
df_mars.columns = ['Description', 'Value']
df_mars.set_index('Description', inplace=True)
df_mars.head()

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"


In [21]:
# Use Pandas to convert the data to a HTML table string
df_mars.to_html('Mars_Facts_table.html')