# Scraping

In [1]:
# Dependencies
from bs4 import BeautifulSoup 
import requests
import pymongo
from splinter import Browser
import time
import pandas as pd
import re

## NASA Mars News

In [2]:
# Mars News URL
url = "https://mars.nasa.gov/news/"

#pointing to the directory where chromedriver exists
executable_path = {"executable_path":"/usr/local/bin/chromedriver"}
browser = Browser("chrome", **executable_path, headless = False)

# Retrieve page with the requests module
browser.visit(url)
html = browser.html

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')

# Get title & description
news_title = soup.find('div', 'content_title', 'a').text
news_p = soup.find('div', class_='article_teaser_body').text

In [3]:
news_title

'Update on Opportunity Rover after Martian Dust Storm'

In [4]:
news_p

"One month since increasing their commanding frequency, engineers have yet to hear from NASA's Opportunity rover."

## JPL Mars Space Images

In [5]:
# JPL Mars URL
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Setting up splinter
executable_path = {'executable_path': "/usr/local/bin/chromedriver"}
browser = Browser('chrome', **executable_path)
browser.visit(url)

# Moving through the pages
time.sleep(5)
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(5)
browser.click_link_by_partial_text('more info')
time.sleep(5)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Get featured image
results = soup.find('article')
extension = results.find('figure', 'lede').a['href']
link = "https://www.jpl.nasa.gov"
featured_image_url = link + extension 
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19180_hires.jpg


## Mars Weather

In [6]:
#get mars weather's latest tweet from the website
url_weather = "https://twitter.com/marswxreport?lang=en"
browser.visit(url_weather)

html_weather = browser.html
soup = BeautifulSoup(html_weather, "html.parser")

mars_weather=soup.find(string=re.compile("Sol"))
print(mars_weather) 

Sol 2171 (2018-09-14), high -12C/10F, low -65C/-84F, pressure at 8.79 hPa, daylight 05:43-17:59


## Mars Facts

In [7]:
# Mars Facts URL
url = "https://space-facts.com/mars/"

# Retrieve page with the requests module
html = requests.get(url)

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html.text, 'html.parser')

# Empty dictionary for info
mars_profile = {}

# Get info
results = soup.find('tbody').find_all('tr')

# Storing profile information
for result in results:
    key = result.find('td', 'column-1').text.split(":")[0]
    value = result.find('td', 'column-2').text
    
    mars_profile[key] = value
    
# Creating a DataFrame
profile_df = pd.DataFrame([mars_profile]).T.rename(columns = {0: "Value"})
profile_df.index.rename("Description", inplace=True)

# Converting to html
profile_html = "".join(profile_df.to_html().split("\n"))

In [15]:
profile_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter</th>      <td>6,792 km\\n</td>    </tr>    <tr>      <th>First Record</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Mass</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period</th>      <td>687 days (1.9 years)\\n</td>    </tr>    <tr>      <th>Polar Diameter</th>      <td>6,752 km\\n</td>    </tr>    <tr>      <th>Recorded By</th>      <td>Egyptian astronomers</td>    </tr>    <tr>      <th>Surface Temperature</th>      <td>-153 to 20 °C</td>    </tr>  </tbody></table>'

## Mars Hemispheres

In [8]:
# Mars Hemispheres URL
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Empty list of image urls
hemisphere_image_urls = []

### Valles Marineris

In [9]:
# Setting up splinter
executable_path = {'executable_path': "/usr/local/bin/chromedriver"}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(url)

# Moving through pages
time.sleep(5)
browser.click_link_by_partial_text('Valles Marineris Hemisphere Enhanced')
time.sleep(5)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Store link
valles_link = soup.find('div', 'downloads').a['href']

# Create dictionary
valles_marineris = {
    "title": "Valles Marineris Hemisphere",
    "img_url": valles_link
}

# Appending dictionary
hemisphere_image_urls.append(valles_marineris)

### Cerberus

In [10]:
# Setting up splinter
executable_path = {'executable_path': "/usr/local/bin/chromedriver"}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(url)

# Moving through pages
time.sleep(5)
browser.click_link_by_partial_text('Cerberus Hemisphere Enhanced')
time.sleep(5)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Store link
cerberus_link = soup.find('div', 'downloads').a['href']

# Create dictionary
cerberus = {
    "title": "Cerberus Hemisphere",
    "img_url": cerberus_link
}

# Appending dictionary
hemisphere_image_urls.append(cerberus)

### Schiaparelli

In [11]:
# Setting up splinter
executable_path = {'executable_path': "/usr/local/bin/chromedriver"}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(url)

# Moving through pages
time.sleep(5)
browser.click_link_by_partial_text('Schiaparelli Hemisphere Enhanced')
time.sleep(5)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Store link
schiaparelli_link = soup.find('div', 'downloads').a['href']

# Create dictionary
schiaparelli = {
    "title": "Schiaparelli Hemisphere",
    "img_url": schiaparelli_link
}

# Appending dictionary
hemisphere_image_urls.append(schiaparelli)

### Syrtis Major

In [12]:
# Setting up splinter
executable_path = {'executable_path': "/usr/local/bin/chromedriver"}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(url)

# Moving through pages
time.sleep(5)
browser.click_link_by_partial_text('Syrtis Major Hemisphere Enhanced')
time.sleep(5)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Store link
syrtis_link = soup.find('div', 'downloads').a['href']

# Create dictionary
syrtis_major = {
    "title": "Syrtis Major Hemisphere",
    "img_url": syrtis_link
}

# Appending dictionary
hemisphere_image_urls.append(syrtis_major)

In [13]:
hemisphere_image_urls

[{'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'},
 {'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}]