In [None]:
# Dependencies
import json
import shutil
from urllib.parse import urljoin

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup as bs
from IPython.display import Image
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
# Setup splinter
# Use the chromedriver path resolved by webdriver_manager rather than a
# hardcoded, machine-specific location, so the notebook runs on any machine.
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

## Step 1 - Scraping

### NASA Mars News

In [None]:
# Have the browser navigate to the NASA Mars News page and parse its HTML
url = 'https://redplanetscience.com/'
browser.visit(url)
# Wait (up to 5 seconds) for the first headline container to be present
# before snapshotting the page. If the list is filled in after the initial
# load, grabbing browser.html immediately would miss it and the lookups in
# the following cells would return None.
browser.is_element_present_by_css('div.content_title', wait_time=5)
html = browser.html
soup = bs(html, 'html.parser')

In [None]:
# Preview the parsed page. Only the first 2000 characters are printed:
# dumping the whole document floods the cell output and bloats the saved
# notebook. Raise the slice bound temporarily when inspecting deeper markup.
print(soup.prettify()[:2000])

In [None]:
# Print the text of the HTML document's <title> element.
# (The former bare `soup.title` expression was not the last statement in the
# cell, so it displayed nothing and has been dropped.)
print(soup.title.text.strip())

In [None]:
# Find the title related to the latest News
news_title = soup.find('div', class_='content_title')
# find() returns None when nothing matches; fail here with a clear message
# instead of an opaque AttributeError on the next line.
assert news_title is not None, "No <div class='content_title'> found - did the page finish loading?"
print(news_title.text.strip())

In [None]:
# Find the paragraph text related to the latest News
news_p = soup.find('div', class_='article_teaser_body')
# find() returns None when nothing matches; fail here with a clear message
# instead of an opaque AttributeError on the next line.
assert news_p is not None, "No <div class='article_teaser_body'> found - did the page finish loading?"
print(news_p.text.strip())

### JPL Mars Space Images - Featured Image

In [None]:
# JPL Mars Space Images - Featured Image:
# open the site in the browser, then parse the HTML it currently holds
url1 = "https://spaceimages-mars.com"
browser.visit(url1)
html1 = browser.html
soup1 = bs(html1, features="html.parser")

In [None]:
# Preview the parsed page. Only the first 2000 characters are printed:
# dumping the whole document floods the cell output and bloats the saved
# notebook. Raise the slice bound temporarily when inspecting deeper markup.
print(soup1.prettify()[:2000])

In [None]:
# Find the featured image
featured_img_tag = soup1.find('img', class_='headerimage fade-in')
# find() returns None when nothing matches; fail with a clear message.
assert featured_img_tag is not None, "Featured <img class='headerimage fade-in'> not found on the page"
# Resolve the src against the page URL that was actually visited instead of
# repeating the host as a string literal. urljoin handles relative,
# root-relative and already-absolute src values.
# The URL is kept in a one-element list because later cells index it with [0].
featured_image_url = [urljoin(url1, featured_img_tag['src'])]
print(featured_image_url[0])

In [None]:
# Render the featured image inline in the cell output, loaded from its URL
Image(url=featured_image_url[0])

### Mars Facts

In [None]:
# Source page for the Mars facts table
url2 = "https://galaxyfacts-mars.com"

In [None]:
# Let pandas fetch the page and parse its tabular data;
# the result is indexed by position in the next cell.
table = pd.read_html(io=url2)
table

In [None]:
# Build the facts DataFrame from the first scraped table.
# The first row of that table is used as the column titles, so it is promoted
# to the header AND removed from the data; previously it stayed behind as a
# duplicate data row. A new frame is built rather than renaming in place, so
# table[0] is left untouched and this cell can be re-run safely.
facts_raw = table[0]
df = (
    facts_raw
    .iloc[1:]
    .rename(columns=facts_raw.iloc[0].to_dict())
    .reset_index(drop=True)
)
df.head()

In [None]:
# Render the facts DataFrame as an HTML table; with no output buffer
# given, to_html returns the markup as a string.
html_table = df.to_html(buf=None)
html_table

In [None]:
# Strip unwanted newlines to clean up the table.
# str.replace returns a new string and leaves the original untouched, so the
# result has to be assigned back; otherwise html_table keeps its newlines.
html_table = html_table.replace('\n', '')
html_table

### Mars Hemispheres

In [None]:
# Mars Hemispheres: point the browser at the landing page
url3 = "https://marshemispheres.com/"
browser.visit(url3)

In [None]:
# XPath selecting the <img> inside the <a> of the first <div> within the
# second <div> under the element whose id is "product-section"
xpath = "//*[@id=\"product-section\"]/div[2]/div[1]/a/img"

In [None]:
# Look the Mars image up through the XPath selector and click the first
# match with splinter to bring up the full resolution image
results = browser.find_by_xpath(xpath)
img = results.first
img.click()

In [None]:
# Scrape the browser into soup and use soup to find the full resolution image of mars
# Save the image url to a variable called `img_url`
html3 = browser.html
soup3 = bs(html3, 'html.parser')
full_img_tag = soup3.find("img", class_="jpg")
# find() returns None when nothing matches; fail with a clear message
# instead of a TypeError from subscripting None.
assert full_img_tag is not None, "No <img class='jpg'> found on the current page"
# The src attribute may be relative to the current page. Resolve it against
# the browser's current URL so img_url can be handed straight to
# requests.get; an already-absolute src passes through unchanged.
img_url = urljoin(browser.url, full_img_tag["src"])
img_url

In [None]:
# Use the requests library to download and save the image from the `img_url` above
# The response is used as a context manager so the connection is released
# even if writing fails; the timeout keeps a stalled server from hanging
# the notebook indefinitely.
with requests.get(img_url, stream=True, timeout=30) as response:
    # Stop on HTTP errors instead of saving an error page as the image.
    response.raise_for_status()
    # Ask urllib3 to undo any gzip/deflate content encoding while streaming,
    # so the bytes written to disk are the actual image data.
    response.raw.decode_content = True
    with open('img.png', 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)

In [None]:
# Display the image saved as 'img.png' in the cell output
Image(url='img.png')

In [None]:
# Shut down the splinter-controlled browser now that scraping is finished
browser.quit()