In [1]:
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup

In [2]:
def init_browser():
    """Start a visible (non-headless) Chrome session through splinter.

    Returns:
        The ``Browser`` object created for the "chrome" driver, pointed at
        the ``chromedriver`` executable name.
    """
    return Browser("chrome", executable_path="chromedriver", headless=False)


In [3]:

    # Scrape the latest NASA Mars news headline and teaser text.
    browser = init_browser()
    try:
        # Visit Nasa news url.
        news_url = "https://mars.nasa.gov/news/"
        browser.visit(news_url)

        # The article list appears to be rendered client-side: parsing the
        # page immediately yielded a site-level title and no teaser at all.
        # Give the teaser element up to 10 seconds to show up before
        # snapshotting the HTML (the call returns False on timeout, it does
        # not raise).
        browser.is_element_present_by_css("div.article_teaser_body", wait_time=10)

        # HTML Object.
        html = browser.html
    finally:
        # Always close the browser, even if the visit fails.
        browser.quit()

    # Parse HTML with Beautiful Soup
    news_soup = BeautifulSoup(html, "html.parser")

    # The first teaser on the page belongs to the most recent article.
    teaser_tag = news_soup.find("div", class_="article_teaser_body")

    # NOTE(review): assumes each article's title div precedes its teaser div
    # in the markup, so the closest preceding "content_title" is the matching
    # headline rather than an unrelated one elsewhere on the page - confirm.
    title_tag = None
    if teaser_tag is not None:
        title_tag = teaser_tag.find_previous("div", class_="content_title")
    if title_tag is None:
        # Fall back to the first title on the page.
        title_tag = news_soup.find("div", class_="content_title")

    # Store plain text (not Tag objects) in the news variables; None means
    # the element was not found.
    news_title = title_tag.get_text().strip() if title_tag is not None else None
    news_paragraph = teaser_tag.get_text().strip() if teaser_tag is not None else None

In [4]:
# Show the scraped headline and teaser for a quick visual check.
print(f'Title: {news_title}\nText: {news_paragraph}')

Title: Mars Now
Text: None


In [5]:
# Scrape the URL of the JPL featured Mars image.
# Run init_browser/driver.
browser = init_browser()
try:
    # Visit the url for JPL Featured Space Image.
    jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(jpl_url)

    # Select "FULL IMAGE". Uses the same `browser.links` finder as the
    # "more info" step below instead of the deprecated
    # `click_link_by_partial_text` shortcut.
    browser.links.find_by_partial_text("FULL IMAGE").click()

    # Wait briefly for the "more info" link to appear, then click it.
    browser.is_element_present_by_text("more info", wait_time=1)
    more_info_element = browser.links.find_by_partial_text("more info")
    more_info_element.click()

    # HTML Object.
    html = browser.html
finally:
    # Always close the browser, even if a link is missing or a click fails.
    browser.quit()

# Parse HTML with Beautiful Soup
image_soup = BeautifulSoup(html, "html.parser")

# Scrape image URL (the href of the link inside the "lede" figure).
image_url = image_soup.find("figure", class_="lede").a["href"]

# Concatenate https://www.jpl.nasa.gov with image_url.
featured_image_url = f'https://www.jpl.nasa.gov{image_url}'



In [6]:
# Show the full URL of the featured image.
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17302_hires.jpg


In [7]:
# Scrape the text of the first matching tweet from the Mars weather account.
browser = init_browser()
try:
    # Visit the Mars weather Twitter page.
    tweet_url = "https://twitter.com/MarsWxReport"
    browser.visit(tweet_url)

    # Exact class attribute of the div that holds the tweet text.
    # NOTE(review): these look like generated class names and may change
    # whenever the site is redeployed - confirm before relying on them.
    tweet_classes = "css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0"

    # Give the page up to 10 seconds to render a div carrying all of those
    # classes before snapshotting the HTML (returns False on timeout, it does
    # not raise).
    tweet_selector = "div." + ".".join(tweet_classes.split())
    browser.is_element_present_by_css(tweet_selector, wait_time=10)

    # HTML Object.
    html = browser.html
finally:
    # Always close the browser, even if the visit fails.
    browser.quit()

# Parse HTML with Beautiful Soup
tweet_soup = BeautifulSoup(html, "html.parser")

# Retrieve the text of the first matching tweet and store it.
# Raises AttributeError if no matching div is present in the page.
mars_weather = tweet_soup.find("div", class_=tweet_classes).get_text()

In [8]:
# Display the scraped weather text as the cell output.
mars_weather

'InSight sol 608 (2020-08-11) low -93.0ºC (-135.5ºF) high -19.5ºC (-3.1ºF)\nwinds from the WNW at 8.5 m/s (19.0 mph) gusting to 23.8 m/s (53.3 mph)\npressure at 7.90 hPa'

In [9]:
# Build an HTML table of Mars facts.
facts_url = "https://space-facts.com/mars/"

# pd.read_html downloads and parses the page itself, so no browser session is
# opened here. It returns a list of DataFrames, one per table it finds.
facts_tables = pd.read_html(facts_url)

# Render the first table as HTML without the header row or the index column.
mars_facts = facts_tables[0].to_html(header=False, index=False)

In [10]:
# Show the generated HTML markup for the facts table.
print(mars_facts)

<table border="1" class="dataframe">
  <tbody>
    <tr>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <td>Recorded By:</td>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


In [None]:
# Collect a title and an image URL for every hemisphere in the search results.
usgs_base_url = "https://astrogeology.usgs.gov"
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
hemisphere_image_urls = []

browser = init_browser()
try:
    browser.visit(hemispheres_url)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")

    # Each search result is a div.item inside the div.result-list container.
    products = soup.find("div", class_="result-list")
    hemispheres = products.find_all("div", class_="item")

    for hemisphere in hemispheres:
        # Drop the word "Enhanced" and the whitespace it leaves behind.
        title = hemisphere.find("h3").text.replace("Enhanced", "").strip()

        # Join base and href with exactly one slash between them.
        end_link = hemisphere.find("a")["href"]
        image_link = usgs_base_url + "/" + end_link.lstrip("/")

        # Open the detail page and take the first link in its downloads box.
        browser.visit(image_link)
        detail_soup = BeautifulSoup(browser.html, "html.parser")
        downloads = detail_soup.find("div", class_="downloads")
        image_url = downloads.find("a")["href"]

        hemisphere_image_urls.append({"title": title, "img_url": image_url})
        print(title)
        print(image_url)
finally:
    # Always close the browser, even if one of the pages fails to parse.
    browser.quit()


Cerberus Hemisphere 
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
Schiaparelli Hemisphere 
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
Syrtis Major Hemisphere 
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
Valles Marineris Hemisphere 
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


In [None]:
# Gather every scraped result into a single dictionary and display it.
mars_data = {
    'news_title': news_title,
    'news_paragraph': news_paragraph,
    'featured_image_url': featured_image_url,
    'mars_weather': mars_weather,
    'mars_facts': mars_facts,
    'hemisphere_image_urls': hemisphere_image_urls,
}
mars_data