# Importing Dependencies

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser

Setting up a browser object with chrome webdriver and navigating with browser.visit()

In [2]:
def init_browser(url):
    """Start a Chrome browser through splinter and open ``url``.

    The browser is launched with the ``chromedriver.exe`` executable and
    with ``headless=False``, i.e. a visible window.

    Parameters
    ----------
    url : str
        Address passed to ``browser.visit``.

    Returns
    -------
    The splinter browser object, already navigated to ``url``. The caller
    is responsible for calling ``quit()`` on it.

    Raises
    ------
    Whatever ``browser.visit`` raises; the browser is shut down first so a
    failed navigation does not leave an orphaned Chrome process behind.
    """
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=False)
    try:
        browser.visit(url)
    except Exception:
        # The caller never receives the handle on failure, so nobody else
        # could close this browser: clean up here, then re-raise unchanged.
        browser.quit()
        raise
    return browser

# NASA Mars News

Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. 

In [3]:
def get_nasa_mars_news(url):
    """Scrape the first news title and teaser paragraph from ``url``.

    Parameters
    ----------
    url : str
        Page to open with ``init_browser``.

    Returns
    -------
    dict
        ``{"news_title": ..., "news_p": ...}`` when both elements are found.
        An empty dict when either element is missing from the page (the
        resulting ``AttributeError`` is printed, not raised).
    """
    browser = init_browser(url)
    mars_news_dict = {}
    try:
        soup = BeautifulSoup(browser.html, 'lxml')

        # soup.find returns None for a missing element, so the attribute
        # access below is what raises AttributeError in that case.
        news_title = soup.find('div', class_='content_title').a.text
        news_paragraph = soup.find('div', class_='article_teaser_body').text

        # Only fill the dict once both lookups succeeded, so a failure
        # never yields a half-populated result.
        mars_news_dict["news_title"] = news_title
        mars_news_dict["news_p"] = news_paragraph
    except AttributeError as e:
        print(e)
    finally:
        # Always release the browser, including on unexpected errors.
        browser.quit()
    return mars_news_dict

# Scrape the latest headline and teaser from the NASA Mars news page.
nasa_url = "https://mars.nasa.gov/news"
nasa_mars_news = get_nasa_mars_news(nasa_url)
# Bare expression so the notebook displays the scraped dict.
nasa_mars_news

{'news_p': "A new paper suggests that liquid water may be sitting under a layer of ice at Mars' south pole.",
 'news_title': 'NASA Statement on Possible Subsurface Lake near Martian South Pole'}

# JPL Mars Space Images - Featured Image
1. Visit the URL (https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars) for the JPL Featured Space Image.

2. Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.

3. Make sure to find the image url to the full size .jpg image.

4. Make sure to save a complete url string for this image.

In [14]:
def get_featured_image_url(url):
    """Return the absolute URL of the full-size featured image.

    Opens ``url``, reads the ``data-link`` attribute of the
    ``<a id="full_image">`` element, visits that detail page, and takes the
    ``href`` of the link inside ``<figure class="lede">``.

    Parameters
    ----------
    url : str
        Page to open with ``init_browser``.

    Returns
    -------
    str or None
        The image URL prefixed with ``https://www.jpl.nasa.gov``, or
        ``None`` when any of the expected elements or attributes is missing
        (the error is printed, not raised).
    """
    browser = init_browser(url)
    # Bound up front so the failure path returns None instead of hitting an
    # UnboundLocalError at the return statement.
    full_image_url = None
    try:
        soup = BeautifulSoup(browser.html, 'lxml')
        partial_link = soup.find("a", {"id": "full_image"})["data-link"]
        mediumsize_image_url = "https://www.jpl.nasa.gov" + partial_link

        browser.visit(mediumsize_image_url)
        soup = BeautifulSoup(browser.html, 'lxml')
        fullsize_image_partial_url = soup.find("figure", class_="lede").a["href"]
        full_image_url = "https://www.jpl.nasa.gov" + fullsize_image_partial_url
    except (AttributeError, KeyError, TypeError) as e:
        # A missing element shows up as AttributeError (attribute access on
        # None) or TypeError (subscripting None); a missing attribute on a
        # found element shows up as KeyError.
        print(e)
    finally:
        # Always release the browser, including on unexpected errors.
        browser.quit()
    return full_image_url

                                        

In [15]:
# Resolve the full-size featured image URL from the JPL space images page.
jpl_nasa_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
featured_image_url = get_featured_image_url(jpl_nasa_url)
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19637_hires.jpg


# Mars Weather
- Visit the Mars Weather Twitter account (https://twitter.com/marswxreport?lang=en) and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called mars_weather.

In [6]:
def get_mars_weather(url):
    """Return the text of the first ``<p class="tweet-text">`` on ``url``.

    Parameters
    ----------
    url : str
        Page to open with ``init_browser``.

    Returns
    -------
    str or None
        The element's text, or ``None`` when no such element is on the page
        (the resulting ``AttributeError`` is printed, not raised).
    """
    # Created outside the try block so ``browser`` is always bound by the
    # time the finally clause runs.
    browser = init_browser(url)
    # Bound up front so the failure path returns None instead of hitting an
    # UnboundLocalError at the return statement.
    mars_weather = None
    try:
        soup = BeautifulSoup(browser.html, 'lxml')
        mars_weather = soup.find("p", class_="tweet-text").text
    except AttributeError as e:
        print(e)
    finally:
        # Always release the browser, including on unexpected errors.
        browser.quit()
    return mars_weather

# Scrape the most recent weather tweet text from the Mars weather account page.
mars_weather_twitter_url = "https://twitter.com/marswxreport?lang=en"
mars_weather = get_mars_weather(mars_weather_twitter_url)
print(mars_weather)

Radar analysis from the Mars Express orbiter indicates liquid water beneath the Planum Australe region.
https://www.esa.int/Our_Activities/Space_Science/Mars_Express/Mars_Express_detects_liquid_water_hidden_under_planet_s_south_pole …pic.twitter.com/30d37fSxQc


# Mars Facts
- Visit the Mars Facts webpage, https://space-facts.com/mars/, and use Pandas to scrape the table containing facts about the planet, including Diameter, Mass, etc.

- Use Pandas to convert the data to an HTML table string.

In [7]:
def get_mars_facts_dataframe(url):
    """Load the first HTML table found at ``url`` as a two-column DataFrame.

    Parameters
    ----------
    url : str
        Location handed to ``pd.read_html``.

    Returns
    -------
    pandas.DataFrame
        The first table parsed from the page, with its columns renamed to
        ``"Facts"`` and ``"Value"``.
    """
    # read_html yields a list of DataFrames, one per table; keep the first.
    facts_table = pd.read_html(url)[0]
    facts_table.columns = ["Facts", "Value"]
    return facts_table

# Load the Mars facts table into a DataFrame with "Facts"/"Value" columns.
mars_facts_url = "https://space-facts.com/mars"
mars_facts_df = get_mars_facts_dataframe(mars_facts_url)

In [8]:
# Bare expression so the notebook renders the facts table.
mars_facts_df

Unnamed: 0,Facts,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


Converting the dataframe to html table

In [9]:
# Serialise the facts DataFrame to an HTML <table> string and show the markup.
html_table = mars_facts_df.to_html()
print(html_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Facts</th>
      <th>Value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Mass:</td>
      <td>6.42 x 10^23 kg (10.7% Earth)</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.52 AU)</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>6</th>
      <td>Surface Temperature:</td>
      <td>-153 to 20 °C</td>
    </tr>
    <tr>
      <th>7</th>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>8</th>
      <td

In [10]:
# Sanity check: to_html() returned a plain str.
type(html_table)

str

# Mars Hemispheres
- Visit the USGS Astrogeology site, https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars to obtain high resolution images for each of Mars's hemispheres.

- You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

- Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.

- Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [16]:
def find_mars_hemisphere_images(url):
    """Collect the title and full-image URL for each hemisphere listed at ``url``.

    For every ``<div class="description">`` on the results page, the link
    whose text matches the item's ``<h3>`` title is clicked, the ``src`` of
    the ``<img class="wide-image">`` on the detail page is read, and the
    "Back" link is clicked to return to the results page.

    Parameters
    ----------
    url : str
        Results page to open with ``init_browser``.

    Returns
    -------
    list of dict
        One ``{"title": ..., "image_url": ...}`` dict per hemisphere, in
        page order. If an expected element is missing, the error is printed
        and the entries collected so far are returned.
    """
    browser = init_browser(url)
    # Bound before the try block so a failure still returns a list.
    mars_images_list = []
    try:
        soup = BeautifulSoup(browser.html, 'lxml')

        for image in soup.find_all("div", class_="description"):
            # get the image title
            image_title = image.h3.text

            # click the image link
            browser.click_link_by_partial_text(image_title)

            # Parse the detail page and locate the full-size image
            detail_soup = BeautifulSoup(browser.html, 'lxml')
            full_image_partial_link = detail_soup.find("img", {"class": "wide-image"})["src"]

            # Prefix the site root to turn the partial link into a full URL.
            # NOTE(review): the notebook text asks for the key "img_url";
            # "image_url" is kept so existing consumers keep working.
            # Confirm which key is wanted.
            mars_images_list.append({
                "title": image_title,
                "image_url": "https://astrogeology.usgs.gov" + full_image_partial_link,
            })

            # Go back to the results page before handling the next item
            browser.find_link_by_text("Back").first.click()
    except (AttributeError, KeyError, TypeError) as e:
        # A missing element shows up as AttributeError (attribute access on
        # None) or TypeError (subscripting None); a missing attribute on a
        # found element shows up as KeyError.
        print(e)
    finally:
        # Always release the browser, including when scraping fails midway.
        browser.quit()
    return mars_images_list

# Scrape title and image URL for every hemisphere on the USGS results page.
mars_hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
hemisphere_image_urls = find_mars_hemisphere_images(mars_hemispheres_url)

<splinter.driver.webdriver.chrome.WebDriver object at 0x0000020EF71337F0>


In [17]:
# Bare expression so the notebook displays the list of hemisphere dicts.
hemisphere_image_urls

[{'image_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'image_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'image_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'image_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [13]:
# help(browser)