In [1]:
# Dependencies
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup as bs

In [2]:
# Path to chromedriver
executable_path = {"executable_path": "C:/bin/chromedriver"}
browser=Browser("chrome", **executable_path, headless=False)

### NASA Mars News

In [3]:
# Build query URL for NASA news (using Splinter to scrape)
url = "https://mars.nasa.gov/news/"
browser.visit(url)

# Scrape page into Soup
html = browser.html
soup = bs(html, 'html.parser')

# Collect the latest News Title and Paragraph Text
# Assign the text to variables that you can reference later
news_title = soup.find('div', class_='bottom_gradient').text
news_p = soup.find('div', class_='article_teaser_body').text

print(news_title)
print(news_p)

NASA Moves Forward With Campaign to Return Mars Samples to Earth
During this next phase, the program will mature critical technologies and make critical design decisions as well as assess industry partnerships.


### JPL Mars Space Images - Featured Image

In [4]:
# Build query URL for JPL featured [Mars] image - use Splinter to scrape
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

# Scrape page into Soup
html = browser.html
soup = bs(html, 'html.parser')

# Find the image url for the current Featured Mars Image
#featured_image_url = soup.find('a', class_='carousel_item', style) ## produces error
#featured_image_url = soup.find('a', class_='carousel_item')['style'] ## produces error
#featured_image_url = soup.find('article', class_='carousel_item')['style'] ## includes unneeded control text

# Removed uneeded control text
#featured_image_url = soup.find('article', class_='carousel_item')['style'].replace('background-image: url(','')
#featured_image_url = soup.find('article', class_='carousel_item')['style'].replace('background-image: url(','').replace(');', '')  ## includes quotes

# Find the image url for the current Featured Mars Image and remove unneeded control text
featured_image_url = soup.find('article', class_='carousel_item')['style'].\
                        replace('background-image: url(','').\
                        replace(');', '')[1:-1]



# image url from html only makes sense when referenced in source site - add source site to front of url
featured_image_url = "https://www.jpl.nasa.gov" + featured_image_url

featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA16227-1920x1200.jpg'

### Mars Facts

In [5]:
# Visit the Mars Facts webpage and use Pandas to scrape the table
results = pd.read_html("https://space-facts.com/mars/")
print(f'Total tables: {len(results)}')
print()
facts_df1 = results[0]
facts_df2 = results[1]
facts_df3 = results[2]

# Use Pandas to convert the data to a HTML table string
facts_html1 = facts_df1.to_html()
facts_html2 = facts_df2.to_html()
facts_html3 = facts_df3.to_html()

#print(facts_html1)
print(facts_html2)
#print(facts_html3)

#facts_df = pd.DataFrame(results)
#facts_html = facts_df.to_html()
#print(facts_html)

Total tables: 3

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Mars - Earth Comparison</th>
      <th>Mars</th>
      <th>Earth</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Diameter:</td>
      <td>6,779 km</td>
      <td>12,742 km</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg</td>
      <td>5.97 × 10^24 kg</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Moons:</td>
      <td>2</td>
      <td>1</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Distance from Sun:</td>
      <td>227,943,824 km</td>
      <td>149,598,262 km</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Length of Year:</td>
      <td>687 Earth days</td>
      <td>365.24 days</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Temperature:</td>
      <td>-87 to -5 °C</td>
      <td>-88 to 58°C</td>
    </tr>
  </tbody>
</table>


### Mars Hemispheres

In [6]:
# Visit the USGS Astrogeology site to obtain high resolution images for each of Mar's hemispheres
# Build query URL for page in USGS Astrogeology site that has pics of Mars' hemispheres
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)

# Scrape page into Soup
html = browser.html
soup = bs(html, 'html.parser')

images = soup.find_all('div', class_='item')

# Save both the image url string and the Hemisphere title containing the hemisphere name. 
# Use a Python dictionary to store the data using the keys img_url and title.

hemisphere_image_urls = []
for i in images:
    url = "https://astrogeology.usgs.gov" + i.find('a')['href']
#    print(url)
    title = i.find('h3').text.strip()
#    print(title)
    hemisphere_image_urls.append({'title':title,'img_url':url})

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced'}]