## Part 1 - Scraping websites

In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

In [2]:
executable_path = {'executable_path': "/Users/Marina/Desktop/chromedriver_win32/chromedriver"}
browser = Browser('chrome', **executable_path, headless=False)

### Mars News

In [3]:
### URL of NASA Mars News to be scrape ###
url_1 = "https://mars.nasa.gov/news"

browser.visit(url_1)  # HTML object
html_1 = browser.html   # Assign a variable for the HTML object

# Parse HTML with Beautiful Soup
soup = bs(html_1, 'html.parser')  

In [4]:
# Retrieve all elements that contain news title
results = soup.find_all("div", class_="content_title")
results

# Get the lastest news title
news_title = results[1].get_text()
print(news_title)

# Get the latest news paragrapgh
news_p = soup.find("div", class_="article_teaser_body").text
print(news_p)

A Martian Roundtrip: NASA's Perseverance Rover Sample Tubes
Marvels of engineering, the rover's sample tubes must be tough enough to safely bring Red Planet samples on the long journey back to Earth in immaculate condition. 


### Mars Featured Image

In [5]:
# URL of JPL Mars Space Images to be scrape
url_2 = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# HTML object
browser.visit(url_2) 

In [6]:
# Search for "Full image" and "click"
browser.find_by_id('full_image').click()

In [7]:
# Search for "More info" and "click"
browser.links.find_by_partial_text('more info').click()

In [8]:
html_2 = browser.html
soup = bs(html_2, 'html.parser')

# Search for image source
results = soup.find_all('figure', class_='lede')
relative_img_path = results[0].a['href']
featured_img = 'https://www.jpl.nasa.gov' + relative_img_path

print(featured_img)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19323_hires.jpg


### Mars Facts

In [10]:
# URL of Mars Facts to be scrape
url_3 = "https://space-facts.com/mars/"
 
# Use Pandas to scrape data
tables = pd.read_html(url_3, match="Equatorial Diameter")[0]

# Rename table colunms
df = tables.rename(columns={0: " ", 1: "  "})
df

Unnamed: 0,Unnamed: 1,Unnamed: 2
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [12]:
# Convert the dataframe to html string
mars_facts_table = df.to_html(class="table table-sm", index=False, header=True, border=0)
mars_facts_table

SyntaxError: invalid syntax (<ipython-input-12-705faf388433>, line 2)

### Mars Hemispheres

In [11]:
# URL of JPL Mars Space Images to be scrape
url_4 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# HTML object
browser.visit(url_4) 

In [12]:
# Store data in a list
hemisphere_image_urls = []

# Get a List of All the Hemispheres
links = browser.find_by_css("a.product-item h3")
for item in range(len(links)):
    hemisphere = {}
    
    # Find and click on each element on the list
    browser.find_by_css("a.product-item h3")[item].click()
    
    # Get Hemisphere Title
    hemisphere["title"] = browser.find_by_css("h2.title").text

    # Find Sample Image Anchor Tag, Extract <href> and store full img_url
    sample_element = browser.links.find_by_text("Sample").first
    hemisphere["img_url"] = sample_element["href"]   
    
    # Append Hemisphere Object to List
    hemisphere_image_urls.append(hemisphere)
    
    # Navigate Backwards
    browser.back()

In [13]:
# Call list of Hemisphere and Image dictionaries
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]