In [None]:
import os
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser

In [None]:
#Address of the NASA Mars news page that the news scrape starts from
url = "https://mars.nasa.gov/news/"

In [None]:
#Retrieve the page with the requests module; the timeout keeps the cell from
#hanging indefinitely on a stalled connection
response = requests.get(url, timeout=30)

#Fail right away on a 4xx/5xx reply so an error page is never parsed as the news page
response.raise_for_status()

In [None]:
#Parse the downloaded markup with Python's built-in 'html.parser' backend
soup = BeautifulSoup(markup=response.text, features='html.parser')

In [None]:
 #Examine the results, then determine element that contains sought info
formatted_page = soup.prettify()
print(formatted_page)

In [None]:
#find() returns only the first matching element (or None when nothing matches), not a list
title_result = soup.find('div', class_="content_title")

#The teaser div can be absent, in which case find() returns None; keep None rather
#than crashing on `.text`, so the cell still completes
teaser_div = soup.find('div', class_="rollover_description_inner")
news_para = teaser_div.text if teaser_div is not None else None

In [None]:
#Display the element matched for the 'content_title' div so its markup can be inspected
title_result

In [None]:
#Display the teaser text taken from the 'rollover_description_inner' div
news_para

In [None]:

#Default value so later cells can still reference news_title when the lookup below fails
news_title = None

try:
    #title_result is None when the title div was not found, and find('a') is None
    #when the div holds no link; either case raises AttributeError on this line
    news_title = title_result.find('a').text

    #Print results only if title is available
    if news_title:
        print('-------------')
        print(news_title)
except AttributeError as e:
    print('-------------')
    print(e)

In [None]:
#Show the scraped headline and teaser, one per line
print(news_title, news_para, sep="\n")

In [None]:
#Image Scraping
#Tell splinter where the chromedriver binary is (path is relative to the working directory)
executable_path = dict(executable_path='../chromedriver_win32/chromedriver.exe')

#Start Chrome with a visible window (headless=False)
browser = Browser('chrome', headless=False, **executable_path)

In [None]:
#Open the JPL space-images page filtered to the Mars category; the featured image
#URL is extracted from this page in the following cells
nasa_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(nasa_url)

In [None]:
#Open the full-size view by clicking the link whose text contains 'FULL'
full_image_link = browser.links.find_by_partial_text('FULL')
full_image_link.click()

#Pause three seconds, then capture the page source as the browser currently renders it
time.sleep(3)
rendered_html = browser.html

In [None]:
#Parse the browser-rendered markup with the built-in 'html.parser' backend
soup = BeautifulSoup(markup=rendered_html, features='html.parser')

In [None]:
#Retrieve the URL of the full image from the first <img class="fancybox-image"> element
full_image_tag = soup.find('img', class_='fancybox-image')
if full_image_tag is None:
    #Explain what is missing instead of failing with a bare indexing error
    raise ValueError("No <img class='fancybox-image'> element found in the rendered page")
full_image_url = full_image_tag['src']

#Create the full URL; urljoin copes with rooted, relative and already-absolute src values
featured_image_url = urljoin('https://www.jpl.nasa.gov', full_image_url)
featured_image_url

In [None]:
#Mars Facts Scraping

#Visit the Mars Facts webpage and collect the cell text of the table containing facts about the planet
#URL of page to be scraped
facts_url = "https://space-facts.com/mars/"

#Retrieve page with the requests module; the timeout keeps the cell from hanging on a
#stalled connection and raise_for_status() stops an error page from being parsed
response = requests.get(facts_url, timeout=30)
response.raise_for_status()

#Create a Beautiful Soup object
soup = BeautifulSoup(response.text, 'html.parser')

facts_table = soup.find("table", attrs={"class": "tablepress"})
if facts_table is None:
    #Explain what is missing instead of failing later with an AttributeError on None
    raise ValueError("No <table class='tablepress'> element found on the facts page")
facts_table_data = facts_table.tbody.find_all("tr")

#Put all the fact data in one flat list, markup stripped. Every row found is used,
#so a table shorter or longer than nine rows no longer breaks or gets truncated.
facts_data = [
    td.text.strip()
    for row in facts_table_data
    for td in row.find_all("td")
]

In [None]:
#Display the flat list of stripped table-cell strings collected from the facts table
facts_data

In [None]:
#Split the flat list in two: entries at even indices go to fact_item,
#entries at odd indices go to fact_data
fact_item, fact_data = facts_data[::2], facts_data[1::2]

In [None]:
#Build a two-column frame and move 'Description' into the index
facts_table = (
    pd.DataFrame({'Description': fact_item, 'Data': fact_data})
    .set_index('Description', drop=True)
)
facts_table

In [None]:
#Render the facts frame as an HTML table string
facts_table.to_html()

In [None]:
#Mars Hemispheres

#Visit the USGS Astrogeology site to obtain high resolution images for each of Mars's hemispheres.
#splinter setup
executable_path = {'executable_path': '../chromedriver_win32/chromedriver.exe'}

#Rebinding `browser` below would orphan any Chrome session started earlier in the
#notebook, so shut that session down first. NameError only means none exists yet.
try:
    browser.quit()
except NameError:
    pass

browser = Browser('chrome', **executable_path, headless=False)

In [None]:
#URL of the page being scraped
image_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

names = []
urls = []

#Cycle through the four hemisphere links and collect the names and image links
for item in range(4):
    #Reload the results page on every pass because the click below navigates away from it
    browser.visit(image_url)

    #Wait for the page to load
    time.sleep(6)
    browser.links.find_by_partial_text('Hemisphere')[item].click()

    #Create a Beautiful Soup object
    soup = BeautifulSoup(browser.html, 'html.parser')

    #Locate the heading and the full size image; stop with a clear message if either is absent
    title = soup.find('h2', class_='title')
    image_tag = soup.find('img', class_='wide-image')
    if title is None or image_tag is None:
        raise ValueError("Hemisphere page {} is missing its title or wide image".format(item))

    #Get the name of the hemisphere
    names.append(title.text.strip())

    #Get the URL of the full size hemisphere image. It is stored under its own name so
    #the news-page `url` defined earlier in the notebook is not overwritten.
    hemisphere_image_url = urljoin('https://astrogeology.usgs.gov', image_tag['src'])
    urls.append(hemisphere_image_url)

In [None]:
#Pair every hemisphere name with its image link, one dictionary per hemisphere
hemisphere_image_urls = [
    {'title': hemisphere_name, 'image_url': hemisphere_link}
    for hemisphere_name, hemisphere_link in zip(names, urls)
]
hemisphere_image_urls