In [1]:
# Import Dependencies
from time import sleep
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Configure Splinter: install/locate a chromedriver build, then launch Chrome with a visible window
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser("chrome", headless=False, **executable_path)



Current google-chrome version is 99.0.4844
Get LATEST chromedriver version for 99.0.4844 google-chrome
Trying to download new driver from https://chromedriver.storage.googleapis.com/99.0.4844.51/chromedriver_win32.zip
Driver has been saved in cache [C:\Users\tomsa\.wdm\drivers\chromedriver\win32\99.0.4844.51]


In [3]:
### NASA Mars News

In [4]:
## Scrape the latest News Title and Paragraph Text

# Point the automated browser at the Mars news site
url_1 = "https://redplanetscience.com/"
browser.visit(url_1)
# Pause three seconds before reading the page (presumably to let it finish rendering)
sleep(3)
# Capture the page's current HTML and hand it to Beautiful Soup
html = browser.html
news_soup = BeautifulSoup(markup=html, features="html.parser")

In [5]:
# Parallel lists: titles[i] and paragraphs[i] always describe the same article
titles = []
paragraphs = []
# Retrieve all the elements that contain the News Title and Paragraph Text
articles = news_soup.find_all("div", class_="list_text")
for article in articles:
    title_tag = article.find("div", class_="content_title")
    teaser_tag = article.find("div", class_="article_teaser_body")
    # Look both tags up before appending anything, so an article missing one
    # of them cannot leave the two lists out of step with each other
    if title_tag is None or teaser_tag is None:
        print("Ooops something happened!")
        continue
    titles.append(title_tag.text)
    paragraphs.append(teaser_tag.text)
# Fail with an explicit message instead of an opaque IndexError on titles[0]
if not titles:
    raise RuntimeError(f"No news articles could be scraped from {url_1}")
# Save and display the first entry in the News Title list
news_title = titles[0]
print(news_title)
# Save and display the first entry in the News Paragraph list
news_paragraph = paragraphs[0]
print(news_paragraph)

NASA Wins Two Emmy Awards for Interactive Mission Coverage
NASA-JPL's coverage of the Mars InSight landing earns one of the two wins, making this the NASA center's second Emmy.


In [6]:
### JPL Mars Space Images - Featured Image

In [7]:
## Locate the url of the current Featured Mars Image and keep it as a string

# Point the automated browser at the Mars space-images site
url_2 = "https://spaceimages-mars.com/"
browser.visit(url_2)
# Pause three seconds before reading the page (presumably to let it finish rendering)
sleep(3)
# Capture the page's current HTML and hand it to Beautiful Soup
html = browser.html
image_soup = BeautifulSoup(markup=html, features="html.parser")

In [8]:
# Retrieve all the anchors that link to the featured picture
featured_picture = image_soup.find_all("a", class_="showimg fancybox-thumbs")
# Gather the href of every matching anchor, reporting anchors that lack one
featured_hrefs = []
for picture in featured_picture:
    href = picture.get("href")
    if href is None:
        print("Ooops something happened!")
        continue
    featured_hrefs.append(href)
# Fail with an explicit message rather than a NameError on an unbound variable
if not featured_hrefs:
    raise RuntimeError(f"No featured image link found on {url_2}")
# When several anchors match, the last one is used.
# urljoin resolves the href against the site url without doubling or dropping a "/"
featured_image_url = urljoin(url_2, featured_hrefs[-1])
# Display the url for the current Featured Mars Image
print(f"featured image url: {featured_image_url}")

featured image url: https://spaceimages-mars.com/image/featured/mars2.jpg


In [9]:
### Mars Facts

In [10]:
##  Use Pandas to scrape the table containing facts about the planet

# Address of the Mars facts site
url_3 = "https://galaxyfacts-mars.com/"
# Load the page in the automated browser, then pause for three seconds
# NOTE(review): the table is later read with pd.read_html(url_3), which is given
# the url rather than browser.html, so this visit looks redundant — confirm
browser.visit(url_3)
sleep(3)

In [11]:
# Scrape every HTML table on the page into a list of dataframes
facts_tables = pd.read_html(url_3)
# Descriptive headers for the default integer column labels
facts_table_dict = {
    0: "Description",
    1: "Mars",
    2: "Earth",
}
# Take the first table (the Mars/Earth comparison), rename its columns and
# index it by "Description". rename/set_index return new frames, so the
# scraped facts_tables[0] is left untouched.
# NOTE(review): the site's own header row ("Mars - Earth Comparison") is kept
# as the first data row — confirm whether it should be dropped
facts_table_df = (
    facts_tables[0]
    .rename(columns=facts_table_dict)
    .set_index("Description", drop=True)
)
# Display the dataframe
print(facts_table_df)

                                    Mars            Earth
Description                                              
Mars - Earth Comparison             Mars            Earth
Diameter:                       6,779 km        12,742 km
Mass:                    6.39 × 10^23 kg  5.97 × 10^24 kg
Moons:                                 2                1
Distance from Sun:        227,943,824 km   149,598,262 km
Length of Year:           687 Earth days      365.24 days
Temperature:                -87 to -5 °C      -88 to 58°C


In [12]:
# Render the facts dataframe as an HTML <table> string
facts_table_html = facts_table_df.to_html()
# Print the raw markup so it can be inspected
print(facts_table_html)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Mars</th>
      <th>Earth</th>
    </tr>
    <tr>
      <th>Description</th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Mars - Earth Comparison</th>
      <td>Mars</td>
      <td>Earth</td>
    </tr>
    <tr>
      <th>Diameter:</th>
      <td>6,779 km</td>
      <td>12,742 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg</td>
      <td>5.97 × 10^24 kg</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2</td>
      <td>1</td>
    </tr>
    <tr>
      <th>Distance from Sun:</th>
      <td>227,943,824 km</td>
      <td>149,598,262 km</td>
    </tr>
    <tr>
      <th>Length of Year:</th>
      <td>687 Earth days</td>
      <td>365.24 days</td>
    </tr>
    <tr>
      <th>Temperature:</th>
      <td>-87 to -5 °C</td>
      <td>-88 to 58°C</td>
    </tr>
  </tbody>
</table>


In [13]:
### Mars Hemispheres

In [14]:
## Scrape high resolution images for each of Mars's hemispheres

# Point the automated browser at the Mars hemispheres site
url_4 = "https://marshemispheres.com/"
browser.visit(url_4)
# Pause three seconds before reading the page (presumably to let it finish rendering)
sleep(3)
# Capture the page's current HTML and hand it to Beautiful Soup
html = browser.html
hs_soup = BeautifulSoup(markup=html, features="html.parser")

In [15]:
# Retrieve the description blocks that hold each hemisphere's title
hemispheres = hs_soup.body.find_all("div", class_="description")
# One {"title", "img_url"} dictionary is appended per successfully scraped hemisphere
hs_data = []
for hemisphere in hemispheres:
    heading = hemisphere.find("h3")
    if heading is None:
        print("Ooops something happened!")
        continue
    # Save the image's title to a variable
    hs_title = heading.text
    # Tracks whether the click navigated away from the index page, so the
    # browser is sent back exactly when it needs to be
    left_index_page = False
    try:
        sleep(2)
        # Click on the link to the hemisphere's detail page
        browser.links.find_by_partial_text(hs_title).click()
        left_index_page = True
        sleep(2)
        # Parse the detail page with Beautiful Soup
        html = browser.html
        link_soup = BeautifulSoup(html, "html.parser")
        wide_image = link_soup.find("img", class_="wide-image")
        if wide_image is None or not wide_image.get("src"):
            print(f"Ooops something happened! (no wide image found for {hs_title})")
        else:
            # urljoin resolves the src against the site url without doubling or dropping a "/"
            img_url = urljoin(url_4, wide_image["src"])
            hs_dict = {
                "title": hs_title,
                "img_url": img_url,
            }
            hs_data.append(hs_dict)
    except Exception as error:
        # Report the cause and keep going with the remaining hemispheres;
        # KeyboardInterrupt/SystemExit are deliberately not caught
        print(f"Ooops something happened! ({hs_title}: {error!r})")
    finally:
        # Return to the index page even after a failure, otherwise the next
        # hemisphere's link cannot be found from the detail page
        if left_index_page:
            browser.back()
# Display the list containing the Hemisphere data
print(hs_data)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]


In [16]:
# Shut down the automated browser now that all four sites have been scraped
browser.quit()

In [17]:
# Bundle every scraped Mars result into a single dictionary
mission_mars_data = dict(
    news_title=news_title,
    news_paragraph=news_paragraph,
    featured_image_url=featured_image_url,
    html_table=facts_table_html,
    hs_data=hs_data,
)
# Show the assembled dictionary
print(mission_mars_data)

{'news_title': 'NASA Wins Two Emmy Awards for Interactive Mission Coverage', 'news_paragraph': "NASA-JPL's coverage of the Mars InSight landing earns one of the two wins, making this the NASA center's second Emmy.", 'featured_image_url': 'https://spaceimages-mars.com/image/featured/mars2.jpg', 'html_table': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <td>Mars</td>\n      <td>Earth</td>\n    </tr>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun: