In [1]:
# Dependencies
from pathlib import Path
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Download (or reuse from cache) a chromedriver build and hand its path to
# Splinter; headless=False keeps the Chrome window visible while scraping.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [/Users/ash/.wdm/drivers/chromedriver/mac64/100.0.4896.60/chromedriver] found in cache


## 1. NASA Mars News

In [3]:
# Define and visit the URL to scrape
news_url = "https://redplanetscience.com/"
browser.visit(news_url)

# Give the page a moment to show the article list before grabbing the HTML,
# in case the list is rendered after the initial page load. The call returns
# as soon as the element is present, so the timeout only costs time on failure.
browser.is_element_present_by_css('div.list_text', wait_time=5)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [4]:
# Find the latest news title and teaser paragraph.
# Both lookups are scoped to the first 'div.list_text' element so the title
# and the paragraph are guaranteed to come from the same article.
slide_elem = soup.select_one('div.list_text')
news_title = slide_elem.find('div', class_="content_title").text.strip()
news_p = slide_elem.find('div', class_='article_teaser_body').text.strip()

print("--------------------------------------------------------------------")
print(news_title)
print(news_p)


--------------------------------------------------------------------
NASA Adds Return Sample Scientists to Mars 2020 Leadership Team
The leadership council for Mars 2020 science added two new members who represent the interests of scientists destined to handle and study the first samples from Red Planet.


## 2. JPL Mars Space Images—Featured Image

In [5]:
# Point the browser at the featured-image site
image_url = "https://spaceimages-mars.com/"
browser.visit(image_url)

# Snapshot the rendered page and parse it with the built-in 'html.parser'
html = browser.html
soup = BeautifulSoup(html, features='html.parser')

In [6]:
# Find the title and the absolute URL of the current Featured Mars Image
featured_image_title = soup.find('h1', class_="media_feature_title").text.strip()

# Match on the 'headerimage' class alone: a multi-class string such as
# 'headerimage fade-in' only matches when the class attribute is exactly that
# string, so the lookup would break if the extra class changed or was reordered.
# urljoin resolves the src against the site URL whether it is relative,
# root-relative or already absolute.
featured_image_url = urljoin(image_url, soup.find('img', class_='headerimage')['src'])

print("--------------------------------------------------------------------")
print(featured_image_title)
print(featured_image_url)

--------------------------------------------------------------------
Dusty Space Cloud
https://spaceimages-mars.com/image/featured/mars2.jpg


## 3. Mars Facts

In [7]:
# URL of the Mars facts page
mars_facts_url = "https://galaxyfacts-mars.com/"

# Let pandas pull every <table> on the page into a list of DataFrames
# (the page yields two tables)
tables = pd.read_html(io=mars_facts_url)

tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [8]:
# Inspect the first scraped table (the Mars - Earth comparison) before reshaping it
tables[0]

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [9]:
# Reshape the comparison table into its final form:
#   - skip row 0, which only repeats the column captions
#   - use the description column (0) as an unnamed index
#   - label the two value columns by planet
earth_mars_facts_table = tables[0].loc[1:, :].set_index([0])
earth_mars_facts_table.index.name = None
earth_mars_facts_table.columns = ['Mars', 'Earth']
earth_mars_facts_table

Unnamed: 0,Mars,Earh
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [10]:
# Inspect the second scraped table (the Mars-only fact sheet) before reshaping it
tables[1]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [11]:
# Reshape the Mars fact sheet: the description column (0) becomes an unnamed
# index and the single remaining value column is labelled 'Mars'
mars_facts_table = tables[1].set_index(0)
mars_facts_table.columns = ['Mars']
mars_facts_table.index.name = None
mars_facts_table

Unnamed: 0,Mars
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 ( Phobos & Deimos )
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [12]:
# Export both facts tables as HTML files under templates/.
# Create the directory first so the export also works on a fresh checkout
# where templates/ does not exist yet.
templates_dir = Path('templates')
templates_dir.mkdir(parents=True, exist_ok=True)

mars_facts_table.to_html(templates_dir / 'mars_facts_table.html')
earth_mars_facts_table.to_html(templates_dir / 'earth_mars_facts_table.html')

## 4. Mars Hemispheres

In [13]:
# Accumulator for the per-hemisphere {'title', 'img_url'} records
hemisphere_image_urls = list()

# Index page listing the Mars hemispheres
hemi_url = "https://marshemispheres.com/"
browser.visit(hemi_url)

# Snapshot the rendered page and parse it with the built-in 'html.parser'
html = browser.html
soup = BeautifulSoup(html, features='html.parser')

In [14]:
# Collect one {'title', 'img_url'} record per hemisphere listed on the index page.
# The list is reset here so that re-running this cell does not append duplicates.
hemisphere_image_urls = []

# Every "item" div on the index page is read for an <h3> title and a link
# to that hemisphere's detail page.
hemi_links = soup.find_all("div", class_="item")

for hemi_link in hemi_links:
    try:
        # title of the listing
        img_name = hemi_link.find('h3').text
        # absolute URL of the detail page
        artical_url = urljoin(hemi_url, hemi_link.a['href'])

        # visit the detail page and parse it
        browser.visit(artical_url)
        hemi_html = browser.html
        hemi_soup = BeautifulSoup(hemi_html, 'html.parser')

        # absolute URL of the full-size image
        hemi_img_url = urljoin(hemi_url, hemi_soup.find('img', class_='wide-image')['src'])
    except (AttributeError, TypeError, KeyError) as e:
        # A missing tag surfaces as AttributeError (None.text) or TypeError
        # (None['src'] / None['href']); a missing attribute as KeyError.
        # Report it and move on to the next hemisphere.
        print(e)
        continue

    # Store the record only if both the title and the image link are available
    if img_name and hemi_img_url:
        print("--------------------------------------------------------------------")
        print(img_name)
        print(hemi_img_url)

        hemisphere_image_url = {
            'title': img_name,
            'img_url': hemi_img_url
        }
        hemisphere_image_urls.append(hemisphere_image_url)
    
    

--------------------------------------------------------------------
Cerberus Hemisphere Enhanced
https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg
--------------------------------------------------------------------
Schiaparelli Hemisphere Enhanced
https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg
--------------------------------------------------------------------
Syrtis Major Hemisphere Enhanced
https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg
--------------------------------------------------------------------
Valles Marineris Hemisphere Enhanced
https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg


In [15]:
# Shut down the Splinter browser session now that scraping is finished
browser.quit()