In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
import requests
import pymongo
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
import pandas as pd
import time

In [2]:
# Launch a visible (non-headless) Chrome window through splinter,
# pointing it at the chromedriver binary that sits next to this notebook.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser(
    'chrome',
    headless=False,
    **executable_path,
)

# NASA Mars News Articles

In [3]:
# Address of the NASA Mars news listing
news_url = 'https://mars.nasa.gov/news/'

# Open the listing in the browser, then pause for a second before
# the next cell reads the page source.
browser.visit(news_url); time.sleep(1)

In [4]:
# Snapshot the page source currently held by the browser ...
html = browser.html

# ... and hand it to Beautiful Soup, using the built-in 'html.parser' backend
soup = bs(html, features='html.parser')

In [5]:
# The first 'bottom_gradient' block holds the headline: its first <div>
# wraps an <h3> whose text is the title.
headline_block = soup.find('div', class_='bottom_gradient')
news_title = headline_block.find('div').find('h3').get_text()

# The first 'article_teaser_body' block holds the teaser paragraph.
teaser_block = soup.find('div', class_='article_teaser_body')
news_p = teaser_block.get_text()

# Display scraped data, one item per line
print(news_title, news_p, sep='\n')

NASA's Mars Rover Drivers Need Your Help
Using an online tool to label Martian terrain types, you can train an artificial intelligence algorithm that could improve the way engineers guide the Curiosity rover.


# JPL Mars Space Images - Featured Image

In [6]:
# Address of the JPL space-images page, filtered to the Mars category
pic_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Open the page, then pause for a second before the next cell reads its source.
browser.visit(pic_url); time.sleep(1)

In [7]:
# Snapshot the page source and parse it with Beautiful Soup
image = browser.html
soup_img = bs(image, 'html.parser')

# The first <article> carries the featured image as an inline
# "background-image: url('...');" style declaration.
article_style = soup_img.find('article')['style']

# Strip the CSS wrapper, then drop the first and last characters
# (the quotes surrounding the route).
quoted_route = article_style.replace('background-image: url(', '').replace(');', '')
featured_image_url = quoted_route[1:-1]

# Prefix the scraped route with the site root to get an absolute link
featured_image_url = f'https://www.jpl.nasa.gov{featured_image_url}'

# Display full link to featured image
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA19324-1920x1200.jpg


# Mars Weather

In [8]:
# Address of the Mars weather report Twitter account (English locale)
twitter = 'https://twitter.com/marswxreport?lang=en'

# Open the timeline, then pause for two seconds before the next cell reads its source.
browser.visit(twitter); time.sleep(2)

In [9]:
# HTML Object
weather = browser.html

# Parse HTML with Beautiful Soup
soup_weather = bs(weather, 'html.parser')

# Collect every <div> carrying the 'css-1dbjc4n' class; the loop below
# inspects the first <span> inside the first nested <div> of each one.
weather_tweet = soup_weather.find_all('div', class_='css-1dbjc4n')

# Stays None unless a span containing BOTH keywords is found, so later cells
# never pick up the text of an unrelated span.
mars_weather = None

# Keep only the first entry that mentions both 'sol' and 'pressure'.
# NOTE: the previous test `'sol' and 'pressure' in text` parsed as
# `'sol' and ('pressure' in text)`; a non-empty string is always truthy,
# so 'sol' was never actually checked.
for tweet in weather_tweet:
    node1 = tweet.find('div')
    if node1 is None:
        continue

    node2 = node1.find('span')
    if node2 is None:
        continue

    candidate = node2.text
    if 'sol' in candidate and 'pressure' in candidate:
        mars_weather = candidate
        # Print Mars Weather
        print(mars_weather)
        break
else:
    # Loop finished without a `break`: nothing on the page matched.
    print('No weather tweet found')

InSight sol 549 (2020-06-12) low -91.8ºC (-133.3ºF) high -1.5ºC (29.3ºF)
winds from the SW at 4.5 m/s (10.1 mph) gusting to 18.2 m/s (40.6 mph)
pressure at 7.40 hPa


# Mars Facts

In [17]:
# Page that publishes the Mars fact sheet
facts = "https://space-facts.com/mars/"

# pandas returns one DataFrame per <table> found at the URL; keep the first
mars_facts = pd.read_html(facts)
mars_facts_df = mars_facts[0]

# Label the two columns
column_labels = ['Description', 'Value']
mars_facts_df.columns = column_labels

# Remove every colon from the description text
mars_facts_df['Description'] = mars_facts_df['Description'].replace(':', '', regex=True)

# Render the frame as an HTML table with the 'table table-striped' classes,
# no index column, an empty border attribute and unset header alignment
html_options = dict(
    classes='table table-striped',
    header=column_labels,
    index=False,
    border='',
    justify='unset',
)
mars_facts_html = mars_facts_df.to_html(**html_options)

# Display html string
print(mars_facts_html)

<table border="" class="dataframe table table-striped">
  <thead>
    <tr style="text-align: unset;">
      <th>Description</th>
      <th>Value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>Equatorial Diameter</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <td>Polar Diameter</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <td>Mass</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <td>Moons</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <td>Orbit Distance</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <td>Orbit Period</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <td>Surface Temperature</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <td>First Record</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <td>Recorded By</td>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


# Mars Hemispheres

In [11]:
# USGS Astrogeology search results for the enhanced Mars hemisphere images
hemispheres_url = (
    'https://astrogeology.usgs.gov/search/results'
    '?q=hemisphere+enhanced&k1=target&v1=Mars'
)

# Open the results page, then pause for three seconds before the next cell reads its source.
browser.visit(hemispheres_url); time.sleep(3)

In [12]:
# Snapshot the search-results page and parse it with Beautiful Soup
hemispheres = browser.html
hem_soup = bs(hemispheres, 'html.parser')

# Each search result lives in its own <div class="item">
images = hem_soup.find_all('div', class_='item')

# Site root that the scraped relative links and image paths are appended to
usgs_root = 'https://astrogeology.usgs.gov'

# One {"title", "img_url"} dictionary is collected per result
hemispheres_main_urls = []

for item in images:
    # Heading text of the result
    title = item.find('h3').text

    # Relative link to the result's detail page
    detail_route = item.find('a', class_='itemLink product-item')['href']

    # Open the detail page and pause for a second before reading its source
    browser.visit(usgs_root + detail_route)
    time.sleep(1)

    # Parse the detail page and locate the 'wide-image' <img> element
    detail_soup = bs(browser.html, 'html.parser')
    wide_image = detail_soup.find('img', class_='wide-image')

    # Record the title together with the absolute image address
    hemispheres_main_urls.append({"title": title, "img_url": usgs_root + wide_image['src']})

# Display the collected hemisphere records
print(hemispheres_main_urls)


[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]
