In [1]:
# Dependencies and Setup
from bs4 import BeautifulSoup
from splinter import Browser
import pandas as pd

In [2]:
# Set Executable Path & Initialize Chrome Browser for Mac Computers
executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)

# NASA Mars News

In [3]:
# Visit the NASA Mars News Site
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [4]:
# Parse Results HTML with BeautifulSoup
# Find Everything Inside:
#   <ul class="item_list">
#     <li class="slide">

html = browser.html
news_soup = BeautifulSoup(html, "html.parser")
slide_element = news_soup.select_one("ul.item_list li.slide")

In [7]:
slide_element.find("div", class_="content_title")

AttributeError: 'NoneType' object has no attribute 'find'

In [8]:
# Scrape the Latest News Title
# Use Parent Element to Find First <a> Tag and Save it as news_title
news_title = slide_element.find("div", class_="content_title").get_text()
print(news_title)

AttributeError: 'NoneType' object has no attribute 'find'

# JPL Mars Space Images - Featured Image

In [9]:
# Visit the NASA JPL (Jet Propulsion Laboratory) Site
executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
browser = Browser("chrome", **executable_path)
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

In [10]:
# Ask Splinter to Go to Site and Click Button with Class Name full_image
# <button class="full_image">Full Image</button>
full_image_button = browser.find_by_id("full_image")
full_image_button.click()

In [11]:
# Find "More Info" Button and Click It
browser.is_element_present_by_text("more info", wait_time=1)
more_info_element = browser.find_link_by_partial_text("more info")
more_info_element.click()



In [12]:
# Parse Results HTML with BeautifulSoup
html = browser.html
image_soup = BeautifulSoup(html, "html.parser")

In [13]:
img_url = image_soup.select_one("figure.lede a img").get("src")
img_url

'/spaceimages/images/largesize/PIA18897_hires.jpg'

In [14]:
# Use Base URL to Create Absolute URL
img_url = f"https://www.jpl.nasa.gov{img_url}"
print(img_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18897_hires.jpg


# Mars Weather

In [28]:
# Visit the Mars Weather Twitter Account
executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)
url = "https://twitter.com/marswxreport?lang=en"
browser.visit(url)

In [33]:
# Parse Results HTML with BeautifulSoup
html = browser.html
weather_soup = BeautifulSoup(html, "html.parser")

WebDriverException: Message: chrome not reachable
  (Session info: chrome=83.0.4103.116)


In [36]:
# Find a Tweet with the data-name `Mars Weather`
mars_weather_tweet = weather_soup.find_all('article',class_= 'css-1dbjc4n r-1loqt21 r-16y2uox r-1wbh5a2 r-1ny4l3l r-1udh08x r-1j3t67a r-o7ynqc r-6416eg')  
print(mars_weather_tweet)

[<article class="css-1dbjc4n r-1loqt21 r-16y2uox r-1wbh5a2 r-1ny4l3l r-1udh08x r-1j3t67a r-o7ynqc r-6416eg" data-focusable="true" role="article" tabindex="0"><div class="css-1dbjc4n"><div class="css-1dbjc4n"><div class="css-1dbjc4n r-18u37iz"><div class="css-1dbjc4n r-1iusvr4 r-16y2uox r-m611by"></div></div></div><div class="css-1dbjc4n r-18u37iz" data-testid="tweet"><div class="css-1dbjc4n r-1awozwy r-18kxxzh r-zso239" style="flex-basis: 49px;"><div class="css-1dbjc4n r-18kxxzh r-1wbh5a2 r-13qz1uu"><div class="css-1dbjc4n r-1wbh5a2 r-dnmrzs"><a class="css-4rbku5 css-18t94o4 css-1dbjc4n r-sdzlij r-1loqt21 r-1adg3ll r-ahm1il r-1ny4l3l r-1udh08x r-o7ynqc r-6416eg r-13qz1uu" data-focusable="true" href="/MarsWxReport" role="link"><div class="css-1dbjc4n r-1adg3ll r-1udh08x" style=""><div class="r-1adg3ll r-13qz1uu" style="padding-bottom: 100%;"></div><div class="r-1p0dtai r-1pi2tsx r-1d2f490 r-u8s1d r-ipm5af r-13qz1uu"><div aria-label="" class="css-1dbjc4n r-sdzlij r-1p0dtai r-1mlwlqe r-1d

In [34]:
# Search Within Tweet for <p> Tag Containing Tweet Text
#mars_weather = mars_weather_tweet.find("p", "tweet-text").get_text()
weather = mars_weather_tweet[0].find_all('div', class_='css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0')
print(weather)

[<div class="css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0" dir="auto" lang="en"><span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0">InSight sol 561 (2020-06-25) low -89.7ºC (-129.5ºF) high -2.9ºC (26.8ºF)
winds from the W at 5.7 m/s (12.8 mph) gusting to 17.8 m/s (39.8 mph)
pressure at 7.60 hPa</span></div>]


In [35]:
# Search Within Tweet for weather text
for w in weather:  
    weather_text = w.find('span', class_='css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0').text
print(weather_text)

InSight sol 561 (2020-06-25) low -89.7ºC (-129.5ºF) high -2.9ºC (26.8ºF)
winds from the W at 5.7 m/s (12.8 mph) gusting to 17.8 m/s (39.8 mph)
pressure at 7.60 hPa


# Mars Facts

In [None]:
# Visit the Mars Facts Site Using Pandas to Read
mars_df = pd.read_html("https://space-facts.com/mars/")[0]
print(mars_df)
mars_df.columns=["Description", "Value"]
mars_df.set_index("Description", inplace=True)
mars_df

# Mars Hemispheres

In [None]:
# Visit the USGS Astrogeology Science Center Site
executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)

In [None]:
hemisphere_image_urls = []

# Get a List of All the Hemispheres
links = browser.find_by_css("a.product-item h3")
for item in range(len(links)):
    hemisphere = {}
    
    # Find Element on Each Loop to Avoid a Stale Element Exception
    browser.find_by_css("a.product-item h3")[item].click()
    
    # Find Sample Image Anchor Tag & Extract <href>
    sample_element = browser.find_link_by_text("Sample").first
    hemisphere["img_url"] = sample_element["href"]
    
    # Get Hemisphere Title
    hemisphere["title"] = browser.find_by_css("h2.title").text
    
    # Append Hemisphere Object to List
    hemisphere_image_urls.append(hemisphere)
    
    # Navigate Backwards
    browser.back()

In [None]:
hemisphere_image_urls