In [2]:
# Dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
import requests
import os
import pymongo

In [3]:
# NASA Mars news site
news_url = 'https://mars.nasa.gov/news/'

# Retrieve page. The timeout keeps this cell from hanging forever on a stalled
# connection, and raise_for_status() surfaces HTTP errors (4xx/5xx) instead of
# letting an error page be parsed as if it were the news listing.
response = requests.get(news_url, timeout=30)
response.raise_for_status()

# Parse the returned HTML into a BeautifulSoup object
soup = bs(response.text, 'html.parser')

In [4]:
# News article title: stripped text of the first <div class="content_title">
title = soup.find('div', attrs={'class': 'content_title'}).get_text().strip()
title

'NASA to Broadcast Mars 2020 Perseverance Launch, Prelaunch Activities'

In [5]:
# News article paragraph: stripped text of the first
# <div class="image_and_description_container">
paragraph = soup.find('div', attrs={'class': 'image_and_description_container'}).get_text().strip()
paragraph

'Starting July 27, news activities will cover everything from mission engineering and science to returning samples from Mars to, of course, the launch itself.'

In [6]:
# NASA/JPL images site
# Raw string literal: the path value is unchanged, but it no longer relies on
# invalid escape sequences (\s, \d, \w, \c), which Python flags with a warning.
executable_path = {'executable_path': r'C:\Users\salic\data_2020\web-scraping-challenge\chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
nasa_images_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

try:
    browser.visit(nasa_images_url)

    # Capture the rendered page so the browser can be released immediately
    image_html = browser.html
finally:
    # Always close the Chrome window/driver, even if the visit fails; otherwise
    # every run of this cell leaves another browser process behind.
    browser.quit()

# Parse the rendered HTML into a BeautifulSoup object
image_soup = bs(image_html, 'html.parser')

# Get Featured Image URL: the link's data-fancybox-href attribute holds a
# site-relative path, so prefix it with the JPL host.
article = image_soup.find('a', class_='button fancybox')
href = article['data-fancybox-href']
featured_image_url = 'https://www.jpl.nasa.gov' + href
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14627_ip.jpg'

In [7]:
# Mars Weather Twitter site
# Raw string literal: the path value is unchanged, but it no longer relies on
# invalid escape sequences (\s, \d, \w, \c), which Python flags with a warning.
executable_path = {'executable_path': r'C:\Users\salic\data_2020\web-scraping-challenge\chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
weather_url = 'https://twitter.com/marswxreport?lang=en'

try:
    browser.visit(weather_url)

    # Capture the rendered page so the browser can be released immediately
    weather_html = browser.html
finally:
    # Always close the Chrome window/driver, even if the visit fails; otherwise
    # every run of this cell leaves another browser process behind.
    browser.quit()

# Parse the rendered HTML into a BeautifulSoup object
weather_soup = bs(weather_html, 'html.parser')

# The lookup relies on an exact, machine-generated class string, so fail with
# a clear message when nothing matches instead of an AttributeError on None.
weather_tweet = weather_soup.find('div', class_='css-901oao r-jwli3a r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0')
if weather_tweet is None:
    raise ValueError('No weather tweet found on ' + weather_url + '; the page markup may have changed or not finished loading.')
mars_weather = weather_tweet.text
mars_weather

'InSight sol 589 (2020-07-23) low -90.1ºC (-130.2ºF) high -13.1ºC (8.4ºF)\nwinds from the W at 5.7 m/s (12.8 mph) gusting to 16.1 m/s (35.9 mph)\npressure at 7.90 hPa'

In [8]:
# Mars Facts Page: take the first HTML table found on the page
mars_facts = pd.read_html("https://space-facts.com/mars/")[0]

# Label the two columns, then index the rows by their description
mars_facts.columns = ["Description", "Value"]
mars_facts = mars_facts.set_index("Description")

# Display the table rendered as an HTML string (the string is not stored)
mars_facts.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

In [9]:
# USGS Astrogeology site
# Raw string literal: the path value is unchanged, but it no longer relies on
# invalid escape sequences (\s, \d, \w, \c), which Python flags with a warning.
executable_path = {'executable_path': r'C:\Users\salic\data_2020\web-scraping-challenge\chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
hemisphere_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# One {'title': ..., 'img_url': ...} dict per hemisphere found on the results page
hemisphere_image_urls = []

try:
    browser.visit(hemisphere_url)
    hemisphere_html = browser.html

    # Parse the results page. Cell-specific names are used here so this cell
    # does not rebind the `soup` / `image_html` variables of the earlier cells.
    hemisphere_soup = bs(hemisphere_html, 'html.parser')
    results = hemisphere_soup.find_all('div', class_="item")

    for r in results:
        # Drop the "Enhanced" suffix and the whitespace it leaves behind
        heading = r.find('h3').text.replace('Enhanced', '').strip()

        # Follow the item's site-relative link to its detail page
        link = r.find('a')['href']
        url = "https://astrogeology.usgs.gov" + link
        browser.visit(url)
        detail_soup = bs(browser.html, 'html.parser')

        # First link inside the "downloads" block of the detail page
        img_url = detail_soup.find('div', class_="downloads").find('a')['href']
        print(heading)
        print(img_url)
        hemisphere = {
            'title': heading,
            'img_url': img_url
        }
        hemisphere_image_urls.append(hemisphere)
finally:
    # Always close the Chrome window/driver, even if a page visit or lookup
    # fails; otherwise every run of this cell leaves a browser process behind.
    browser.quit()

Cerberus Hemisphere 
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
Schiaparelli Hemisphere 
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
Syrtis Major Hemisphere 
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
Valles Marineris Hemisphere 
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


In [11]:
# Gather every scraped value into a single dictionary.
# NOTE(review): `mars_facts` is stored as a pandas DataFrame here, not as the
# HTML string produced by `to_html()` -- confirm this is intended if the
# dictionary is serialized or stored later.
mars_df = dict(
    news_title=title,
    news_paragraph=paragraph,
    featured_image=featured_image_url,
    mars_weather=mars_weather,
    mars_facts=mars_facts,
    hemisphere_image_urls=hemisphere_image_urls,
)
mars_df

{'news_title': 'NASA to Broadcast Mars 2020 Perseverance Launch, Prelaunch Activities',
 'news_paragraph': 'Starting July 27, news activities will cover everything from mission engineering and science to returning samples from Mars to, of course, the launch itself.',
 'featured_image': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14627_ip.jpg',
 'mars_weather': 'InSight sol 589 (2020-07-23) low -90.1ºC (-130.2ºF) high -13.1ºC (8.4ºF)\nwinds from the W at 5.7 m/s (12.8 mph) gusting to 16.1 m/s (35.9 mph)\npressure at 7.90 hPa',
 'mars_facts':                                               Value
 Description                                        
 Equatorial Diameter:                       6,792 km
 Polar Diameter:                            6,752 km
 Mass:                 6.39 × 10^23 kg (0.11 Earths)
 Moons:                          2 (Phobos & Deimos)
 Orbit Distance:            227,943,824 km (1.38 AU)
 Orbit Period:                  687 days (1.9 years)
 Surface Temper