In [83]:
# Dependencies
from bs4 import BeautifulSoup as bs
import pymongo
from splinter import Browser
import pandas as pd
import time

# Scraping

## Scraping Mars news

In [2]:
url = 'https://mars.nasa.gov/news/'

In [3]:
executable_path = {'executable_path':'C:\chromedrv\chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless = False)

In [4]:
browser.visit(url)

In [5]:
html = browser.html
soup = bs(html,'html.parser')

In [6]:
news_title = soup.find('div', class_ = 'content_title').a.text
news_title

'NASA Updates Mars 2020 Mission Environmental Review'

In [7]:
news_p = soup.find('div', class_ = 'article_teaser_body').text
news_p

'NASA and the Department of Energy have completed a more detailed risk analysis for the Mars 2020 rover launch from Florida.'

## Scraping Mars Image

In [8]:
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

In [9]:
browser.visit(url)

In [12]:
browser.click_link_by_partial_text('FULL IMAGE')

In [13]:
html= browser.html


In [14]:
soup = bs(html, 'html.parser')

In [28]:
featured_image = soup.find('div', class_="fancybox-inner").img['src']
featured_image

'/spaceimages/images/mediumsize/PIA10181_ip.jpg'

In [31]:
featured_image_url = f"https://www.jpl.nasa.gov{featured_image}"
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA10181_ip.jpg'

## Scraping Mars Weather

In [33]:
url = 'https://twitter.com/marswxreport?lang=en'

In [34]:
browser.visit(url)

In [36]:
html = browser.html
soup = bs(html, 'html.parser')

In [42]:
mars_weather = soup.find('p',class_='TweetTextSize').text
mars_weather

'InSight sol 351 (2019-11-22) low -100.4ºC (-148.7ºF) high -22.6ºC (-8.6ºF)\nwinds from the SSE at 5.5 m/s (12.3 mph) gusting to 23.7 m/s (53.1 mph)\npressure at 6.70 hPapic.twitter.com/HEvWMSOcYQ'

## Scraping Mars Facts

In [43]:
url='https://space-facts.com/mars/'

In [45]:
mars_table = pd.read_html(url)
mars_table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
           

In [50]:
mars_table_html = mars_table[0].to_html().replace('\n','')
mars_table_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>0</th>      <th>1</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    </

## Scraping Mars Hemispheres

In [72]:
hemisphere_image_urls = []
base_url = 'https://astrogeology.usgs.gov'
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
hemispheres = ['Cerberus','Schiaparelli','Syrtis Major','Valles Marineris']

In [76]:
for hemisphere in hemispheres:
    browser.visit(url)
    browser.click_link_by_partial_text(hemisphere)
    browser.click_link_by_partial_text('Open')
    soup = bs(browser.html,'html.parser')
    partial_url = soup.find('img',class_='wide-image')['src']
    image_url = f"{base_url}{partial_url}"
    hemisphere_image_urls.append({'title':f"{hemisphere} Hemisphere","img_url":image_url})

In [77]:
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

## Final output

In [79]:
scraped_data = {"news_title":news_title,"news_p":news_p,"featured_image_url":featured_image_url,"mars_weather":mars_weather,"mars_table_html":mars_table_html,"hemisphere_image_urls":hemisphere_image_urls}
scraped_data

{'news_title': 'NASA Updates Mars 2020 Mission Environmental Review',
 'news_p': 'NASA and the Department of Energy have completed a more detailed risk analysis for the Mars 2020 rover launch from Florida.',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA10181_ip.jpg',
 'mars_weather': 'InSight sol 351 (2019-11-22) low -100.4ºC (-148.7ºF) high -22.6ºC (-8.6ºF)\nwinds from the SSE at 5.5 m/s (12.3 mph) gusting to 23.7 m/s (53.1 mph)\npressure at 6.70 hPapic.twitter.com/HEvWMSOcYQ',
 'mars_table_html': '<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>0</th>      <th>1</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>3</th>     

In [84]:
time.sleep(2)