# Scraping with Pandas

In [455]:
# import dependencies
from splinter import Browser
import pandas as pd
import requests
from bs4 import BeautifulSoup as bsoup
from pprint import pprint

## NASA Mars News

In [456]:
# Open the NASA Mars news page in Chrome and parse the rendered DOM.
url = 'https://mars.nasa.gov/news'
executable_path = {"executable_path": 'C:/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(url)
# The article list may be rendered by JavaScript after the initial load, so
# wait (up to 10 s) for the first "list_text" entry before snapshotting the
# DOM; otherwise the soup can be missing the articles the next cell looks up.
browser.is_element_present_by_css('div.list_text', wait_time=10)
# Scrape page into Soup
html = browser.html
soup = bsoup(html, 'html.parser')

In [457]:
# The first "list_text" container in the soup is taken as the latest article;
# its title and teaser paragraph are read from the nested divs.
news = soup.find("div", attrs={"class": "list_text"})
news_title = news.find("div", attrs={"class": "content_title"}).get_text()
news_p = news.find("div", attrs={"class": "article_teaser_body"}).get_text()

In [458]:
# Show the headline and teaser on separate lines, then close the browser.
print(news_title, news_p, sep="\n")
browser.quit()

7 Things to Know About the NASA Rover About to Land on Mars
The Mars 2020 Perseverance rover, which has started its approach to the Red Planet, will help answer the next logical question in Mars exploration.


## JPL Mars Space Images - Featured Image

In [504]:
# Load the JPL Mars image gallery in Chrome and parse the page source.
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
executable_path = dict(executable_path='C:/chromedriver.exe')

browser = Browser('chrome', headless=False, **executable_path)
browser.visit(jpl_url)

# Hand the browser's current HTML to BeautifulSoup
html = browser.html
soup = bsoup(markup=html, features='html.parser')

In [512]:
# Build the featured-image URL from the first gallery entry.
# The browser is closed in `finally` so that a failed lookup (empty gallery,
# missing attribute) does not leave the Chrome session running.
try:
    # First <a> inside the first <li> of the first <ul class="articles">
    first_gallery = soup.find_all('ul', class_='articles')[0]
    image = first_gallery.li.a.get('data-fancybox-href')
    # The attribute value is appended to the site root to form the full URL
    featured_image_url = "https://www.jpl.nasa.gov" + image
    print(featured_image_url)
finally:
    browser.quit()

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA24355_hires.jpg


## Mars Facts

In [502]:
# Page whose HTML tables are read in the following cells
url = "https://space-facts.com/mars/"

In [462]:
# Let pandas fetch the page and parse every <table> it finds
tables = pd.read_html(io=url)


In [463]:
# Check what kind of container pd.read_html handed back
type(tables)

list

In [464]:
# First parsed table: the Mars planet profile.
# Work on a copy so the frame stored in `tables` keeps its original headers;
# renaming columns on the alias would silently mutate `tables[0]` as well.
df_mars_profile = tables[0].copy()
df_mars_profile.columns = ['MARS PLANET PROFILE', 'Data']
df_mars_profile

Unnamed: 0,MARS PLANET PROFILE,Data
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [465]:
# Second parsed table: the Mars/Earth comparison.
# Copy first so `tables[1]` is not mutated through the alias, and label the
# first column without the stray leading space so it can be selected by its
# visible name.
df_Mars_Earth_Comparison = tables[1].copy()
df_Mars_Earth_Comparison.columns = ['Mars - Earth Comparison', 'Mars', 'Earth']
df_Mars_Earth_Comparison

Unnamed: 0,Mars - Earth Comparison,Mars,Earth
0,Diameter:,"6,779 km","12,742 km"
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
2,Moons:,2,1
3,Distance from Sun:,"227,943,824 km","149,598,262 km"
4,Length of Year:,687 Earth days,365.24 days
5,Temperature:,-87 to -5 °C,-88 to 58°C


In [466]:
# Render the planet-profile frame as an HTML <table> string
# (buf=None makes to_html return the markup instead of writing it anywhere).
html_table = df_mars_profile.to_html(buf=None)
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>MARS PLANET PROFILE</th>\n      <th>Data</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd mill

In [467]:
# Display the markup with newline characters removed; `html_table` itself is
# left unchanged because the result is not assigned.
''.join(html_table.split('\n'))

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>MARS PLANET PROFILE</th>      <th>Data</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian a

In [468]:
# Write the planet-profile table as HTML straight to a file on disk
df_mars_profile.to_html(buf='marsFactsTable.html')

## Mars Hemispheres

### Valles Marineris

In [469]:
# Fetch the Valles Marineris page and pull the first download link.
# NOTE: the chained assignment also rebinds the notebook-level `url`.
usgs_url = url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced'
executable_path = {"executable_path": 'C:/chromedriver.exe'}

browser = Browser('chrome', **executable_path, headless=False)
try:
    browser.visit(usgs_url)
    # Snapshot the page source while the browser is still open
    html = browser.html
finally:
    # Always close Chrome, even if the visit fails, so no session is leaked
    browser.quit()

# Parse the captured HTML; the first link in the "downloads" box is used
soup = bsoup(html, 'html.parser')
marineris_url = soup.find_all('div', class_='downloads')[0].ul.li.a.get('href')
print(marineris_url)

https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


### Cerberus

In [470]:
# Fetch the Cerberus page and pull the first download link.
# NOTE: the chained assignment also rebinds the notebook-level `url`.
usgs_url = url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'
executable_path = {"executable_path": 'C:/chromedriver.exe'}

browser = Browser('chrome', **executable_path, headless=False)
try:
    browser.visit(usgs_url)
    # Snapshot the page source while the browser is still open
    html = browser.html
finally:
    # Always close Chrome, even if the visit fails, so no session is leaked
    browser.quit()

# Parse the captured HTML; the first link in the "downloads" box is used
soup = bsoup(html, 'html.parser')
cerberu_url = soup.find_all('div', class_='downloads')[0].ul.li.a.get('href')
print(cerberu_url)

https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg


### Schiaparelli

In [471]:
# Fetch the Schiaparelli page and pull the first download link.
# NOTE: the chained assignment also rebinds the notebook-level `url`.
usgs_url = url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced'
executable_path = {"executable_path": 'C:/chromedriver.exe'}

browser = Browser('chrome', **executable_path, headless=False)
try:
    browser.visit(usgs_url)
    # Snapshot the page source while the browser is still open
    html = browser.html
finally:
    # Always close Chrome, even if the visit fails, so no session is leaked
    browser.quit()

# Parse the captured HTML; the first link in the "downloads" box is used
soup = bsoup(html, 'html.parser')
schiaparelli_url = soup.find_all('div', class_='downloads')[0].ul.li.a.get('href')
print(schiaparelli_url)

https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg


### Syrtis Major

In [472]:
# Fetch the Syrtis Major page and pull the first download link.
# NOTE: the chained assignment also rebinds the notebook-level `url`.
usgs_url = url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'
executable_path = {"executable_path": 'C:/chromedriver.exe'}

browser = Browser('chrome', **executable_path, headless=False)
try:
    browser.visit(usgs_url)
    # Snapshot the page source while the browser is still open
    html = browser.html
finally:
    # Always close Chrome, even if the visit fails, so no session is leaked
    browser.quit()

# Parse the captured HTML; the first link in the "downloads" box is used
soup = bsoup(html, 'html.parser')
syrtis_url = soup.find_all('div', class_='downloads')[0].ul.li.a.get('href')
print(syrtis_url)

https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg


In [433]:
# Pair each hemisphere title with the image URL scraped for it and emit one
# {"title", "img_url"} record per hemisphere, in the order scraped above.
hemisphere_image_urls = [
    {"title": title, "img_url": link}
    for title, link in (
        ("Valles Marineris Hemisphere", marineris_url),
        ("Cerberus Hemisphere", cerberu_url),
        ("Schiaparelli Hemisphere", schiaparelli_url),
        ("Syrtis Major Hemisphere", syrtis_url),
    )
]


In [473]:
# Display the assembled list of hemisphere title / image-URL records
hemisphere_image_urls

[{'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'},
 {'title': 'Cerberus Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}]