# Scraping Mars Data Exploratory Notebook

## Mars News Headline & Summary Text

In [79]:
from bs4 import BeautifulSoup as bs
import requests

In [80]:
url = "https://mars.nasa.gov/news/"

mars_news = requests.get(url)
# print(mars_news.text)
news_soup = bs(mars_news.text, "html.parser")

latest_news = news_soup.find_all("div", class_="content_title")

# mars_headline
latest_news

[<div class="content_title">
 <a href="/news/8716/nasa-to-broadcast-mars-2020-perseverance-launch-prelaunch-activities/">
 NASA to Broadcast Mars 2020 Perseverance Launch, Prelaunch Activities
 </a>
 </div>,
 <div class="content_title">
 <a href="/news/8695/the-launch-is-approaching-for-nasas-next-mars-rover-perseverance/">
 The Launch Is Approaching for NASA's Next Mars Rover, Perseverance
 </a>
 </div>,
 <div class="content_title">
 <a href="/news/8692/nasa-to-hold-mars-2020-perseverance-rover-launch-briefing/">
 NASA to Hold Mars 2020 Perseverance Rover Launch Briefing
 </a>
 </div>,
 <div class="content_title">
 <a href="/news/8659/alabama-high-school-student-names-nasas-mars-helicopter/">
 Alabama High School Student Names NASA's Mars Helicopter
 </a>
 </div>,
 <div class="content_title">
 <a href="/news/8645/mars-helicopter-attached-to-nasas-perseverance-rover/">
 Mars Helicopter Attached to NASA's Perseverance Rover
 </a>
 </div>,
 <div class="content_title">
 <a href="/news/864

### Requests does not return full page, so use splinter

In [81]:
from splinter import Browser
import time

In [82]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [83]:
browser.visit(url)

mars_news = browser.html

#add a delay so page fully loads
# time.sleep(3)

news_soup = bs(mars_news, "html.parser")

latest_news = news_soup.find_all("li", class_="slide")[0]

latest_headline = latest_news.find("div", class_="content_title").a.text
latest_description = latest_news.find("div", class_="article_teaser_body").text

print(latest_headline + "\n" + "-"*50 + "\n" + latest_description)

IndexError: list index out of range

## Get Current Mars image

In [84]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser2 = Browser('chrome', **executable_path, headless=False)

In [85]:
base_url = "https://www.jpl.nasa.gov"

image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser2.visit(image_url)
browser2.find_by_css('img.thumb').first.click()

time.sleep(2)
browser2.execute_script("document.getElementById('fancybox-lock').scrollTo(0, document.body.scrollHeight);")

browser2.click_link_by_partial_text("more info")

time.sleep(1)

#get image src
img_soup = bs(browser2.html, "html.parser")

img_src = img_soup.find("img", class_="main_image")["src"]
img_src = base_url + img_src
img_src



'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA24055_hires.jpg'

## Get Mars Data Table

In [86]:
import pandas as pd

data_table_url = "https://space-facts.com/mars/"
tables = pd.read_html(data_table_url)
mars_info_df = tables[0]
mars_info_df = mars_info_df.set_index(0)
mars_info_df.index.name = "Mars"
mars_info_df.columns = [""]
mars_info_df

Mars,Unnamed: 1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [87]:
html_mars_table = mars_info_df.to_html()
print(html_mars_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th></th>
    </tr>
    <tr>
      <th>Mars</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


### Get Mars Hemisphere Photos

In [133]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser3 = Browser('chrome', **executable_path, headless=False)

In [134]:
base_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
all_hemispheres = []

browser3.visit(base_url)
num_hemispheres = len(browser3.find_by_css(".thumb"))

for hemisphere_num in range(num_hemispheres):
    curr_title = browser3.find_by_tag("h3")[hemisphere_num].html.replace(" Enhanced", "")
    browser3.find_by_css(".thumb")[hemisphere_num].click()
    curr_img_url = browser3.find_by_text("Sample").first["href"]
    # print(curr_img_url)
    browser3.back()

    all_hemispheres.append({"title":curr_title, "img_url":curr_img_url})

browser3.windows[0].close_others()
print(all_hemispheres)
browser3.quit()

[{'title': 'Cerberus Hemisphere', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
