# Mission to Mars

### Step 1: Scraping

In [55]:
# import dependencies
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

#### a) scrape recent news title and paragraph text from NASA's news site

In [56]:
# Parse a locally saved copy of the NASA Mars news page instead of fetching it.
local_nasa_file = "News_NASA_Mars_Exploration_Program.html"

# Read inside a context manager so the file handle is closed as soon as the
# contents are in memory, even if read() raises.
# NOTE(review): no encoding is passed, so the platform default is used —
# confirm it matches the encoding the page was saved with.
with open(local_nasa_file, "r") as nasa_file:
    nasa_html = nasa_file.read()

# Build a BeautifulSoup tree from the raw HTML with Python's built-in parser.
news_soup = bs(nasa_html, "html.parser")

In [57]:
# Title text: taken from the first <div class="content_title"> in the parsed
# news page (presumably the most recent article — the page lists newest first).
news_title = news_soup.find("div", attrs={"class": "content_title"}).get_text()

In [58]:
# Teaser paragraph: taken from the first <div class="rollover_description">
# in the parsed news page (presumably belonging to the most recent article).
news_paragraph = news_soup.find("div", attrs={"class": "rollover_description"}).get_text()

#### b) scrape featured image from JPL Mars Space Images

In [59]:
# JPL page to scrape for the featured Mars image.
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Fetch the page. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() stops here on an HTTP error instead of
# letting an error page be parsed as if it were the real content.
response = requests.get(image_url, timeout=30)
response.raise_for_status()

# Parse the response body with the "lxml" parser.
image_soup = bs(response.text, "lxml")

# The first <a class="fancybox"> carries the image path in its
# data-fancybox-href attribute. Fail with a clear message if the page layout
# has changed, rather than with an IndexError or a None concatenation.
featured_link = image_soup.find("a", class_="fancybox")
if featured_link is None or not featured_link.get("data-fancybox-href"):
    raise ValueError("No featured image link found at " + image_url)
partial_url = featured_link.get("data-fancybox-href")

# Resolve the path against the page URL. For a root-relative path this gives
# the same result as prefixing the site root, and it also copes with
# relative or already-absolute hrefs.
combined_image_url = urljoin(image_url, partial_url)

# Show the resolved image URL.
print(combined_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18846_ip.jpg


#### c) scrape latest weather report from the Mars Weather Twitter account

In [60]:
# Twitter timeline to scrape for the latest Mars weather report.
weather_url = "https://twitter.com/marswxreport?lang=en"

# Fetch the page; bound the wait and stop on an HTTP error status so an
# error page is never parsed as a timeline.
response = requests.get(weather_url, timeout=30)
response.raise_for_status()

# Parse the response body with the "lxml" parser.
weather_soup = bs(response.text, "lxml")

# Locate the first tweet container whose data-name is "Mars Weather" and read
# the text from inside it, so the first tweet on the page from some other
# data-name (presumably a retweet or pinned tweet by another author) is not
# picked up by mistake.
weather_tweet = weather_soup.find("div", attrs={"class": "tweet", "data-name": "Mars Weather"})
if weather_tweet is None:
    raise ValueError("No 'Mars Weather' tweet found at " + weather_url)

# Match on the single "tweet-text" class rather than the full class string,
# which only matches when the attribute is spelled exactly in that order.
tweet_text = weather_tweet.find("p", class_="tweet-text")
if tweet_text is None:
    raise ValueError("Mars Weather tweet has no text paragraph at " + weather_url)

mars_weather = tweet_text.text
print(mars_weather)

Sol 2242 (2018-11-26), high -2C/28F, low -70C/-93F, pressure at 8.48 hPa, daylight 06:29-18:45


#### d) scrape table of facts about Mars from the space-facts website

In [61]:
# Page holding the Mars facts table.
facts_url = "https://space-facts.com/mars/"

# pd.read_html returns a list with one DataFrame per <table> found on the page.
facts_table = pd.read_html(facts_url)

# Work on a copy of the first table so the parsed list stays exactly as it was
# read; relabelling and re-indexing then produce new objects instead of
# mutating shared state in place.
facts_df = facts_table[0].copy()

# Label the two columns (this raises if the table does not have exactly two).
facts_df.columns = ["Measure", "Value"]

# Use the Measure column as the row index.
facts_df = facts_df.set_index("Measure")

# Save the table as an HTML file in the working directory.
facts_df.to_html("facts_table.html")

# Display the table in the notebook.
facts_df

Unnamed: 0_level_0,Value
Measure,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


#### e) scrape images of Mars's hemispheres from USGS Astrogeology site

In [66]:
# USGS Astrogeology search results for the enhanced Mars hemisphere images.
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Fetch the results page; bound the wait and stop on an HTTP error status.
response = requests.get(hemispheres_url, timeout=30)
response.raise_for_status()

# Parse the response body with the "lxml" parser.
hemispheres_soup = bs(response.text, "lxml")

# Each search hit is a <div class="item">.
results = hemispheres_soup.find_all("div", class_="item")

# One {"img_url": ..., "title": ...} dictionary per hemisphere.
hem3_list = []

for item in results:
    # Title is the item's <h3> text with the " Enhanced" suffix removed.
    hem_title = item.find("h3").text.replace(" Enhanced", "")

    # The item links to a detail page; resolve its href against the results
    # URL so relative and absolute links are both handled.
    detail_url = urljoin(hemispheres_url, item.find("a", class_="itemLink").get("href"))

    # Fetch and parse the detail page, which holds the <img class="wide-image">.
    detail_response = requests.get(detail_url, timeout=30)
    detail_response.raise_for_status()
    detail_soup = bs(detail_response.text, "lxml")

    # Resolve the image's src against the detail page it was found on.
    image_src = detail_soup.find("img", class_="wide-image").get("src")
    full_image_url = urljoin(detail_url, image_src)

    # Append this hemisphere's record to the list.
    hem3_list.append({"img_url": full_image_url, "title": hem_title})