In [16]:
from bs4 import BeautifulSoup
from splinter import Browser


In [17]:
def init_browser():
    # Replacing the path with your actual path to the chromedriver
    executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
    return Browser("chrome", **executable_path, headless=False)


In [18]:
# NASA news URL
browser = init_browser()
url = "https://mars.nasa.gov/news/" 
browser.visit(url)

In [19]:
# Scrapping url page into Beautifulsoup

new_html = browser.html
bsoup = BeautifulSoup(new_html, 'html.parser')

In [20]:
# Getting most recent article, title and date

article = bsoup.find("div", class_="list_text")
news_p = article.find("div", class_="article_teaser_body").text
news_title = article.find("div", class_="content_title").text
news_date = article.find("div", class_="list_date").text
print(news_date)
print(news_title)
print(news_p)

February 25, 2019
InSight Lands Praise and a Proclamation from LA County
Several members of the Mars InSight team accepted a proclamation on behalf of the mission from L.A. County Board of Supervisors on Tuesday, Feb. 19.


In [21]:
# Accessing the JPL Mars URL

url2 = "https://jpl.nasa.gov/spaceimages/?search=&category=Mars" 
browser.visit(url2)

In [22]:
# Scraping the browser into soup and using soup to find Mars image
# Saving image url to a variable called `img_url`
new_html = browser.html
bsoup = BeautifulSoup(new_html, 'html.parser')
image = bsoup.find("img", class_="thumb")["src"]
img_url = "https://jpl.nasa.gov"+image
present_image_url = img_url

In [23]:
# Requesting library to download/save/display image from the above img_url

import requests
import shutil
response = requests.get(img_url, stream=True)
with open('img.jpg', 'wb') as out_file:
    shutil.copyfileobj(response.raw, out_file)

In [24]:
# Displaying image with IPython.display
from IPython.display import Image
Image(url='img.jpg')

In [25]:
# Visiting Mars Weather twitter/scraping the latest tweet.

twt_url = 'https://twitter.com/marswxreport?lang=en'

browser.visit(twt_url)

html = browser.html

soup = BeautifulSoup(html, 'html.parser')

twt = soup.find('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text

twt = twt.replace('pic.twitter.com/1msjBvhiu7','')

print(twt)


InSight sol 89 (2019-02-26) low -95.1ºC (-139.2ºF) high -14.4ºC (6.1ºF)
winds from the SW at 4.3 m/s (9.6 mph) gusting to 12.4 m/s (27.8 mph)
pressure at 7.20 hPapic.twitter.com/h8gODY5bfk


In [26]:
# Scraping table data Mars web page into Pandas

url3 = "http://space-facts.com/mars/"
browser.visit(url3)

In [27]:
# Populating data into a dataframe, cleaning and creating into an HTML table

import pandas as pd
grab = pd.read_html(url3)
mars_data = pd.DataFrame(grab[0])
mars_data.columns = ['Mars','Data']
mars_table = mars_data.set_index("Mars")
marsdata = mars_table.to_html(classes='marsdata')
marsdata = marsdata.replace('\n', ' ')
marsdata

'<table border="1" class="dataframe marsdata">   <thead>     <tr style="text-align: right;">       <th></th>       <th>Data</th>     </tr>     <tr>       <th>Mars</th>       <th></th>     </tr>   </thead>   <tbody>     <tr>       <th>Equatorial Diameter:</th>       <td>6,792 km</td>     </tr>     <tr>       <th>Polar Diameter:</th>       <td>6,752 km</td>     </tr>     <tr>       <th>Mass:</th>       <td>6.42 x 10^23 kg (10.7% Earth)</td>     </tr>     <tr>       <th>Moons:</th>       <td>2 (Phobos &amp; Deimos)</td>     </tr>     <tr>       <th>Orbit Distance:</th>       <td>227,943,824 km (1.52 AU)</td>     </tr>     <tr>       <th>Orbit Period:</th>       <td>687 days (1.9 years)</td>     </tr>     <tr>       <th>Surface Temperature:</th>       <td>-153 to 20 °C</td>     </tr>     <tr>       <th>First Record:</th>       <td>2nd millennium BC</td>     </tr>     <tr>       <th>Recorded By:</th>       <td>Egyptian astronomers</td>     </tr>   </tbody> </table>'

In [13]:
# Scraping pictures of the hemispheres from USGS Astrogeology web page

url4 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1 =target&v1=Mars"
browser.visit(url4)

In [14]:
# Loading 4 images and load them into dictionary

import time
new_html = browser.html
bsoup = BeautifulSoup(new_html, 'html.parser')
mars_hemisphere=[]

for item in range (4):
    time.sleep(5)
    images = browser.find_by_tag('h3')
    images[item].click()
    new_html = browser.html
    bsoup = BeautifulSoup(new_html, 'html.parser')
    newContent = bsoup.find("img", class_="wide-image")["src"]
    img_title = bsoup.find("h2",class_="title").text
    img_url = 'https://astrogeology.usgs.gov'+newContent
    dict_list = {"title":img_title,"img_url":img_url}
    mars_hemisphere.append(dict_list)
    browser.back()
    
print(mars_hemisphere)


[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]


In [15]:
print(mars_hemisphere[0]["img_url"])

https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg
