In [1]:
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
import requests

In [2]:
#starting the chrome browser
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
#NASA Mars News Site
url = "https://mars.nasa.gov/news/"
browser.visit(url)
# storing the content in a variable
html = browser.html
# parse the elements of html using beautiful soup
soup = bs(html, 'html.parser')

In [4]:
# collect the latest News Title and Paragraph Text
title = soup.find("div", class_="content_title").text
paragraph = soup.find("div", class_="article_teaser_body").text

In [5]:
# use splinter functions https://splinter.readthedocs.io/en/latest/api/driver-and-element-api.html
base_url = 'https://www.jpl.nasa.gov'
mars_url = base_url + '/spaceimages/?search=&category=Mars'
browser.visit(mars_url)
browser.is_text_present('Full IMAGE')
browser.click_link_by_partial_text('FULL IMAGE')

In [9]:
image_html = browser.html
image_bs = bs(image_html, 'lxml')

In [10]:
image_url = image_bs.find('div', class_='fancybox-inner').img['src']
image_url

'/spaceimages/images/mediumsize/PIA18182_ip.jpg'

In [11]:
featured_image_url = base_url + image_url
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18182_ip.jpg'

In [12]:
# scrape mars weather info from official twitter account page
mars_weather_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(mars_weather_url)

In [13]:
mars_weather_html = browser.html

In [14]:
mars_weather_bs = bs(mars_weather_html, 'lxml')

In [15]:
# mars_weather = mars_weather_soup.find('p', class_='tweet-text').text.replace('\n', '').split("pic")[0]
mars_weather = mars_weather_bs.find('p', class_='tweet-text').text.split("pic")[0]
mars_weather

'InSight sol 141 (2019-04-20) low -98.3ºC (-144.9ºF) high -19.7ºC (-3.5ºF)\nwinds from the SW at 4.7 m/s (10.6 mph) gusting to 12.9 m/s (28.8 mph)\npressure at 7.40 hPa'

In [16]:
# URL for mars facts table
mars_facts_url = 'https://space-facts.com/mars/'

In [17]:
mars_facts = pd.read_html(mars_facts_url)
mars_facts

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [18]:
#Extracting only the relevant information and dropping the rest
df = mars_facts[0]
df.columns = ['Names', 'Value']
df.drop

<bound method DataFrame.drop of                   Names                          Value
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
3                Moons:            2 (Phobos & Deimos)
4       Orbit Distance:       227,943,824 km (1.52 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                  -153 to 20 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers>

In [19]:
# convert data frame to html
mars_facts_html = df.to_html(index=False)
mars_facts_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th>Names</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

In [20]:
# writing dataframe to html file - it would be nice to flatten column head into same row though
with open('mars_facts.html', 'w') as mars:
    df.to_html(mars)

In [21]:
hemisphere_base_url = 'https://astrogeology.usgs.gov'
hemisphere_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemisphere_url)

In [22]:
hemisphere_html = browser.html
hemisphere_bs = bs(hemisphere_html, 'html.parser')
contents = hemisphere_bs.find_all('div', class_="item")
hemisphere_image_dict = []

In [23]:
for content in contents:
    title = content.find('h3').text
    image_url = content.find('a', class_='itemLink product-item')['href']
    browser.visit(hemisphere_base_url + image_url)
    image_html = browser.html
    soup = bs(image_html, 'html.parser')
    image_url = hemisphere_base_url + soup.find('img', class_='wide-image')['src']
    hemisphere_image_dict.append(
        {
            "title":title,
            "img_url": image_url
        }
    )

hemisphere_image_dict

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]