In [1]:
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import pymongo
from flask import Flask, render_template, redirect
from flask_pymongo import PyMongo
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Resolve the chromedriver binary via webdriver_manager (downloads it, or reuses
# a cached copy) and keep the path in a dict so it can be unpacked into Browser().
executable_path = dict(executable_path=ChromeDriverManager().install())
# Start a visible (non-headless) Chrome session driven by that chromedriver.
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 93.0.4577
Get LATEST driver version for 93.0.4577
Driver [/Users/hsuehchen/.wdm/drivers/chromedriver/mac64/93.0.4577.63/chromedriver] found in cache


### Scraping Mars News

In [3]:
# URL of the NASA Mars news listing
mars_news_url = 'https://mars.nasa.gov/news/'

# Open the news listing in the automated browser
browser.visit(mars_news_url)

# Give the page a bounded amount of time to show its first article container
# before snapshotting the HTML. Without this wait, `browser.html` can be read
# before the article list exists, and the later `find('div', class_='list_text')`
# lookup would return None. The call returns as soon as the element is present.
# NOTE(review): the listing appears to be rendered client-side -- confirm.
browser.is_element_present_by_css('div.list_text', wait_time=5)

# Snapshot the rendered HTML and hand it to the parser
news_html = browser.html
soup_news = BeautifulSoup(news_html, 'html.parser')


In [4]:
# Headline: text of the first link inside the first `div.list_text` on the page.
first_list_item = soup_news.find('div', class_='list_text')
news_title = first_list_item.find('a').text

# Teaser paragraph: text of the first `div.article_teaser_body` on the page.
first_teaser = soup_news.find('div', class_='article_teaser_body')
news_body = first_teaser.text

# Show both values so the scrape can be checked by eye.
print(f'''
      {news_title}
      ---
      {news_body}
      ''')


      NASA's Perseverance Rover Collects Puzzle Pieces of Mars' History
      ---
      The rocks it has analyzed for sample collection are helping the team better understand a past marked by volcanic activity and water.
      


### Scraping Featured Image Link

In [5]:
# Root URL of the space-images site; it is also the prefix that the featured
# image's relative path is appended to in the next cell.
image_base_path = 'https://spaceimages-mars.com/'

# Load the landing page in the automated browser
browser.visit(image_base_path)

# Snapshot the page HTML and parse it
html_img = browser.html
images_soup = BeautifulSoup(markup=html_img, features='html.parser')

In [6]:
# Locate the featured <img> tag (class attribute "headerimage fade-in") and read
# its relative `src` path.
featured_img_tag = images_soup.find('img', class_='headerimage fade-in')
img_path = featured_img_tag['src']

# Prefix the relative path with the site root to get an absolute URL.
featured_image_url = f'{image_base_path}{img_path}'
featured_image_url

'https://spaceimages-mars.com/image/featured/mars2.jpg'

### Scraping Mars Facts Table

In [48]:
# Mars facts page; pd.read_html returns a list with one DataFrame per <table>
# found at the URL.
mar_fact_url = 'https://galaxyfacts-mars.com'
html_table_string = pd.read_html(mar_fact_url)

# Work on the first table only: label its three positional columns, index by
# the description column, and drop the first row.
# NOTE(review): the first row is presumably the table's own header row -- confirm.
column_labels = {0: 'Description', 1: 'Mars', 2: 'Earth'}
first_table = html_table_string[0]
mars_table = (
    first_table
    .rename(columns=column_labels)
    .set_index('Description')
    .iloc[1:]
)

# Render the cleaned table as an HTML string.
mars_table_html = mars_table.to_html()
mars_table_html

### Scraping Hemisphere Information and Images

In [8]:
# Root URL of the Mars hemispheres site; relative links scraped from it are
# appended to this prefix in the following cell.
hemisphere_url = 'https://marshemispheres.com/'

# Load the hemispheres index page in the automated browser
browser.visit(hemisphere_url)

# Snapshot the page HTML and parse it
html_hemis = browser.html
hem_soup = BeautifulSoup(markup=html_hemis, features='html.parser')

In [9]:
# Every hemisphere entry on the index page sits in a `div.description`;
# collect those once and reuse them for both lists below.
description_divs = hem_soup.find_all('div', class_='description')

# Hemisphere names, taken from each entry's <h3>
hem_title = [entry.find('h3').text for entry in description_divs]

# Relative links to each hemisphere's own detail page, taken from each entry's first <a>
hem_ref = [entry.find('a')['href'] for entry in description_divs]

def img_url_parser(hem_url):
    """Visit one hemisphere detail page and return an absolute image URL.

    Parameters
    ----------
    hem_url : str
        Link to the hemisphere's detail page, relative to the module-level
        `hemisphere_url`.

    Returns
    -------
    str
        `hemisphere_url` joined with the `href` of the first link inside the
        second `<li>` element on the detail page.

    Notes
    -----
    Navigates the shared module-level `browser` to the detail page as a side
    effect. The second `<li>` is selected purely by position.
    NOTE(review): in the recorded run this resolved to the `.tif` image --
    confirm this is the intended download link.
    """
    detail_page_url = hemisphere_url + hem_url
    browser.visit(detail_page_url)
    detail_soup = BeautifulSoup(browser.html, 'html.parser')

    # Positional lookup: index 1 is the second <li> in document order.
    download_link = detail_soup.find_all('li')[1].find('a')
    return hemisphere_url + download_link['href']

# Visit every hemisphere detail page in order and collect its image URL.
img_url = [img_url_parser(ref) for ref in hem_ref]

# Pair each title with its image URL. The pairing is only done when both lists
# have the same length; otherwise the result is left empty.
if len(hem_title) == len(img_url):
    hemisphere_image_urls = [
        {'title': title, 'img_url': url}
        for title, url in zip(hem_title, img_url)
    ]
else:
    hemisphere_image_urls = []

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/cerberus_enhanced.tif'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced.tif'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced.tif'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced.tif'}]