In [None]:
# Dependencies
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
# Have webdriver_manager install chromedriver, then hand the resulting path
# to splinter and open a visible (non-headless) Chrome window.
executable_path = {"executable_path": ChromeDriverManager().install()}
browser = Browser("chrome", headless=False, **executable_path)

In [None]:
# Point the shared browser session at the Mars news site
news_url = 'https://redplanetscience.com'
browser.visit(news_url)

In [None]:
# Get the latest news headline and teaser
# Wait (up to 5 s) until a headline element exists before snapshotting the
# page, so the lookups below do not run against a page that is still loading.
browser.is_element_present_by_css('div.content_title', wait_time=5)

news_html = browser.html
news_soup = BeautifulSoup(news_html, 'html.parser')

# find() returns the first match, or None when nothing matches
date_tag = news_soup.find('div', {'class': 'list_date'})
title_tag = news_soup.find('div', {'class': 'content_title'})
teaser_tag = news_soup.find('div', {'class': 'article_teaser_body'})

# Keep None for anything missing instead of failing on `.text`
date = date_tag.text if date_tag is not None else None
title = title_tag.text if title_tag is not None else None
teaser = teaser_tag.text if teaser_tag is not None else None

if date and title and teaser:
    print(date)
    print(title)
    print(teaser)
else:
    print('Latest news article not found on the page')

In [None]:
# Point the shared browser session at the Mars images site
images_url = 'https://spaceimages-mars.com'
browser.visit(images_url)

In [None]:
# Get the featured image url
# Wait (up to 5 s) for the header image element before snapshotting the page
browser.is_element_present_by_css('img.headerimage', wait_time=5)

images_html = browser.html
images_soup = BeautifulSoup(images_html, 'html.parser')

# Match on the `headerimage` class alone so the lookup does not depend on the
# exact contents or order of the element's class attribute.
header_image = images_soup.select_one('img.headerimage')
images_path = header_image.get('src') if header_image is not None else None

if images_path:
    # urljoin resolves relative, root-relative and absolute src values alike
    featured_image_url = urljoin(f'{images_url}/', images_path)
    print(featured_image_url)
else:
    featured_image_url = None
    print('Featured image not found on the page')

In [None]:
# Point the shared browser session at the Mars facts site
facts_url = 'https://galaxyfacts-mars.com'
browser.visit(facts_url)

In [None]:
# Let pandas fetch the facts URL and parse its HTML tables into DataFrames,
# then show what was found.
tables = pd.read_html(io=facts_url)
tables

In [None]:
# Dump it into a dataframe
# The first row of the first table holds the column labels; the remaining rows
# are the data. reset_index() returns a new frame, so `tables[0]` itself is
# left untouched and this cell gives the same result when re-run.
mars_facts_df = tables[0].iloc[1:].reset_index(drop=True)
# to_list() yields plain labels, so the columns axis does not inherit the
# row label `0` as its name.
mars_facts_df.columns = tables[0].iloc[0].to_list()
mars_facts_df

In [None]:
# Write the facts table to disk as HTML, leaving out the row index
mars_facts_df.to_html(buf='mars_facts.html', index_names=False, index=False)

In [None]:
# Point the shared browser session at the Mars hemispheres site
hemisphere_url = 'https://marshemispheres.com'
browser.visit(hemisphere_url)

In [None]:
# Snapshot the page currently loaded in the browser and parse it
hemisphere_html = browser.html
hemisphere_soup = BeautifulSoup(markup=hemisphere_html, features='html.parser')

In [None]:
# Collect every <div class="description"> block from the parsed page
results = hemisphere_soup.find_all('div', class_='description')

In [None]:
# Accumulator for the per-hemisphere {title, img_url} records
hemisphere_images = list()

In [None]:
# Get high resolution images for each of Mars' hemispheres
# Start from an empty list so re-running this cell does not append duplicates
hemisphere_images = []

for result in results:
    title_tag = result.find('h3')
    link_tag = result.find('a', {'class': 'itemLink product-item'})
    if title_tag is None or link_tag is None or not link_tag.get('href'):
        # Entry has no title or no link to follow; skip it rather than crash
        continue

    header = title_tag.text

    # Open the hemisphere's detail page directly through its link instead of
    # clicking by link text and navigating back after every item
    detail_url = urljoin(f'{hemisphere_url}/', link_tag['href'])
    browser.visit(detail_url)

    soup = BeautifulSoup(browser.html, 'html.parser')
    wide_image = soup.find('img', {'class': 'wide-image'})
    if wide_image is None or not wide_image.get('src'):
        print(f'No full resolution image found for {header}')
        continue

    # Resolve the image path against the page it was found on
    abs_path = urljoin(detail_url, wide_image['src'])

    hemisphere_images.append({'title': header, 'img_url': abs_path})

# Leave the browser on the hemispheres landing page
browser.visit(hemisphere_url)

hemisphere_images

In [None]:
# Shut down the browser session once scraping is finished
browser.quit()