# Web scraping the following websites for news on the upcoming mission to Mars:
https://redplanetscience.com/
https://spaceimages-mars.com
https://galaxyfacts-mars.com
https://marshemispheres.com/

In [1]:
# Import modules for use in webscraping:
import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Start a visible (non-headless) Chrome session, pointing splinter at the
# chromedriver path returned by ChromeDriverManager
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser("chrome", headless=False, **executable_path)




Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [C:\Users\pippe\.wdm\drivers\chromedriver\win32\91.0.4472.19\chromedriver.exe] found in cache


## Latest News - Mars Mission

In [3]:
# URL for news on Mars Mission
rps_url = "https://redplanetscience.com/"

# Load the page in the automated browser
browser.visit(rps_url)

# Wait (up to 5 seconds) for a news title element to be present before taking
# the html snapshot, in case the list is filled in after the initial load.
# The boolean result is deliberately ignored: if the element never appears,
# the snapshot is still taken and the title lookup that follows will surface it.
browser.is_element_present_by_css("div.content_title", wait_time=5)

# Capture the rendered html and use beautiful soup to parse the data
rps_html = browser.html
rps_soup = bs(rps_html, "html.parser")

In [4]:
# Take the text of the first title div and the first teaser div in the parsed page
news_title = rps_soup.find("div", class_="content_title").get_text()
news_teaser = rps_soup.find("div", class_="article_teaser_body").get_text()
print(news_title, news_teaser, sep="\n")

What's Mars Solar Conjunction, and Why Does It Matter?
NASA spacecraft at Mars are going to be on their own for a few weeks when the Sun comes between Mars and Earth, interrupting communications.


## Capture Mars image

In [5]:
# Address of the JPL page that carries the Mars image
jpl_url = "https://spaceimages-mars.com/"

# Open the page in the browser, then hand the resulting html to beautiful soup
browser.visit(jpl_url)
jpl_html = browser.html
jpl_soup = bs(markup=jpl_html, features="html.parser")

In [6]:
# Search parsed soup file for <img> tags carrying the "headerimage" class
jpl_find_img = jpl_soup.find_all("img", "headerimage")

# Fail with a clear message when no header image was found, rather than
# hitting a NameError on an unassigned variable when the url is built
if not jpl_find_img:
    raise ValueError(f"No img tag with class 'headerimage' found at {jpl_url}")

# Use the src of the last match - the same value a loop that overwrites the
# variable on every match would end up with
jpl_img = jpl_find_img[-1]["src"]

# Establish variable to hold the image url
featured_image_url = jpl_url + jpl_img
print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars1.jpg


## Mars Facts

In [18]:
# Page holding the Mars facts table
facts_url = "https://galaxyfacts-mars.com"

# Let pandas fetch the page and parse the html tables found on it
table = pd.read_html(io=facts_url)

In [19]:
# Create data frame from html data; work on a copy so the parsed frame kept
# in `table` is not modified when the column labels are reassigned below
facts_df = table[0].copy()

# Convert first row to column headers, then remove that row from the data
header_row = 0
facts_df.columns = facts_df.iloc[header_row]
facts_df = facts_df.drop(header_row)

# Rename first column and make it the index (no in-place mutation, so
# re-running the cell always starts from the same input)
facts_df = (
    facts_df
    .rename(columns={'Mars - Earth Comparison': 'Description'})
    .set_index("Description")
)

# Convert dataframe to html
facts_table = facts_df.to_html()

# Remove new line code from table
facts_table = facts_table.replace("\n", " ")

# Create html file from dataframe. The index is written out on purpose: the
# Description labels live in the index, so index=False would produce a file
# with the Mars/Earth values but no row labels.
facts_df.to_html("facts_html")
facts_df

Unnamed: 0_level_0,Mars,Earth
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


## Mars Hemispheres

In [20]:
# Landing page listing the Mars hemispheres
hem_url = "https://marshemispheres.com/"

# Open the page in the browser, then hand the resulting html to beautiful soup
browser.visit(hem_url)
hem_html = browser.html
hem_soup = bs(markup=hem_html, features="html.parser")

In [21]:
# Every "item" div on the landing page is searched for a hemisphere title
# and the link to that hemisphere's own page
hem_find = hem_soup.find_all("div", class_="item")

# For each item: read the title from its H3 header, follow its itemLink to
# the hemisphere page, read the src of the "wide-image" img there, and store
# the title together with the full image url as a dictionary in the list
hemisphere_image_urls = []

for item in hem_find:
    title = item.find("h3").text
    link = item.find("a", class_="itemLink")["href"]

    # Load the hemisphere's page and parse what the browser received
    browser.visit(hem_url + link)
    hemi_soup = bs(browser.html, "html.parser")

    wide_image = hemi_soup.find("img", class_="wide-image")
    hemisphere_image_urls.append(
        {"img_url": hem_url + wide_image["src"], "title": title}
    )

print(hemisphere_image_urls)


[{'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg', 'title': 'Cerberus Hemisphere Enhanced'}, {'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg', 'title': 'Schiaparelli Hemisphere Enhanced'}, {'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg', 'title': 'Syrtis Major Hemisphere Enhanced'}, {'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg', 'title': 'Valles Marineris Hemisphere Enhanced'}]


In [22]:
# Scraping is finished: quit the browser session that was started during setup
browser.quit()