# Step 1 - Scraping

In [1]:
# import dependencies
from io import StringIO

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Start the splinter-controlled Chrome session used by every scraping cell below.
# webdriver_manager resolves a chromedriver binary (from its cache when available)
# and returns the path splinter needs.
executable_path = dict(executable_path=ChromeDriverManager().install())

# headless=False keeps the browser window visible while the notebook runs.
browser = Browser('chrome', headless=False, **executable_path)




[WDM] - Current google-chrome version is 102.0.5005
[WDM] - Get LATEST chromedriver version for 102.0.5005 google-chrome
[WDM] - Driver [/Users/solo/.wdm/drivers/chromedriver/mac64_m1/102.0.5005.61/chromedriver] found in cache


# Scrape NASA Mars News
### Collect the latest news title and paragraph text.

In [3]:
# Open the NASA Mars news listing in the splinter-controlled browser.
nasa_url = 'https://mars.nasa.gov/news/'
browser.visit(nasa_url)

# The next cell looks for the first 'li.slide' item in the page source. Give the
# page up to 10 seconds to show one, so the snapshot below is not taken before
# the article list exists. The boolean result is deliberately not acted on here.
browser.is_element_present_by_css('li.slide', wait_time=10)

# Snapshot of the page source, parsed with BeautifulSoup in the next cell.
html = browser.html


In [4]:

# --- build a parse tree from the page source captured in the previous cell ---
soup = BeautifulSoup(html, features='html.parser')


# Each article on the page is an <li class="slide"> element; the first match in
# document order is taken as the latest story.
first_li = soup.find('li', attrs={'class': 'slide'})

# --- headline: text of the <div class="content_title"> inside that item ---
news_title = first_li.find('div', attrs={'class': 'content_title'}).get_text()
print(news_title)

# --- teaser: text of the <div class="article_teaser_body"> in the same item ---
news_para = first_li.find('div', attrs={'class': 'article_teaser_body'}).get_text()
print(news_para)







NASA, Partner Establish New Research Group for Mars Sample Return Program
Sixteen scientists from the U.S., Europe, Canada, and Japan have been chosen to help future samples from the Red Planet achieve their full potential.


In [5]:
# browser.quit()  # kept disabled: the browser session is still needed by the cells below



# JPL Mars Space Images - Featured Image
### Find the image URL for the current featured Mars image
#### Save it as a complete URL string

In [6]:
# Complete URL of the featured Mars image.
# NOTE(review): this address is hard-coded rather than discovered from the site's
# landing page — confirm that is intended, since the "current" image may change.
featured_image_url = 'https://spaceimages-mars.com/image/featured/mars2.jpg'
# Load the image URL in the browser and keep the resulting page source.
browser.visit(featured_image_url)
html = browser.html

# Mars Facts

### scrape the table containing facts about the planet including Diameter, Mass, etc.

In [7]:
# --- page that hosts the Mars facts table ---
Marsfacts_url = 'https://galaxyfacts-mars.com/'

# --- open the Mars facts page in the browser ---
browser.visit(Marsfacts_url)

# --- keep the page source; pandas reads the facts table from it in the next cell ---
html = browser.html

# --- parse tree of the same page source ---
soup = BeautifulSoup(html, features='html.parser')


In [8]:
# --- use pandas to extract every <table> found in the page source ---
# Passing literal HTML to read_html as a plain string is deprecated in recent
# pandas releases; wrapping it in StringIO works on old and new versions alike.
table = pd.read_html(StringIO(html))

# --- the first table found is the one used for the facts dataframe ---
facts_df = table[0]

# The page's own header row is read in as data (row 0), so the columns are
# named explicitly here; that leftover row is dropped in the next cell.
facts_df.columns = ['Description', 'Mars', 'Earth']


facts_df


Unnamed: 0,Description,Mars,Earth
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [9]:
# Row 0 holds the page's own header ("Mars - Earth Comparison"), not a fact: remove it.
df1 = facts_df.drop(index=0)

# Only the Mars figures are kept; discard the Earth column.
mars_df = df1.drop('Earth', axis=1)

mars_df


Unnamed: 0,Description,Mars
1,Diameter:,"6,779 km"
2,Mass:,6.39 × 10^23 kg
3,Moons:,2
4,Distance from Sun:,"227,943,824 km"
5,Length of Year:,687 Earth days
6,Temperature:,-87 to -5 °C


## Convert the data to an HTML table string



In [10]:
# --- convert the dataframe to an HTML table string and also save it to an html file ---

# DataFrame.to_html only returns the markup when it is not given an output
# target; when handed a file path it writes the file and returns None.
# Render to a string first so mars_table really holds the table markup.
mars_table = mars_df.to_html(border=1, justify="left")

# Persist the same markup to disk, as the original cell did.
with open('marsfacts.html', 'w', encoding='utf-8', newline='') as facts_file:
    facts_file.write(mars_table)



# Mars Hemispheres




In [11]:
# Goal: obtain high-resolution images for each of Mars's hemispheres.
# Alternative source noted in the original cell: https://marshemispheres.com/
# (that is not the URL visited below).


# --- USGS Astrogeology search results listing the enhanced hemisphere products ---
MarsHemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# --- open the results page in the browser ---
browser.visit(MarsHemi_url)


# --- capture the page source ---
html = browser.html

# --- parse the page source with BeautifulSoup ---
soup = BeautifulSoup(html, features='html.parser')


In [12]:
# --- retrieve all the parent div tags for each hemisphere ---
hemisphere_divs = soup.find_all('div', class_="item")

# --- list that collects one {"title", "img_url"} dictionary per hemisphere ---
hemisphere_image_urls = []

# --- base url prepended to each image's src; constant, so defined once outside the loop ---
base_url = 'https://astrogeology.usgs.gov'

try:
    # --- loop through each div item to get hemisphere data ---
    for hemisphere in range(len(hemisphere_divs)):

        # --- use splinter's browser to click on each hemisphere's link in order to retrieve image data ---
        # The links are looked up again on every pass; presumably element handles
        # obtained before navigating away and back are no longer usable.
        hem_link = browser.find_by_css("a.product-item h3")
        hem_link[hemisphere].click()

        # --- create a beautiful soup object with the image detail page's html ---
        img_detail_html = browser.html
        imagesoup = BeautifulSoup(img_detail_html, 'html.parser')

        # --- retrieve the src of the <img class="wide-image"> element and save into a variable ---
        hem_url = imagesoup.find('img', class_="wide-image")['src']

        # --- complete the image url by adding the base url ---
        img_url = base_url + hem_url

        # --- retrieve the image title using the title class and save into variable ---
        img_title = browser.find_by_css('.title').text

        # --- add the key value pairs to python dictionary and append to the list ---
        hemisphere_image_urls.append({"title": img_title,
                                      "img_url": img_url})

        # --- go back to the main page ---
        browser.back()
finally:
    # --- close the browser session, even when a step above raised, so no
    # --- browser/driver process is left running after a failed scrape ---
    browser.quit()

hemisphere_image_urls



[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [13]:
# # Example:
# hemisphere_image_urls = [
#     {"title": "Valles Marineris Hemisphere", "img_url": "..."},
#     {"title": "Cerberus Hemisphere", "img_url": "..."},
#     {"title": "Schiaparelli Hemisphere", "img_url": "..."},
#     {"title": "Syrtis Major Hemisphere", "img_url": "..."},
# ]


# Step 2 - MongoDB and Flask Application