## Step 1 - Scraping

In [1]:
#Dependencies
from bs4 import BeautifulSoup as bs
import requests as req
from splinter import Browser
import pandas as pd

### NASA Mars News

In [2]:
#Obtain the news title & description paragraph from URL

url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

response = req.get(url)
soup = bs(response.text, 'html.parser')

#Obtaining the news title
news_title = soup.find_all('div', class_='content_title')[0].find('a').text.strip()
news_title

'NASA to Broadcast Mars 2020 Perseverance Launch, Prelaunch Activities'

In [3]:
#Obtaining the paragraph part
news_p = soup.find_all('div', class_='rollover_description_inner')[0].text.strip()
news_p

'Starting July 27, news activities will cover everything from mission engineering and science to returning samples from Mars to, of course, the launch itself.'

### JPL Mars Space Images - Featured Image

In [4]:
#Using chromedriver
exe_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **exe_path, headless=False)

urli = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(urli)

#URL data
html = browser.html
soup2 = bs(html, "html.parser")

#Getting the partial address of image url
partial_address = soup2.find_all('a', class_='fancybox')[0].get('data-fancybox-href').strip()

#the image URL
featured_image_url = "https://www.jpl.nasa.gov"+partial_address

print(featured_image_url)
browser.visit(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA19036_ip.jpg


### Mars Facts

In [5]:
urlf = 'https://space-facts.com/mars/'

#tables, dataframes part
tables = pd.read_html(urlf)

df = tables[0]

#Assigning columns
df.columns = ['description','value']

#Replace
df.set_index('description', inplace=True)

#Saving dataframe as html
mars_facts = df.to_html(index = True, header =True)
mars_facts

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>value</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

### Mars Hemispheres

In [6]:
#Use chromedriver
exe_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **exe_path, headless=False)

#URL for this
urlh = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(urlh)

#URL data
html = browser.html
soup3 = bs(html,"html.parser")

#images URLs
hemisphere_image_urls = []

all_images = soup3.find("div", class_ = "result-list" )
results = all_images.find_all("div", class_="item")

#Looping
for hemisphere in results:
    
    #Get the title
    title = hemisphere.find("h3").text
    title = title.replace("Enhanced", "")
    
    #URL image
    end_link = hemisphere.find("a")["href"]
    
    image_link = "https://astrogeology.usgs.gov/" + end_link    
    browser.visit(image_link)
    
    #data
    html = browser.html
    soup4 = bs(html, "html.parser")
    
    #full size image
    downloads = soup4.find("div", class_="downloads")
    image_url = downloads.find("a")["href"]
    
    hemisphere_image_urls.append({"title": title, "img_url": image_url})

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere ',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere ',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere ',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere ',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

### Step 2 - MongoDB and Flask Application

Use MongoDB with Flask templating to create a new HTML page that displays all of the information that was scraped from the URLs above.

•	Start by converting your Jupyter notebook into a Python script called scrape_mars.py with a function called scrape that will execute all of your scraping code from above and return one Python dictionary containing all of the scraped data.

•	Next, create a route called /scrape that will import your scrape_mars.py script and call your scrape function.

•	Store the return value in Mongo as a Python dictionary.

•	Create a root route / that will query your Mongo database and pass the mars data into an HTML template to display the data.

•	Create a template HTML file called index.html that will take the mars data dictionary and display all of the data in the appropriate HTML elements. Use the following as a guide for what the final product should look like, but feel free to create your own design.
