In [1]:
# Dependencies
import requests
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import pymongo

In [2]:
# Configure ChromeDriver / Setup Splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 87.0.4280
[WDM] - Get LATEST driver version for 87.0.4280
[WDM] - Driver [C:\Users\brook\.wdm\drivers\chromedriver\win32\87.0.4280.88\chromedriver.exe] found in cache


 


## Step 1 - Scraping

### A. NASA Mars News

In [3]:
# URL of page to be scraped and Configure Splinter
# URL = universal resource locator (FYI)
url = 'https://mars.nasa.gov/news'
browser.visit(url)

In [4]:
# Capture HTML from URL
html = browser.html
# print(html)

In [5]:
# Parse HTML with BeautifulSoup
news_soup = BeautifulSoup(html, 'html.parser')
# print(news_soup)

In [6]:
# Refine search to just the area on the web page with target news articles
article_container = news_soup.find('ul', class_='item_list')
# print(article_container)

In [7]:
article_date = article_container.find('div', class_='list_date').text
article_title = article_container.find('div', class_='content_title').text
article_summary = article_container.find('div', class_='article_teaser_body').text.strip()

print(f'Date:    {article_date}')
print(f'Title:   {article_title}')
print(f'Summary: {article_summary}')

Date:    December 22, 2020
Title:   A Martian Roundtrip: NASA's Perseverance Rover Sample Tubes
Summary: Marvels of engineering, the rover's sample tubes must be tough enough to safely bring Red Planet samples on the long journey back to Earth in immaculate condition.


#### Convert above code into a function named "mars_news()"

In [8]:
def mars_news():
    
    url = 'https://mars.nasa.gov/news'
    browser.visit(url)
    
    html = browser.html
    news_soup = BeautifulSoup(html, 'html.parser')
    
    article_container = news_soup.find('ul', class_='item_list')
    
    article_date = article_container.find('div', class_='list_date').text
    article_title = article_container.find('div', class_='content_title').text
    article_summary = article_container.find('div', class_='article_teaser_body').text.strip()
    
    return article_date, article_title, article_summary

mars_news()

('December 22, 2020',
 "A Martian Roundtrip: NASA's Perseverance Rover Sample Tubes",
 "Marvels of engineering, the rover's sample tubes must be tough enough to safely bring Red Planet samples on the long journey back to Earth in immaculate condition.")

### B1. JPL Space Images - Featured Image

In [9]:
# URL of page to be scraped and Configure Splinter
# URL = universal resource locator (FYI)
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [10]:
# Capture HTML from URL
html = browser.html
# print(html)

In [11]:
# Parse HTML with BeautifulSoup
featured_image_soup_1 = BeautifulSoup(html, 'html.parser')
# print(featured_image_soup)

##### Featured Image Title

In [12]:
featured_image_title = featured_image_soup_1.find('h1', 'media_feature_title').text.strip()
print(f'Featured Image Title: {featured_image_title}')

Featured Image Title: Dark Side of the Moon: Enceladus


![MongoDB mars_app (database) mars (collection)](project_images/feature_title.png)

##### Method 1

In [13]:
# Refine search to just the area on the web page with target image
# In this case, the image is referenced from within the style attribute in the article tag (as a background image)
featured_image_element_m1 = featured_image_soup_1.find('article', class_='carousel_item')['style']
print(featured_image_element_m1)

background-image: url('/spaceimages/images/wallpaper/PIA18328-1920x1200.jpg');


In [14]:
# The URL is encased in HTML code, which we remove (parse) for our purposes
# Also, it is a relative URL which we supplement by re-attaching the root URL
image_url_m1 = featured_image_element_m1.replace("background-image: url('", '')
image_url_m1 = image_url_m1.replace("');", '')
image_url_m1 = f'https://www.jpl.nasa.gov{image_url_m1}'
print(image_url_m1)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18328-1920x1200.jpg


![MongoDB mars_app (database) mars (collection)](project_images/article.carousel_item.png)

##### Method 2

In [15]:
# Use Splinter to click the Full Image button which opens a new web page
full_image_element = browser.find_by_id('full_image')[0]
full_image_element.click()

![MongoDB mars_app (database) mars (collection)](project_images/button-fancy-box.png)

In [16]:
# Splinter opened a new web page (above); hence, we are also required to capture the HTML of this newly opened page (URL)
html = browser.html
# print(html)

In [17]:
# Parse HTML with BeautifulSoup
featured_image_soup_2 = BeautifulSoup(html, 'html.parser')
# print(featured_image_soup_2)

In [18]:
# Zero in on the relative URL via "src=" in HTML
# Also, it is a relative URL which we supplement by re-attaching the root URL
try:   
    image_url_m2 = featured_image_soup_2.find('img', class_='fancybox-image')['src']
    image_url_m2 = f'https://www.jpl.nasa.gov{image_url_m2}'
    print(image_url_m2)
except Exception as e: 
    print(e)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18328_ip.jpg


![MongoDB mars_app (database) mars (collection)](project_images/fancy-box2.png)

#### Convert above code into a function named "featured_image()" using Method 1 (which returned the larger image)

In [19]:
def featured_image():
    
    url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
    browser.visit(url)
    
    html = browser.html
    featured_image_soup_1 = BeautifulSoup(html, 'html.parser')
    
    featured_image_title = featured_image_soup_1.find('h1', 'media_feature_title').text.strip()
    
    featured_image_element_m1 = featured_image_soup_1.find('article', class_='carousel_item')['style']
    image_url_m1 = featured_image_element_m1.replace("background-image: url('", '')
    image_url_m1 = image_url_m1.replace("');", '')
    image_url_m1 = f'https://www.jpl.nasa.gov{image_url_m1}'
    featured_image_url = image_url_m1
    
    return featured_image_title, featured_image_url

featured_image()    

('Dark Side of the Moon: Enceladus',
 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18328-1920x1200.jpg')

### B1. Mars Facts

In [20]:
# URL of page to be scraped and Configure Splinter
# URL = universal resource locator (FYI)
url = 'https://space-facts.com/mars/'
browser.visit(url)

In [21]:
# Use pandas to scrape tables from source URL
# It pulls all tables on that web page
mars_facts_df = pd.read_html(url)

# Zero in on desired table, which in this case happens to be the first
mars_facts_df = mars_facts_df[0]

# Rename the columns
mars_facts_df.columns = ['Planet Metric', 'Mars']
mars_facts_df

Unnamed: 0,Planet Metric,Mars
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [22]:
# Export dataframe to HTML
mars_facts_html = mars_facts_df.to_html(classes='table table=striped', index=False, justify='left', border=0)
print(mars_facts_html)

<table border="0" class="dataframe table table=striped">
  <thead>
    <tr style="text-align: left;">
      <th>Planet Metric</th>
      <th>Mars</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <td>Recorded By:</td>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


#### Alternative Method pulling HTML table directly from source page without pandas, including original formatting

In [23]:
html = browser.html
mars_facts_soup = BeautifulSoup(html, 'html.parser')
mars_facts_source_html = mars_facts_soup.find(id='tablepress-p-mars')
mars_facts_source_html

<table class="tablepress tablepress-id-p-mars" id="tablepress-p-mars"><tbody><tr class="row-1 odd"><td class="column-1"><strong>Equatorial Diameter:</strong></td><td class="column-2">6,792 km<br/></td></tr><tr class="row-2 even"><td class="column-1"><strong>Polar Diameter:</strong></td><td class="column-2">6,752 km<br/></td></tr><tr class="row-3 odd"><td class="column-1"><strong>Mass:</strong></td><td class="column-2">6.39 × 10^23 kg<br/> (0.11 Earths)</td></tr><tr class="row-4 even"><td class="column-1"><strong>Moons:</strong></td><td class="column-2">2 (<a href="https://space-facts.com/moons/phobos/">Phobos</a> &amp; <a href="https://space-facts.com/moons/deimos/">Deimos</a>)</td></tr><tr class="row-5 odd"><td class="column-1"><strong>Orbit Distance:</strong></td><td class="column-2">227,943,824 km<br/> (1.38 AU)</td></tr><tr class="row-6 even"><td class="column-1"><strong>Orbit Period:</strong></td><td class="column-2">687 days (1.9 years)<br/></td></tr><tr class="row-7 odd"><td cla

#### Convert above code into a function named "mars_facts()"

In [24]:
def mars_facts():
    
    url = 'https://space-facts.com/mars/'
    browser.visit(url)
    
    mars_facts_df = pd.read_html(url)
    mars_facts_df = mars_facts_df[0]
    mars_facts_df.columns = ['Planet Metric', 'Mars']
    
    mars_facts_html = mars_facts_df.to_html(classes='table table=striped', index=False, justify='left', border=0)
    
    return mars_facts_html

mars_facts()

'<table border="0" class="dataframe table table=striped">\n  <thead>\n    <tr style="text-align: left;">\n      <th>Planet Metric</th>\n      <th>Mars</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

### C. Mars Hemispheres

#### Proof

In [25]:
# Initiate empty list of dictionaries (no dictionaries yet present)
hemisphere_image_urls = []

In [26]:
# URL of page to be scraped and Configure Splinter
# URL = universal resource locator (FYI)
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

# Capture HTML from URL
html = browser.html

In [27]:
# Get list of hemispheres 
links = browser.find_by_css("a.product-item h3")
# Identify type - element list
type(links)

splinter.element_list.ElementList

In [28]:
# Proof of element list
links[0]

<splinter.driver.webdriver.WebDriverElement at 0x13dbfbbfb80>

In [29]:
len(links)

4

In [30]:
# Further confirmation / Also to serve as a possible mechanism for traversing with iterrows()(which I opted not to do)
links_df = pd.DataFrame(links)
links_df = links_df.rename(columns={0: "links_splinter"})
links_df

Unnamed: 0,links_splinter
0,<splinter.driver.webdriver.WebDriverElement ob...
1,<splinter.driver.webdriver.WebDriverElement ob...
2,<splinter.driver.webdriver.WebDriverElement ob...
3,<splinter.driver.webdriver.WebDriverElement ob...


In [31]:
# Initiate empty dictionary
hemisphere_dict = {}

In [32]:
# Use Splinter to click through to linked page
browser.find_by_css("a.product-item h3")[0].click()

In [33]:
# Capture HTML from second URL (linked page)
html = browser.html

# Parse HTML from second URL (linked page) with BeautifulSoup
mars_hemispheres_image_soup = BeautifulSoup(html, 'html.parser')

In [34]:
# Use BeautifulSoup to zero in on image_title
mars_hemispheres_image_title = mars_hemispheres_image_soup.find('h2', class_='title').text
print(mars_hemispheres_image_title)

Cerberus Hemisphere Enhanced


In [35]:
# Use BeautifulSoup to zero in on relative URL for wide_image (full image)
mars_hemispheres_image_url = mars_hemispheres_image_soup.find('img', class_='wide-image')['src']
print(mars_hemispheres_image_url)

# Augment relative URL to create full URL for wide_image (full image)
mars_hemispheres_image_url = f'https://astrogeology.usgs.gov{mars_hemispheres_image_url}'
mars_hemispheres_image_url

/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg


'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'

In [36]:
# Populate dictionary with results
hemisphere_dict = {"title": mars_hemispheres_image_title, "img_url": mars_hemispheres_image_url}
hemisphere_dict

{'title': 'Cerberus Hemisphere Enhanced',
 'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}

In [37]:
# Append dictionary to list of dictionaries
hemisphere_image_urls.append(hemisphere_dict)
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}]

In [38]:
# Use Splinter to go back to prior web page
browser.back()

#### Create a for-loop to traverse the above code blocks

In [39]:
# Initiate empty list of dictionaries (no dictionaries yet present)
hemisphere_image_urls = []

# URL of page to be scraped and Configure Splinter
# URL = universal resource locator (FYI)
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

# Capture HTML from URL
html = browser.html

# Get list of hemispheres 
links = browser.find_by_css("a.product-item h3")

# Traverse Splinter links
for i in range(len(links)):
    
    #Find the elements on each loop to avoid a stale element exception
    browser.find_by_css("a.product-item h3")[i].click()

    # Capture HTML from second URL (linked page)
    html = browser.html

    # Parse HTML from second URL (linked page) with BeautifulSoup
    mars_hemispheres_image_soup = BeautifulSoup(html, 'html.parser')

    # Use BeautifulSoup to zero in on image_title
    mars_hemispheres_image_title = mars_hemispheres_image_soup.find('h2', class_='title').text

    # Use BeautifulSoup to zero in on relative URL for wide_image (full image)
    mars_hemispheres_image_url = mars_hemispheres_image_soup.find('img', class_='wide-image')['src']

    # Augment relative URL to create full URL for wide_image (full image)
    mars_hemispheres_image_url = 'https://astrogeology.usgs.gov' + mars_hemispheres_image_url

    # Initiate empty dictionary
    hemisphere_dict = {}

    # Populate dictionary with results
    hemisphere_dict = {"title": mars_hemispheres_image_title, "img_url": mars_hemispheres_image_url}

    # Append dictionary to list of dictionaries
    hemisphere_image_urls.append(hemisphere_dict)

    # Use Splinter to go back to prior web page
    browser.back()

In [40]:
# Proof
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

#### Convert above code into a function named "mars_hemispheres()"

In [41]:
def mars_hemispheres():

    # Initiate empty list of dictionaries (no dictionaries yet present)
    global hemisphere_image_urls
    hemisphere_image_urls = []

    # URL of page to be scraped and Configure Splinter
    # URL = universal resource locator (FYI)
    url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    browser.visit(url)

    # Capture HTML from URL
    html = browser.html

    # Get list of hemispheres 
    links = browser.find_by_css("a.product-item h3")

    # Traverse Splinter links
    for i in range(len(links)):

        #Find the elements on each loop 
        browser.find_by_css("a.product-item h3")[i].click()

        # Capture HTML from second URL (linked page)
        html = browser.html

        # Parse HTML from second URL (linked page) with BeautifulSoup
        mars_hemispheres_image_soup = BeautifulSoup(html, 'html.parser')

        # Use BeautifulSoup to zero in on image_title
        mars_hemispheres_image_title = mars_hemispheres_image_soup.find('h2', class_='title').text

        # Use BeautifulSoup to zero in on relative URL for wide_image (full image)
        mars_hemispheres_image_url = mars_hemispheres_image_soup.find('img', class_='wide-image')['src']

        # Augment relative URL to create full URL for wide_image (full image)
        mars_hemispheres_image_url = 'https://astrogeology.usgs.gov' + mars_hemispheres_image_url

        # Initiate empty dictionary
        hemisphere_dict = {}

        # Populate dictionary with results
        hemisphere_dict = {"title": mars_hemispheres_image_title, "img_url": mars_hemispheres_image_url}

        # Append dictionary to list of dictionaries
        hemisphere_image_urls.append(hemisphere_dict)

        # Use Splinter to go back to prior web page
        browser.back()
        
    return hemisphere_image_urls

mars_hemispheres()

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

### Create single function to pull all data but the hemispheres portion

In [43]:
# Scrape all prior functions mars_news(), featured_image(), mars_facts()
article_date, article_title, article_summary  = mars_news()
featured_image_title, featured_image_url = featured_image()
mars_facts_html = mars_facts()

In [44]:
mars_dict = {
    'article_date': article_date, 
    'article_title': article_title,
    'article_summary': article_summary,
    'featured_image_title': featured_image_title, 
    'featured_image_url': featured_image_url,
    'mars_facts_html': mars_facts_html
}

print('-------')
mars_dict

-------


{'article_date': 'December 22, 2020',
 'article_title': "A Martian Roundtrip: NASA's Perseverance Rover Sample Tubes",
 'article_summary': "Marvels of engineering, the rover's sample tubes must be tough enough to safely bring Red Planet samples on the long journey back to Earth in immaculate condition.",
 'featured_image_title': 'Dark Side of the Moon: Enceladus',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18328-1920x1200.jpg',
 'mars_facts_html': '<table border="0" class="dataframe table table=striped">\n  <thead>\n    <tr style="text-align: left;">\n      <th>Planet Metric</th>\n      <th>Mars</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</t

#### Convert above code into a function named "scrape_all()"

In [45]:
def scrape_all():
    article_date, article_title, article_summary  = mars_news()
    featured_image_title, featured_image_url = featured_image()
    mars_facts_html = mars_facts()
    
    global mars_dict

    mars_dict = {
        'article_date': article_date, 
        'article_title': article_title,
        'article_summary': article_summary,
        'featured_image_title': featured_image_title, 
        'featured_image_url': featured_image_url,
        'mars_facts_html': mars_facts_html
    }
    
    return mars_dict

scrape_all()

{'article_date': 'December 22, 2020',
 'article_title': "A Martian Roundtrip: NASA's Perseverance Rover Sample Tubes",
 'article_summary': "Marvels of engineering, the rover's sample tubes must be tough enough to safely bring Red Planet samples on the long journey back to Earth in immaculate condition.",
 'featured_image_title': 'Dark Side of the Moon: Enceladus',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18328-1920x1200.jpg',
 'mars_facts_html': '<table border="0" class="dataframe table table=striped">\n  <thead>\n    <tr style="text-align: left;">\n      <th>Planet Metric</th>\n      <th>Mars</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</t

### Create dataframe out of hemispheres list of dictionaries

In [46]:
mars_hemispheres_df = pd.DataFrame(hemisphere_image_urls)
mars_hemispheres_df = mars_hemispheres_df.rename(columns={"title": "mars_hemispheres_title", 
                                                          "img_url": "mars_hemispheres_img_url"})
mars_hemispheres_df.set_index("mars_hemispheres_title", inplace=True)
mars_hemispheres_df

Unnamed: 0_level_0,mars_hemispheres_img_url
mars_hemispheres_title,Unnamed: 1_level_1
Cerberus Hemisphere Enhanced,https://astrogeology.usgs.gov/cache/images/f5e...
Schiaparelli Hemisphere Enhanced,https://astrogeology.usgs.gov/cache/images/377...
Syrtis Major Hemisphere Enhanced,https://astrogeology.usgs.gov/cache/images/555...
Valles Marineris Hemisphere Enhanced,https://astrogeology.usgs.gov/cache/images/b3c...


### Insert into MongoDB

In [47]:
# Initialize pymongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [48]:
# Connect to mars_app database
db = client.mars_app

# Connect to mars collection
mars = db.mars

In [49]:
# Dictionary to insert
mars_dict

{'article_date': 'December 22, 2020',
 'article_title': "A Martian Roundtrip: NASA's Perseverance Rover Sample Tubes",
 'article_summary': "Marvels of engineering, the rover's sample tubes must be tough enough to safely bring Red Planet samples on the long journey back to Earth in immaculate condition.",
 'featured_image_title': 'Dark Side of the Moon: Enceladus',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18328-1920x1200.jpg',
 'mars_facts_html': '<table border="0" class="dataframe table table=striped">\n  <thead>\n    <tr style="text-align: left;">\n      <th>Planet Metric</th>\n      <th>Mars</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</t

In [50]:
# Insert mars_dict dictionary as a document into mars collection of mars_app MongoDB database
# Importantly, we use UPSERT method, which updates existing documents and adds documents if they don't exist
mars.update_one({}, {'$set': mars_dict}, upsert=True)

<pymongo.results.UpdateResult at 0x13dc1704b00>

In [51]:
# Convert mars_hemispheres_df DataFrame into dictionary called mars_hemispheres_dict, 
# which is then inserted into mars collection of mars_app MongoDB database
# Importantly, we use UPSERT method, which updates existing documents and adds documents if they don't exist
mars_hemispheres_dict = mars_hemispheres_df.to_dict()
mars.update_one({}, {'$set': mars_hemispheres_dict}, upsert=True)

<pymongo.results.UpdateResult at 0x13dc17049c0>

![MongoDB mars_app (database) mars (collection)](project_images/MongoDB_mars_app_mars.jpg)