In [1]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup as bs
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import pymongo

In [2]:
# Browser factory shared by every scraping cell below.
def init_browser(headless=False):
    """Start a Chrome session driven by splinter.

    The matching chromedriver binary is resolved (and downloaded when
    needed) by ``ChromeDriverManager().install()``.

    Parameters
    ----------
    headless : bool, optional
        Passed straight to ``Browser``. Defaults to ``False``, which is the
        value this notebook has always used, so existing calls behave the same.

    Returns
    -------
    The ``Browser`` instance; the caller is responsible for calling
    ``.quit()`` on it.
    """
    executable_path = {'executable_path': ChromeDriverManager().install()}
    return Browser('chrome', **executable_path, headless=headless)

### NASA Mars News

In [3]:
# Scrape the first headline and its teaser paragraph from the news site.
news = "https://redplanetscience.com/"

browser = init_browser()
try:
    browser.visit(news)

    # Scrape page into Soup
    html = browser.html
    soup = bs(html, "html.parser")

    # soup.find returns the first matching element; if the page layout
    # changes it returns None and the .text access raises AttributeError.
    latest_title = soup.find("div", class_="content_title").text
    lates_paragraph = soup.find("div", class_="article_teaser_body").text
finally:
    # Close Chrome even when a lookup above fails, so a failed run does not
    # leave a browser window and driver process behind.
    browser.quit()




In [4]:
# Sanity check: headline and teaser on one line, separated by a dashed rule.
print(f"{latest_title} ---------- {lates_paragraph}")

Space History Is Made in This NASA Robot Factory ---------- From rockets to rovers, JPL's Spacecraft Assembly Facility has been at the center of robotic spaceflight. Here's a closer look at what makes it so special.


### JPL Mars Space Images - Featured Image

In [5]:
# Scrape the URL of the featured (header) image from the space-images site.
image = "https://spaceimages-mars.com/"

browser = init_browser()
try:
    browser.visit(image)

    html = browser.html
    soup2 = bs(html, "html.parser")

    # The <img class="headerimage"> src is appended to the site root to form
    # the full URL (see the printed result in the next cell).
    featured_img_url = image + soup2.find("img", class_="headerimage")["src"]
finally:
    # Close Chrome even when the image lookup fails, so a failed run does not
    # leave a browser window and driver process behind.
    browser.quit()




In [6]:
# Sanity check: print the full featured-image URL built in the previous cell.
print(featured_img_url)

https://spaceimages-mars.com/image/featured/mars1.jpg


### Mars Facts

In [25]:
# Read every HTML table on the facts page; the first one is used below.
fact = "https://galaxyfacts-mars.com/"
tables = pd.read_html(fact)

# The first row of that table holds the column labels. Build the tidy frame
# on an explicit copy so the parsed table in `tables` is left untouched and
# no in-place operation runs on a slice of another frame.
df = tables[0].iloc[1:].copy()
df.columns = tables[0].iloc[0]
df = df.set_index("Mars - Earth Comparison")

# save it to HTML
fact_table = df.to_html()

### Mars Hemispheres

In [19]:
# Collect a title and a full-size image URL for each Mars hemisphere.
hemisphere = "https://marshemispheres.com/"

hemisphere_title = []
hemisphere_img = []

# One browser session serves the whole cell instead of launching and quitting
# Chrome once per hemisphere; the finally clause closes it even if a lookup fails.
browser = init_browser()
try:
    browser.visit(hemisphere)

    # grab the title: the text of every <h3> on the landing page
    html = browser.html
    soup4 = bs(html, "html.parser")
    hemisphere_title = [result.text for result in soup4.find_all("h3")]

    # grab the image: only the first four headings are followed
    for title in hemisphere_title[:4]:
        # Return to the landing page so the link lookup runs against it.
        browser.visit(hemisphere)

        # The first word of the title, lower-cased, is used to pick the
        # detail-page link by partial href match.
        search = title.split(" ")[0].lower()
        image_url = browser.links.find_by_partial_href(search).first["href"]
        browser.visit(image_url)
        html = browser.html
        soup_ = bs(html, "html.parser")

        hemisphere_img.append(hemisphere + soup_.find("img", class_="wide-image")["src"])
finally:
    browser.quit()

# form the dictionary: pair each scraped image with its title, in page order
hemisphere_lis = [
    {"title": title, "img": img}
    for title, img in zip(hemisphere_title, hemisphere_img)
]








In [20]:
# Sanity check: display the list of {"title", "img"} dicts built in the previous cell.
hemisphere_lis

[{'title': 'Cerberus Hemisphere Enhanced',
  'img': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

#### Save final info to MongoDB

In [26]:
# Bundle everything scraped above into the single document stored in MongoDB.
Mars = dict(
    NewsTitle=latest_title,
    NewsParagraph=lates_paragraph,
    FeaturedImage=featured_img_url,
    FactsTable=fact_table,
    Hemisphere=hemisphere_lis,
)

In [27]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Define database and collection
db = client.mars_db
collection = db.info

# insert_one adds an "_id" key to the dict it receives. Insert a shallow copy
# so `Mars` itself stays unchanged; otherwise re-running this cell would try
# to insert the same "_id" again and fail with DuplicateKeyError.
collection.insert_one(dict(Mars))

<pymongo.results.InsertOneResult at 0x25d590bffc8>

In [36]:
# Read back the facts table from the newest saved document. Auto-generated
# ObjectIds start with a creation timestamp, so sorting by "_id" descending
# picks the latest insert rather than whatever an earlier run stored first.
collection.find_one(sort=[("_id", pymongo.DESCENDING)])["FactsTable"]

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>\n</table>'