In [1]:
import json
import time
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser


## Mars News

In [2]:
# Start a visible (non-headless) Chrome session; the driver binary is looked up
# at the relative path "chromedriver.exe".
executable_path = {"executable_path": "chromedriver.exe"}
browser = Browser("chrome", headless=False, **executable_path)


In [3]:
# Load the Mars news site in the automated browser session.
url = "https://redplanetscience.com/"
browser.visit(url)

In [4]:
# Give the page up to 5 seconds to show the headline element before taking the
# HTML snapshot; parsing too early would leave the lookups below empty.
browser.is_element_present_by_css("div.content_title", wait_time=5)

# Snapshot the page as currently rendered and parse it.
html_news = browser.html
soup = BeautifulSoup(html_news, "html.parser")

# The first matching elements are taken as the latest article's title and teaser.
title_tag = soup.find("div", class_="content_title")
teaser_tag = soup.find("div", class_="article_teaser_body")
if title_tag is None or teaser_tag is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise RuntimeError(
        "News title/teaser not found; the page may not have finished loading."
    )

news_title = title_tag.text
news_paragraph = teaser_tag.text

# Display what was scraped.
print(news_title)
print("----------------")
print(news_paragraph)



NASA Moves Forward With Campaign to Return Mars Samples to Earth
----------------
During this next phase, the program will mature critical technologies and make critical design decisions as well as assess industry partnerships.


## JPL Featured Space Image

In [5]:
# Open the JPL featured space image site in the automated browser.
url_spaceimage = "https://spaceimages-mars.com/"
browser.visit(url_spaceimage)

In [6]:
# Snapshot the current page and parse it with the built-in HTML parser.
html = browser.html
soup = BeautifulSoup(markup=html, features="html.parser")



In [7]:
# First <div class="header"> on the page.
# NOTE(review): not referenced by any later cell visible here - confirm it is still needed.
header = soup.find("div", attrs={"class": "header"})

In [8]:
# Open the full-size featured image by clicking the "FULL IMAGE" link.
browser.links.find_by_partial_text('FULL IMAGE').click()

# The overlay markup is only looked up after the click, so give it up to
# 5 seconds to appear before snapshotting; otherwise `image_box` may be None.
browser.is_element_present_by_css('div.fancybox-inner img', wait_time=5)

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

image_box = soup.find('div', class_='fancybox-inner')

# urljoin resolves the image's src against the page URL. Unlike plain string
# concatenation it also copes with a base ending in "index.html", a src that
# starts with "/", and a src that is already absolute.
featured_image_url = urljoin(url_spaceimage, image_box.img['src'])
featured_image_url

'https://spaceimages-mars.com/image/featured/mars3.jpg'

## Mars Facts

In [9]:
# Read every HTML table on the Mars facts page into a list of DataFrames.
url_facts = "https://galaxyfacts-mars.com/"
mars_facts = pd.read_html(io=url_facts)

mars_facts

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [10]:
# Build the comparison table from the first scraped table: give the columns
# readable names, then drop row 0 (it holds the header text as data) and row 6.
# NOTE(review): row 6 is the "Temperature:" row in the output above - confirm
# that dropping it is intended.
mars_facts_df = (
    mars_facts[0]
    .rename(columns={0: "Mars - Earth Comparison", 1: "Mars", 2: "Earth"})
    .drop(index=[0, 6])
)

# Also write the table to disk as an HTML file.
mars_facts_df.to_html("mars_facts.html")

In [11]:
# Render the facts table as one HTML string with all newlines removed.
mars_html = mars_facts_df.to_html().replace("\n", "")
mars_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Mars - Earth Comparison</th>      <th>Mars</th>      <th>Earth</th>    </tr>  </thead>  <tbody>    <tr>      <th>1</th>      <td>Diameter:</td>      <td>6,779 km</td>      <td>12,742 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg</td>      <td>5.97 × 10^24 kg</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2</td>      <td>1</td>    </tr>    <tr>      <th>4</th>      <td>Distance from Sun:</td>      <td>227,943,824 km</td>      <td>149,598,262 km</td>    </tr>    <tr>      <th>5</th>      <td>Length of Year:</td>      <td>687 Earth days</td>      <td>365.24 days</td>    </tr>  </tbody></table>'

## Mars Hemispheres

In [12]:
# Load the Mars hemispheres landing page in the automated browser session.
url_hemisphere = "https://marshemispheres.com/"
browser.visit(url_hemisphere)

In [13]:
# Snapshot the hemispheres landing page and parse it for searching.
html_hemisphere = browser.html
soup = BeautifulSoup(markup=html_hemisphere, features="html.parser")

In [14]:
# Every <div class="item"> on the landing page describes one hemisphere.
hemispheres = soup.find_all("div", class_="item")

# One {"title": ..., "img_url": ...} record per hemisphere.
hemispheres_info = []

# Base URL against which the site's links and image paths are resolved.
hemispheres_url = "https://marshemispheres.com/"

for item in hemispheres:
    title = item.find("h3").text
    hemispheres_img = item.find("a", class_="itemLink product-item")["href"]

    # Visit the hemisphere's detail page, which holds the full-size image.
    # urljoin copes with relative, root-relative and absolute hrefs alike.
    browser.visit(urljoin(hemispheres_url, hemispheres_img))

    image_html = browser.html
    web_info = BeautifulSoup(image_html, "html.parser")

    # Resolve the full-size image path into an absolute URL.
    img_url = urljoin(
        hemispheres_url, web_info.find("img", class_="wide-image")["src"]
    )

    hemispheres_info.append({"title": title, "img_url": img_url})

    # Show each record as soon as it is scraped.
    print("")
    print(title)
    print(img_url)
    print("-----------------------------------------")

# This is the last cell that drives the browser, so close the session here
# rather than leaving a Chrome/chromedriver process running.
browser.quit()


Cerberus Hemisphere Enhanced
https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg
-----------------------------------------

Schiaparelli Hemisphere Enhanced
https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg
-----------------------------------------

Syrtis Major Hemisphere Enhanced
https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg
-----------------------------------------

Valles Marineris Hemisphere Enhanced
https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg
-----------------------------------------


In [15]:
# Gather every scraped result into one dictionary, ready to load into MongoDB.
mars_dict = dict(
    news_title=news_title,
    news_paragraph=news_paragraph,
    featured_image_url=featured_image_url,
    mars_facts=mars_html,
    hemisphere_images=hemispheres_info,
)

mars_dict



{'news_title': 'NASA Moves Forward With Campaign to Return Mars Samples to Earth',
 'news_paragraph': 'During this next phase, the program will mature critical technologies and make critical design decisions as well as assess industry partnerships.',
 'featured_image_url': 'https://spaceimages-mars.com/image/featured/mars3.jpg',
 'mars_facts': '<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Mars - Earth Comparison</th>      <th>Mars</th>      <th>Earth</th>    </tr>  </thead>  <tbody>    <tr>      <th>1</th>      <td>Diameter:</td>      <td>6,779 km</td>      <td>12,742 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg</td>      <td>5.97 × 10^24 kg</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2</td>      <td>1</td>    </tr>    <tr>      <th>4</th>      <td>Distance from Sun:</td>      <td>227,943,824 km</td>      <td>149,598,262 km</td>    </tr>    <tr>      <th>5</th>  

In [16]:
# Save the scraped data as JSON for upload. `json` is imported in the
# notebook's top import cell. The explicit encoding keeps the output file
# independent of the platform's default text encoding.
with open('mars_dict.json', 'w', encoding='utf-8') as fp:
    json.dump(mars_dict, fp)