# Set up

In [1]:
from bs4 import BeautifulSoup
import pymongo
from splinter import Browser
import requests
import time
import pandas as pd
import time

In [2]:
# Open a visible (non-headless) Chrome window through Splinter, using
# 'chromedriver.exe' as the driver path. Every scraping cell below reuses
# this one browser object.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser(
    'chrome',
    headless=False,
    **executable_path
)

In [3]:
def cook_soup(url):
    """Load ``url`` in the shared browser and parse the resulting page.

    Relies on the notebook-level ``browser`` object: the page is visited
    first, then the browser's current HTML is parsed with ``html.parser``.

    Args:
        url: Address of the page to open.

    Returns:
        A ``BeautifulSoup`` tree built from the browser's HTML after the visit.
    """
    browser.visit(url)
    return BeautifulSoup(browser.html, 'html.parser')

In [4]:
# Accumulates every scraped value; each section below adds its own keys.
mars_data = dict()

# Nasa Mars News

In [5]:
# Fetch the NASA Mars news listing through the shared browser.
nasa_news_url = 'https://mars.nasa.gov/news/'
nasa_news_soup = cook_soup(nasa_news_url)

In [6]:
# Collect every <li class="slide"> element; the cells below read the news
# fields from the first one.
list_of_news = nasa_news_soup.find_all('li', attrs={'class': 'slide'})
len(list_of_news)

40

In [7]:
# The first slide is used as the latest article; its headline is the text of
# the 'content_title' div.
first_news_item = list_of_news[0]
lastest_news_title = first_news_item.find('div', attrs={'class': 'content_title'}).get_text()
lastest_news_title

'Curiosity on the Move Again'

In [8]:
# Teaser paragraph of the first slide, taken from its 'article_teaser_body' div.
first_news_item = list_of_news[0]
lastest_news_teaser = first_news_item.find('div', attrs={'class': 'article_teaser_body'}).get_text()
lastest_news_teaser

"NASA's Mars Curiosity rover drove about 197 feet over the weekend to a site called Lake Orcadie, pushing its total odometry to over 12 miles."

In [9]:
# Publication date of the first slide, kept as the raw text of its
# 'list_date' div (no date parsing is done here).
first_news_item = list_of_news[0]
latest_news_date = first_news_item.find('div', attrs={'class': 'list_date'}).get_text()
latest_news_date

'November  6, 2018'

In [10]:
# Record the three news fields in the shared results dict.
mars_data.update({
    'news title': lastest_news_title,
    'news teaser': lastest_news_teaser,
    'news date': latest_news_date,
})
mars_data

{'news title': 'Curiosity on the Move Again',
 'news teaser': "NASA's Mars Curiosity rover drove about 197 feet over the weekend to a site called Lake Orcadie, pushing its total odometry to over 12 miles.",
 'news date': 'November  6, 2018'}

# JPL Mars Space Images - Featured Image



In [11]:
# Build the Mars image-search URL from the JPL site root, then load it.
jpl_base_url = 'https://www.jpl.nasa.gov'
url_pic = ''.join([jpl_base_url, '/spaceimages/?search=&category=Mars'])

jpl_pic_soup = cook_soup(url_pic)

In [12]:
# Take the first anchor carrying the "button fancybox" class; in the saved run
# this is the "FULL IMAGE" button of the featured picture.
fancybox_anchors = jpl_pic_soup.find_all('a', attrs={'class': 'button fancybox'})
jpl_feature_img = fancybox_anchors[0]
jpl_feature_img

<a class="button fancybox" data-description="This composite of the giant barred spiral galaxy NGC 6872 is 522,000 light-years across, making it about five times the size of the Milky Way." data-fancybox-group="images" data-fancybox-href="/spaceimages/images/mediumsize/PIA16613_ip.jpg" data-link="/spaceimages/details.php?id=PIA16613" data-title="Dwarf Galaxy Spotted" id="full_image">
					FULL IMAGE
				  </a>

## It turns out that the "FULL IMAGE" link provided by NASA only points to a medium-sized image, so we follow the link to the featured image's detail page to find the large-sized image

In [13]:
# 'data-link' holds the site-relative path of the image's detail page, so it
# is appended to the JPL site root.
jpl_feature_img_link = ''.join((jpl_base_url, jpl_feature_img['data-link']))
jpl_feature_img_link

'https://www.jpl.nasa.gov/spaceimages/details.php?id=PIA16613'

In [14]:
# Load and parse the detail page of the featured image.
jpl_large_pic_soup = cook_soup(jpl_feature_img_link)

In [15]:
# Walk article -> <figure class="lede"> -> first anchor; its href is the
# site-relative path of the large image, so prefix it with the site root.
lede_figure = jpl_large_pic_soup.find('article').find('figure', attrs={'class': 'lede'})
jpl_large_feature_img_link = jpl_base_url + lede_figure.find('a')['href']
jpl_large_feature_img_link

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16613_hires.jpg'

In [16]:
# Store the large featured-image URL in the shared results dict.
mars_data.update({'featured image link': jpl_large_feature_img_link})
mars_data

{'news title': 'Curiosity on the Move Again',
 'news teaser': "NASA's Mars Curiosity rover drove about 197 feet over the weekend to a site called Lake Orcadie, pushing its total odometry to over 12 miles.",
 'news date': 'November  6, 2018',
 'featured image link': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16613_hires.jpg'}

# Mars Weather

In [17]:
# Twitter page scraped for the latest Mars weather report.
twt_url='https://twitter.com/marswxreport?lang=en'

In [18]:
# Load the Twitter page in the shared browser and parse it.
twt_soup = cook_soup(twt_url)

In [19]:
# Drill into the first 'content' div (presumably the newest tweet — verify
# against the live page): div.content -> div.js-tweet-text-container -> <p>.
first_tweet = twt_soup.find('div', attrs={'class': 'content'})
tweet_text_box = first_tweet.find('div', attrs={'class': 'js-tweet-text-container'})
latest_weather_twt = tweet_text_box.find('p').get_text()

In [20]:
# Show the scraped tweet text.
latest_weather_twt

'Sol 2224 (2018-11-08), high 0C/32F, low -72C/-97F, pressure at 8.65 hPa, daylight 06:19-18:36'

In [21]:
# Store the weather tweet text in the shared results dict.
mars_data.update({'mars weather': latest_weather_twt})
mars_data

{'news title': 'Curiosity on the Move Again',
 'news teaser': "NASA's Mars Curiosity rover drove about 197 feet over the weekend to a site called Lake Orcadie, pushing its total odometry to over 12 miles.",
 'news date': 'November  6, 2018',
 'featured image link': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16613_hires.jpg',
 'mars weather': 'Sol 2224 (2018-11-08), high 0C/32F, low -72C/-97F, pressure at 8.65 hPa, daylight 06:19-18:36'}

# Mars Facts

In [22]:
# Page whose HTML table of Mars facts is read with pandas below.
facts_url = 'https://space-facts.com/mars/'


In [23]:
# pd.read_html fetches the page itself (no browser involved) and returns a
# list of DataFrames; the saved output below shows a single table.
facts_tables = pd.read_html(facts_url)
facts_tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [24]:
# The facts table is the first (index 0) DataFrame in the list.
facts_df = facts_tables[0]
facts_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [25]:
# Replace the default integer column labels (0, 1) with descriptive names.
# This relabels the frame in place, so facts_tables[0] is relabelled too.
facts_df.columns = ['properties', 'data']
facts_df

Unnamed: 0,properties,data
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [26]:
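# NOTE: these lines are disabled. The output saved under this cell appears to
# come from an earlier run in which they were active. If they are re-enabled,
# to_html(index=False) in the next cell would omit the 'properties' column.
#facts_df.set_index('properties', inplace=True)
#facts_df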

Unnamed: 0_level_0,data
properties,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [27]:
# Render the facts table as HTML without the row index, then strip newlines so
# the markup is a single line.
# NOTE(review): the saved output below still shows 'properties' as a row
# index, so it looks like it predates index=False — re-run to confirm.
facts_table_markup = facts_df.to_html(index=False)
facts_html_table = facts_table_markup.replace('\n', '')
facts_html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>data</th>    </tr>    <tr>      <th>properties</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [28]:
# Store the single-line HTML table in the shared results dict.
mars_data.update({'mars facts table': facts_html_table})
mars_data

{'news title': 'Curiosity on the Move Again',
 'news teaser': "NASA's Mars Curiosity rover drove about 197 feet over the weekend to a site called Lake Orcadie, pushing its total odometry to over 12 miles.",
 'news date': 'November  6, 2018',
 'featured image link': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16613_hires.jpg',
 'mars weather': 'Sol 2224 (2018-11-08), high 0C/32F, low -72C/-97F, pressure at 8.65 hPa, daylight 06:19-18:36',
 'mars facts table': '<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>data</th>    </tr>    <tr>      <th>properties</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orb

# Mars Hemispheres

In [29]:
# Build the USGS search URL for the enhanced Mars hemisphere images from the
# site root, then load the result page.
hemi_base_url = 'https://astrogeology.usgs.gov'
hemi_url = ''.join([hemi_base_url, '/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'])
hemi_soup = cook_soup(hemi_url)

In [30]:
# Collect the result entries (<div class="item">) from the search page; each
# one is visited in the loop below.
hemi_list = hemi_soup.find_all('div',class_='item')


In [31]:
# Visit every hemisphere's detail page and pair its title with the href of the
# first anchor on that page that opens in a new tab (target="_blank").
hemi_image_urls = []
for hemi_item in hemi_list:
    detail_url = hemi_base_url + hemi_item.find('a')['href']
    hemi_title = hemi_item.find('h3').text

    detail_soup = cook_soup(detail_url)
    new_tab_anchors = detail_soup.find_all('a', attrs={'target': '_blank'})

    hemi_image_urls.append({
        'title': hemi_title,
        'img_url': new_tab_anchors[0]['href'],
    })
    # Pause after each page load (presumably to go easy on the server).
    time.sleep(3)
hemi_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [32]:
# Store the list of {'title', 'img_url'} dicts in the shared results dict.
mars_data.update({'hemisphere picture': hemi_image_urls})
mars_data

{'news title': 'Curiosity on the Move Again',
 'news teaser': "NASA's Mars Curiosity rover drove about 197 feet over the weekend to a site called Lake Orcadie, pushing its total odometry to over 12 miles.",
 'news date': 'November  6, 2018',
 'featured image link': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16613_hires.jpg',
 'mars weather': 'Sol 2224 (2018-11-08), high 0C/32F, low -72C/-97F, pressure at 8.65 hPa, daylight 06:19-18:36',
 'mars facts table': '<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>data</th>    </tr>    <tr>      <th>properties</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orb

In [33]:
#import scrape_mars

In [34]:
#test_dict = scrape_mars.scrape()

In [35]:
#test_dict

In [36]:
#test_dict.keys()

In [37]:
#test_dict['news title']

In [38]:
#test_dict['hemisphere picture'][0]['title']

In [39]:
#test_dict['hemisphere picture'][0]['img_url']