In [1]:
# Import dependencies
import os
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
import pandas as pd

In [2]:
# Tell splinter where the ChromeDriver binary lives, then open a visible
# (non-headless) Chrome window that the scraping cells below drive.
# NOTE(review): no cell visible here calls browser.quit(); confirm the
# window is closed once scraping is finished.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

### NASA Mars News

#### Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [3]:
# Point the splinter-driven browser at the NASA Mars news listing
url_news = 'https://mars.nasa.gov/news/'
browser.visit(url_news)

In [4]:
# Give the page up to 10 s to show its first article teaser before taking
# the HTML snapshot. Under "Run All" this cell executes right after visit(),
# and the article list is presumably filled in by client-side script
# (TODO confirm), so an immediate snapshot can miss it.
browser.is_element_present_by_css('div.article_teaser_body', wait_time=10)

# Snapshot the rendered page and parse it for the scraping cell below
html_news=browser.html
soup_news=bs(html_news,'html.parser')

In [5]:
# Take the first title block and the first teaser paragraph found in the
# parsed page, keep their text, and show both
title_div=soup_news.find('div',class_="content_title")
news_title=title_div.get_text()
teaser_div=soup_news.find('div',class_="article_teaser_body")
news_p=teaser_div.get_text()
print(news_title)
print(news_p)

InSight Captures Sunrise and Sunset on Mars
InSight joins the rest of NASA's Red Planet surface missions, all of which have photographed either the start or end of a Martian day.


#### JPL Mars Space Images - Featured Image

In [6]:
# Point the browser at the JPL space-images search, filtered to Mars
url_jpl = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url_jpl)

In [7]:
# Snapshot the JPL page as currently rendered and parse it
html_jpl=browser.html
soup_jpl=bs(html_jpl,features='html.parser')

In [8]:
# The first anchor inside the carousel container carries the image path in
# its data-fancybox-href attribute (located with the Chrome inspector)
carousel_div=soup_jpl.find('div',class_="carousel_items")
scrape_link=carousel_div.a["data-fancybox-href"]

In [9]:
# The scraped path is site-relative, so prepend the site root
# (not url_jpl, which points at the search page)
base_link="https://www.jpl.nasa.gov"
featured_image_url="".join([base_link,scrape_link])
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA00063_ip.jpg


#### Mars Weather

In [10]:
# Method adapted from:
# https://dev.to/ayushsharma/a-guide-to-web-scraping-in-python-using-beautifulsoup-1kgo
url_mwt = "https://twitter.com/marswxreport?lang=en"
# requests applies no timeout by default, so set one to keep this cell from
# hanging indefinitely, and fail loudly on an HTTP error status instead of
# letting the next cell parse an error page.
data=requests.get(url_mwt, timeout=30)
data.raise_for_status()

In [11]:
# Parse the response body and collect every stream item of the timeline;
# all_tweets starts empty and is filled by the next cell
html_mwt=bs(data.text,features='html.parser')
timeline=html_mwt.select('#timeline li.stream-item')
all_tweets=[]

In [12]:
# Collect the text of every tweet as a {'text': ...} record.
# The list is rebuilt from scratch here so re-running this cell never
# appends duplicates to what an earlier run collected.
all_tweets=[]
for tweet in timeline:
    tweet_p=tweet.select_one('p.tweet-text')
    if tweet_p is None:
        # stream item without a tweet paragraph: skip it rather than
        # crash with IndexError
        continue
    tweet_text=tweet_p.get_text()
    all_tweets.append({'text':tweet_text})
print(f"we have scraped: {len(all_tweets)} tweets")

we have scraped: 20 tweets


In [13]:
# Weather reports begin with "InSight sol ..."; take the first such tweet in
# the scraped list (presumably newest-first - confirm) instead of relying on
# a fixed position that shifts whenever other tweets are posted.
weather_tweets=[
    tweet['text'] for tweet in all_tweets
    if tweet['text'].startswith('InSight sol')
]
if not weather_tweets:
    raise ValueError("no weather report found among the scraped tweets")

# Drop the trailing picture link by cutting at its marker rather than
# assuming it is always exactly 26 characters long.
mars_weather=weather_tweets[0].partition('pic.twitter.com')[0]
print(f"Latest tweet about weather is: {mars_weather}")

Latest tweet about weather is: InSight sol 149 (2019-04-28) low -98.8ºC (-145.8ºF) high -19.5ºC (-3.0ºF)
winds from the SW at 4.6 m/s (10.2 mph) gusting to 14.2 m/s (31.7 mph)
pressure at 7.40 hPa


#### Mars Facts

In [14]:
# Let pandas read the tables on the facts page into a list of DataFrames
url_fact = 'https://space-facts.com/mars/'
tables=pd.read_html(io=url_fact)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [15]:
# Work on a copy of the first scraped table so the frames held in `tables`
# keep their original headers and this cell can be re-run safely, then give
# the two columns readable names.
df_fact=tables[0].copy()
df_fact.columns = ['Description','Value']
df_fact

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [16]:
# Render the facts table as HTML (header row centred, no index column) and
# strip the newlines so the markup is a single-line string
table_markup=df_fact.to_html(index=False,header=True,justify='center')
html_fact=table_markup.replace('\n', '')
html_fact

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: center;">      <th>Description</th>      <th>Value</th>    </tr>  </thead>  <tbody>    <tr>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

#### Mars Hemispheres

In [17]:
# Point the browser at the USGS search results for the enhanced Mars hemispheres
url_hemi = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url_hemi)

In [18]:
# Snapshot the results page as currently rendered and parse it
html_hemi=browser.html
soup_hemi=bs(html_hemi,features='html.parser')

In [19]:
# Every search result sits in a <div class="item"> that wraps a link to its
# detail page
scrape_hemis=soup_hemi.find_all(name='div', class_='item')

In [20]:
# Prefix each result's first href with the site root to build the list of
# detail-page links
base_url_hemi='https://astrogeology.usgs.gov'
link_hemis=[base_url_hemi+hemi.a['href'] for hemi in scrape_hemis]
print(f"There are totally {len(link_hemis)} links")

There are totally 4 links


In [21]:
# Visit each hemisphere detail page in turn and record its title together
# with the first link listed in the page's "downloads" section
hemisphere_image_urls=[]
for link in link_hemis:
    browser.visit(link)
    soup_img=bs(browser.html,'html.parser')
    title_tag=soup_img.find('h2',class_="title")
    img_title=title_tag.text
    downloads_div=soup_img.find('div',class_='downloads')
    img_url=downloads_div.ul.a["href"]
    hemisphere_image_urls.append(dict(title=img_title, img_url=img_url))

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


### End of Step 1