## Scrape websites for the latest information about planet Mars

In [2]:
#import required libraries
import os
from splinter import Browser
from bs4 import BeautifulSoup as bs
import time
from selenium import webdriver  
import requests as req
import pandas as pd
from lxml import html

### Get news from https://mars.nasa.gov/news/

In [3]:
# Start a headless Chrome session through splinter; 'chromedriver.exe' is
# resolved relative to the working directory / PATH.
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=True)

# Open the NASA Mars news page.
url = "https://mars.nasa.gov/news/"
browser.visit(url)
# The news list may not be in the DOM the instant visit() returns, so give the
# first list item up to 10 seconds to appear before snapshotting the HTML.
browser.is_element_present_by_css('li.slide', wait_time=10)

# Snapshot the rendered HTML and parse it.
html = browser.html
soup = bs(html, "html.parser")

# Look the first article up once; fail with a readable message instead of an
# AttributeError on None when the page layout is not what we expect.
first_slide = soup.find('li', class_="slide")
if first_slide is None:
    raise RuntimeError(f"No 'li.slide' news item found on {url}")

# Title of the first article.
news_title = first_slide.find('div', class_="content_title").text
print(news_title)
# Teaser paragraph under the first title.
news_p = first_slide.find('div', class_="article_teaser_body").text
print(news_p)

8 Martian Postcards to Celebrate Curiosity's Landing Anniversary
The NASA rover touched down eight years ago, on Aug. 5, 2012, and will soon be joined by a second rover, Perseverance.


### Get featured image from https://www.jpl.nasa.gov/spaceimages/

In [4]:
# JPL space images landing page.
url2 = "https://www.jpl.nasa.gov/spaceimages/"

# Open the page, click the "full image" button and give the overlay time to load.
browser.visit(url2)
browser.find_by_id('full_image').click()
time.sleep(5)

# Follow the "more info" link to the image detail page.
browser.links.find_by_partial_text('more info').click()

# Wait (up to 10 s) for the main image to exist on the detail page, then read
# its src through splinter's public element[attr] access rather than the
# private `_element` selenium handle.
main_image_xpath = "//img[@class='main_image']"
browser.is_element_present_by_xpath(main_image_xpath, wait_time=10)
featured_image_url = browser.find_by_xpath(main_image_xpath).first['src']

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16606_hires.jpg


### Get Mars weather from https://twitter.com/marswxreport?lang=en

In [9]:
# Twitter account that posts the Mars weather reports.
url3 = "https://twitter.com/marswxreport?lang=en"

# splinter did not return the expected result for this page, so a plain
# selenium Chrome driver is used to fetch the rendered page source instead.
driver = webdriver.Chrome()
try:
    driver.get(url3)
    html = driver.page_source
finally:
    # quit() ends the whole driver session (close() would only close the
    # window), and the finally block guarantees cleanup even if get() raises.
    driver.quit()

# Scrape the tweet text containers into a list.
# NOTE(review): this class string is copied from the live page markup and will
# break whenever Twitter regenerates its CSS class names.
soup = bs(html, 'html.parser')
tweets = soup.find_all('div', class_="css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0")


##### *Note:* if the result is not weather data, change `tweets[0]` to `tweets[1]`.

In [10]:
# Pick the weather report out of the scraped tweets.
# The first tweet is not always a weather report, so prefer the first tweet
# whose text mentions both a sol number and a pressure reading.
# NOTE(review): the 'sol ' / 'pressure' markers are taken from the report
# format seen in this notebook's output - adjust if the format changes.
mars_weather = next(
    (tweet.text for tweet in tweets
     if 'sol ' in tweet.text and 'pressure' in tweet.text),
    None,
)
if mars_weather is None:
    # Nothing looked like a weather report: fall back to the first tweet.
    mars_weather = tweets[0].text
print(mars_weather)

InSight sol 598 (2020-08-01) low -91.6ºC (-132.9ºF) high -15.1ºC (4.8ºF)
winds from the WNW at 7.1 m/s (15.8 mph) gusting to 19.2 m/s (43.0 mph)
pressure at 7.90 hPa


### Get Mars Facts table structure from https://space-facts.com/mars/

In [11]:
# Mars facts page
url4 = "https://space-facts.com/mars/"

# let pandas parse the page; read_html returns a list of DataFrames
df_list = pd.read_html(url4)
# take the first parsed table and relabel columns 0 and 1 in a single step
table_df = df_list[0].rename(
    columns={
        0: 'Parameter',
        1: 'Value',
    }
)
# render the DataFrame as an HTML <table> string
html_table = table_df.to_html()

##### *Note:* this table structure is used in index.html.

In [12]:
# show the generated HTML table markup as-is (no '\n' cleaning is applied here)
print(html_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Parameter</th>
      <th>Value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>6</th>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>7</th>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>8</th>
      

### Get Mars Facts table data from https://space-facts.com/mars/

In [13]:
# Load the Mars facts page in the splinter browser and parse the rendered HTML.
browser.visit(url4)

html = browser.html
soup = bs(html, "html.parser")

# The facts are expected in the first <table> on the page.
tables = soup.find_all('table')
if not tables:
    raise RuntimeError(f"No <table> found on {url4}")
my_table = tables[0]

# Only <tr> elements can hold the title/value cells, so iterate over rows only.
rows = my_table.find_all('tr')

table_data = []
for row in rows:
    title_cell = row.find('td', class_="column-1")
    value_cell = row.find('td', class_="column-2")
    # Skip header or malformed rows instead of crashing on None.text.
    if title_cell is None or value_cell is None:
        continue
    table_data.append({
        'title': title_cell.text.strip(),
        'value': value_cell.text.strip(),
    })

table_data


[{'title': 'Equatorial Diameter:', 'value': '6,792 km'},
 {'title': 'Polar Diameter:', 'value': '6,752 km'},
 {'title': 'Mass:', 'value': '6.39 × 10^23 kg (0.11 Earths)'},
 {'title': 'Moons:', 'value': '2 (Phobos & Deimos)'},
 {'title': 'Orbit Distance:', 'value': '227,943,824 km (1.38 AU)'},
 {'title': 'Orbit Period:', 'value': '687 days (1.9 years)'},
 {'title': 'Surface Temperature:', 'value': '-87 to -5 °C'},
 {'title': 'First Record:', 'value': '2nd millennium BC'},
 {'title': 'Recorded By:', 'value': 'Egyptian astronomers'}]

### Get Mars hemispheres images from https://astrogeology.usgs.gov

In [14]:
# USGS astrogeology search results for the enhanced Mars hemisphere images.
url5 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Load the results page and take the rendered HTML.
browser.visit(url5)
html = browser.html

# Parse the page and collect the description block of each result.
soup = bs(html, "html.parser")
images = soup.find_all('div', class_="description")

# Site root that the relative result links are appended to in the next cell.
link = "https://astrogeology.usgs.gov"

In [15]:
# Loop through the result blocks: build each detail-page URL from its href,
# visit it, scrape the image URL and title, and append both to a list.
hemisphere_image_urls = []
for image in images:
    img_link = f"{link}{image.find('a')['href']}"
    browser.visit(img_link)
    # Read the src through splinter's public element[attr] access instead of
    # the private `_element` selenium handle.
    img_url = browser.find_by_xpath("//img[@class='wide-image']").first['src']
    title = browser.find_by_xpath("//h2[@class='title']").text
    # Remove a trailing "Enhanced" as a whole word. str.rstrip('Enhanced')
    # strips any of those *characters*, which can eat real letters from titles
    # that do not end with the suffix.
    if title.endswith('Enhanced'):
        title = title[:-len('Enhanced')]
    # Drop the whitespace left between the name and the removed suffix.
    title = title.rstrip()
    hemisphere_image_urls.append({"title" : title, "img_url" : img_url})


### Insert all information about planet Mars to a dictionary 

In [22]:
# Flatten every scraped value into one flat dictionary for the DB.
mars_website_dict = {
    'news_title': news_title,
    'news_paragraph': news_p,
    'featured_image_url': featured_image_url,
    'mars_weather': mars_weather,
    # row1_title / row1_value ... row9_title / row9_value,
    # taken from the first nine entries of table_data
    **{
        f'row{n}_{field}': table_data[n - 1][field]
        for n in range(1, 10)
        for field in ('title', 'value')
    },
    # url1_title / url1_img ... url4_title / url4_img,
    # taken from the first four entries of hemisphere_image_urls
    **{
        f'url{n}_{suffix}': hemisphere_image_urls[n - 1][source_key]
        for n in range(1, 5)
        for suffix, source_key in (('title', 'title'), ('img', 'img_url'))
    },
}


In [23]:
# display the assembled dictionary so its keys and values can be checked
mars_website_dict

{'news_title': "8 Martian Postcards to Celebrate Curiosity's Landing Anniversary",
 'news_paragraph': 'The NASA rover touched down eight years ago, on Aug. 5, 2012, and will soon be joined by a second rover, Perseverance.',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16606_hires.jpg',
 'mars_weather': 'InSight sol 598 (2020-08-01) low -91.6ºC (-132.9ºF) high -15.1ºC (4.8ºF)\nwinds from the WNW at 7.1 m/s (15.8 mph) gusting to 19.2 m/s (43.0 mph)\npressure at 7.90 hPa',
 'row1_title': 'Equatorial Diameter:',
 'row1_value': '6,792 km',
 'row2_title': 'Polar Diameter:',
 'row2_value': '6,752 km',
 'row3_title': 'Mass:',
 'row3_value': '6.39 × 10^23 kg (0.11 Earths)',
 'row4_title': 'Moons:',
 'row4_value': '2 (Phobos & Deimos)',
 'row5_title': 'Orbit Distance:',
 'row5_value': '227,943,824 km (1.38 AU)',
 'row6_title': 'Orbit Period:',
 'row6_value': '687 days (1.9 years)',
 'row7_title': 'Surface Temperature:',
 'row7_value': '-87 to -5 °C',
 'row8_ti