In [5]:
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup as Soup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [14]:
# Splinter setup: resolve the chromedriver path, then open a visible (non-headless) Chrome session
driver_path = ChromeDriverManager().install()
executablepath = dict(executable_path=driver_path)
browser = Browser("chrome", headless=False, **executablepath)



Current google-chrome version is 90.0.4430
Get LATEST driver version for 90.0.4430
Driver [C:\Users\Clovis\.wdm\drivers\chromedriver\win32\90.0.4430.24\chromedriver.exe] found in cache


## NASA Mars News Site

In [9]:
# Go to the URL for the news site
url = "https://www.redplanetscience.com"
browser.visit(url)

# Give the article list up to one second to show up before snapshotting the page.
# Without this, the snapshot can be taken before "div.list_text" exists, in which
# case select_one() below returns None and the following cells fail.
# NOTE(review): presumably the list is rendered client-side - confirm.
browser.is_element_present_by_css("div.list_text", wait_time=1)

# Create a Soup object from the rendered page source
html = browser.html
news_soup = Soup(html, "html.parser")

# Find the first "list_text" block (date, title and teaser of one article)
element = news_soup.select_one("div.list_text")
print(element)

<div class="list_text">
<div class="list_date">June 1, 2021</div>
<div class="content_title">Mars Is Getting a New Robotic Meteorologist</div>
<div class="article_teaser_body">Sensors on NASA's Perseverance will help prepare for future human exploration by taking weather measurements and studying dust particles.</div>
</div>


In [11]:
# Headline of the article: text of the "content_title" div inside the selected block
title_tag = element.find("div", class_="content_title")
news_title = title_tag.get_text()
print(news_title)

Mars Is Getting a New Robotic Meteorologist


In [12]:
# Teaser paragraph: text of the "article_teaser_body" div inside the same block
teaser_tag = element.find("div", class_="article_teaser_body")
news_paragraph = teaser_tag.get_text()
print(news_paragraph)

Sensors on NASA's Perseverance will help prepare for future human exploration by taking weather measurements and studying dust particles.


## JPL Mars Space Images

In [37]:
# Go to the URL for the images site.
# The page source is not captured here: it is read only after the image button
# has been clicked, so a snapshot taken at this point would never be used.
url = "https://www.spaceimages-mars.com"
browser.visit(url)

In [40]:
# Click the image button: it is the second <button> element on the page (index 1)
buttons = browser.find_by_tag("button")
image_element = buttons[1]
image_element.click()

In [41]:
#After clicking the image
#Create a Soup object
html = browser.html
image_soup = Soup(html, "html.parser")

In [42]:
# Locate the <img class="fancybox-image"> tag and read its (relative) src attribute
fancybox_img = image_soup.find("img", class_="fancybox-image")
image_url = fancybox_img.get("src")
print(image_url)

image/featured/mars3.jpg


In [44]:
# Build the absolute image URL from the site root and the scraped src.
# urljoin (rather than plain string concatenation) also gives a correct result
# when src starts with "/" or is already an absolute URL; the trailing "/" on
# the base makes sure a relative src is resolved underneath it.
# NOTE: `url` must still hold the images-site address when this cell runs;
# other sections of the notebook rebind the same name.
img_url = urljoin(f"{url}/", image_url)
print(img_url)

https://www.spaceimages-mars.com/image/featured/mars3.jpg


## Mars Facts

In [60]:
# Let pandas parse every <table> on the facts page, then keep the first one
facts_tables = pd.read_html("https://www.galaxyfacts-mars.com")
df = facts_tables[0]
df

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [61]:
# Re-configure the data frame: name the three columns and use "Description" as the index.
# The guard makes the cell safe to re-run: once "Description" is the index only two
# columns remain, and assigning three column names again would raise a ValueError.
# Rebinding `df` (instead of inplace=True) leaves the previously displayed frame untouched.
# NOTE(review): the displayed output shows the table's own header row
# ("Mars - Earth Comparison", "Mars", "Earth") kept as the first data row -
# confirm whether it should be dropped.
if df.index.name != "Description":
    df = df.set_axis(["Description", "Mars", "Earth"], axis=1).set_index("Description")
df

Unnamed: 0_level_0,Mars,Earth
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
Mars - Earth Comparison,Mars,Earth
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [62]:
# Render the facts table as an HTML <table> string and show it
facts_html = df.to_html()
facts_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <td>Mars</td>\n      <td>Earth</td>\n    </tr>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>

## Mars Hemispheres

In [85]:
# Go to the URL for the hemispheres site.
# Note: this rebinds `url`, which earlier cells also use for other sites.
url = "https://www.marshemispheres.com"
browser.visit(url)

# Snapshot of the landing page source.
# NOTE(review): `html` is not read by any cell visible here - confirm it is still needed.
html = browser.html

In [86]:
# Collect one {"img_url": ..., "title": ...} record per hemisphere thumbnail
hemisphere_urls = []

thumbnail_count = len(browser.find_by_css("a.product-item img"))
for index in range(thumbnail_count):
    # Look the thumbnails up again on every pass and click the one at this position
    thumbnails = browser.find_by_css("a.product-item img")
    thumbnails[index].click()

    # On the page that opens: the "Sample" link's href is the image URL,
    # and the h2.title text is the hemisphere title
    sample_link = browser.links.find_by_text("Sample").first
    record = {
        "img_url": sample_link["href"],
        "title": browser.find_by_css("h2.title").text,
    }
    hemisphere_urls.append(record)

    # Go back to the main page before handling the next thumbnail
    browser.back()

In [87]:
# Display the collected hemisphere records (one dict with "img_url" and "title" per hemisphere)
hemisphere_urls

[{'img_url': 'https://www.marshemispheres.com/images/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://www.marshemispheres.com/images/schiaparelli_enhanced-full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://www.marshemispheres.com/images/syrtis_major_enhanced-full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://www.marshemispheres.com/images/valles_marineris_enhanced-full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]