# Module 11 Challenge
## Deliverable 1: Scrape Titles and Preview Text from Mars News

In [81]:
# Import Splinter and BeautifulSoup
from bs4 import BeautifulSoup
from splinter import Browser
import time
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import json

In [82]:
# Start a Splinter browser session driven by Chrome. The resulting `browser`
# object is shared notebook state: the cells below use it to visit pages and
# read their HTML.
browser = Browser('chrome')

### Step 1: Visit the Website

1. Use automated browsing to visit the [Mars news site](https://static.bc-edx.com/data/web/mars_news/index.html). Inspect the page to identify which elements to scrape.

      > **Hint** To identify which elements to scrape, you might want to inspect the page by using Chrome DevTools.

In [19]:
# Visit the Mars news site
# NOTE(review): the Step 1 instructions link to the static course copy
# (static.bc-edx.com/data/web/mars_news/index.html), but this cell opens the
# NASA-hosted page instead — confirm which target is intended.
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

### Step 2: Scrape the Website

Create a Beautiful Soup object and use it to extract text elements from the website.

In [20]:
# Extract the HTML content
# (read from the shared `browser` session, i.e. whatever page it has loaded)
html = browser.html

# Create a Beautiful Soup object
# (parsed with Python's built-in 'html.parser' backend)
soup = BeautifulSoup(html, 'html.parser')

In [21]:
# Extract all the text elements
# Despite the name, get_text() yields ONE string: the document's text
# fragments, each stripped of surrounding whitespace (strip=True) and joined
# with a single space (separator=' ').
text_elements = soup.get_text(separator=' ', strip=True)

### Step 3: Store the Results

Extract the titles and preview text of the news articles that you scraped. Store the scraping results in Python data structures as follows:

* Store each title-and-preview pair in a Python dictionary, and give each dictionary two keys: `title` and `preview`. For example:

  ```python
  {'title': "NASA's MAVEN Observes Martian Light Show Caused by Major Solar Storm", 
   'preview': "For the first time in its eight years orbiting Mars, NASA’s MAVEN mission witnessed two different types of ultraviolet aurorae simultaneously, the result of solar storms that began on Aug. 27."
  }
  ```

* Store all the dictionaries in a Python list.

* Print the list in your notebook.

In [80]:
# Function to visit a page and scrape news
def scrape_page(url):
    """Scrape one page of news articles and advance to the next page.

    Loads ``url`` in the module-level ``browser``, appends one
    ``{'title': ..., 'preview': ...}`` dict per article to the module-level
    ``all_news`` list, and then tries to click the "Next" pagination button.

    Args:
        url: Address of the news listing page to load.

    Returns:
        The browser's URL after a successful click on "Next", or ``None``
        when scraping should stop: no clickable "Next" button appeared within
        the wait timeout, the click failed, or the click did not change the
        browser's URL.
    """
    # Local import: selenium is already a dependency of this notebook, and
    # importing here keeps the function self-contained.
    from selenium.common.exceptions import TimeoutException

    browser.visit(url)
    time.sleep(5)  # Wait for the page to load

    # Record where the browser actually is (this may differ from `url` if the
    # site redirects) so a click that leads nowhere can be detected below.
    url_before_click = browser.url

    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    # Find the container with all articles
    articles_container = soup.find('div', class_='grid-col-12')
    if articles_container:
        articles = articles_container.find_all('div', class_='hds-content-item')
        print(f"Found Articles: {len(articles)}")  # Number of articles found

        # Extract title and preview for each article
        for article in articles:
            title_tag = article.find('h3', class_='heading-22 margin-0')
            preview_tag = article.find('p', class_='margin-top-0 margin-bottom-1')

            # find() returns None when nothing matches; one card with
            # different markup must not abort the whole scrape.
            if title_tag is None:
                print("Skipping an item without a title element")
                continue

            title = title_tag.get_text(strip=True)
            preview = preview_tag.get_text(strip=True) if preview_tag is not None else ''
            news_item = {
                'title': title,
                'preview': preview
            }
            all_news.append(news_item)
            print(f"Title: {title}\nPreview: {preview}\n")  # Print each news item with a new line

    try:
        # Find the "Next" button and click it
        wait = WebDriverWait(browser.driver, 10)  # 10 seconds timeout
        next_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'button.next.page-numbers')))

        # Scroll to the "Next" button to ensure it's clickable
        actions = ActionChains(browser.driver)
        actions.move_to_element(next_button).perform()

        # Click the "Next" button
        next_button.click()
        print("Clicked 'Next' button, moving to next page...")
        time.sleep(5)  # Wait for the new page to load

        # The browser's current URL is taken as the address of the next page
        next_page_url = browser.url

    except TimeoutException:
        # The wait expired without a clickable button: the expected way for
        # pagination to end, so it is not reported as an error.
        print("No clickable 'Next' button found; assuming this was the last page.")
        return None

    except Exception as e:
        # Best-effort: any other failure ends pagination instead of crashing
        print(f"An error occurred: {e}")
        return None

    if next_page_url == url_before_click:
        # Returning the same URL would make the caller reload this page and
        # scrape it again indefinitely.
        print("URL did not change after clicking 'Next'; stopping pagination.")
        return None

    return next_page_url

# Initialize a list to store news items
all_news = []

# Start scraping from the initial page
next_page_url = 'https://mars.nasa.gov/news/'  # Initial URL

# URLs already handed to scrape_page; seeing one again means pagination is
# not advancing, so the loop stops instead of re-scraping the same page.
visited_urls = set()

try:
    while next_page_url and next_page_url not in visited_urls:
        visited_urls.add(next_page_url)
        next_page_url = scrape_page(next_page_url)
finally:
    # Close the browser even if scraping raised, so the session is always released
    browser.quit()

# Print the list of all news articles
print(all_news)


Found Articles: 10
Title: NASA Invites Media, Public to Attend Deep Space Food Challenge Finale
Preview: NASA invites the media and public to explore the nexus of space and food innovation at the agency’s Deep Space Food Challenge symposium and winners’ announcement at the Nationwide and Ohio Farm Bureau 4-H Center in Columbus, Ohio, on Friday,…

Title: NASA’s Perseverance Rover Scientists Find Intriguing Mars Rock
Preview: The six-wheeled geologist found a fascinating rock that has some indications it may have hosted microbial life billions of years ago, but further research is needed. A vein-filled rock is catching the eye of the science team of NASA’s Perseverance…

Title: UPDATED: 10 Things for Mars 10
Preview: Scientists from around the world are gathering this week in California to take stock of the state of science from Mars and discuss goals for the next steps in exploration of the Red Planet. In the spirit of Mars 10,…

Title: NASA’s Curiosity Rover Discovers a Surprise in a M

In [None]:
# Serialize the scraped articles as pretty-printed JSON (4-space indent),
# then write the resulting text to disk in one step
serialized_news = json.dumps(all_news, indent=4)
with open('mars_news.json', 'w') as out_file:
    out_file.write(serialized_news)

print("Data has been exported to mars_news.json")