# Module 11 Challenge
## Deliverable 1: Scrape Titles and Preview Text from Mars News

In [4]:
# Import Splinter and BeautifulSoup
from splinter import Browser
# Explanation:
# From splinter import Browser
    # This imports the Browser class from the splinter library.
    # Splinter is a tool that automates browser actions, allowing us to visit websites, interact with
    # elements, and scrape data.
    # We'll use Browser to open a web page and extract its HTML.
from bs4 import BeautifulSoup
# Explanation:
# From bs4 import BeautifulSoup
    # This imports the BeautifulSoup class from the bs4 library.
    # BeautifulSoup is a tool that helps us parse HTML and XML documents.
    # We'll use BeautifulSoup to parse the HTML and extract the data we need.

In [5]:
# Start a Chrome browser session through Splinter. The instance is stored in
# `browser`, which later cells use to open the page, read its HTML, and
# finally shut the browser down.
browser = Browser('chrome')

### Step 1: Visit the Website

1. Use automated browsing to visit the [Mars news site](https://static.bc-edx.com/data/web/mars_news/index.html). Inspect the page to identify which elements to scrape.

      > **Hint** To identify which elements to scrape, you might want to inspect the page by using Chrome DevTools.

In [6]:
# Visit the Mars news site
# `url` holds the web address of the page to scrape.
url = 'https://static.bc-edx.com/data/web/mars_news/index.html'
# Point the automated browser at that address, much like typing the URL into
# the address bar and pressing Enter.
browser.visit(url)

### Step 2: Scrape the Website

Create a Beautiful Soup object and use it to extract text elements from the website.

In [7]:
# Create a Beautiful Soup object
# Take the HTML of the page currently loaded in the browser (comparable to
# "view page source") ...
html = browser.html
# ... and parse it with the "html.parser" backend. The parsed document is kept
# in `soup`, which the following cells search for article elements.
soup = BeautifulSoup(html, "html.parser")

In [None]:
# Extract all article elements.
# Each <div class="list_text"> wraps one article's title and preview text, so
# searching inside it keeps every title paired with its own preview.
articles = soup.find_all("div", class_="list_text")

# Loop through each article and print its title and preview together.
for article in articles:
    title_tag = article.find("div", class_="content_title")
    preview_tag = article.find("div", class_="article_teaser_body")

    # find() returns None when no matching tag exists; skip such incomplete
    # entries instead of crashing with AttributeError on .get_text().
    if title_tag is None or preview_tag is None:
        continue

    # get_text() returns only the text content of the tag, without markup.
    title = title_tag.get_text()
    preview = preview_tag.get_text()

    print(f"Title: {title}")
    print(f"Preview: {preview}")
    print("-" * 80)  # Separator line between articles for readability




Title: NASA's MAVEN Observes Martian Light Show Caused by Major Solar Storm
Preview: For the first time in its eight years orbiting Mars, NASA’s MAVEN mission witnessed two different types of ultraviolet aurorae simultaneously, the result of solar storms that began on Aug. 27.
--------------------------------------------------------------------------------
Title: NASA Prepares to Say 'Farewell' to InSight Spacecraft
Preview: A closer look at what goes into wrapping up the mission as the spacecraft’s power supply continues to dwindle.
--------------------------------------------------------------------------------
Title: NASA and ESA Agree on Next Steps to Return Mars Samples to Earth
Preview: The agency’s Perseverance rover will establish the first sample depot on Mars.
--------------------------------------------------------------------------------
Title: NASA's InSight Lander Detects Stunning Meteoroid Impact on Mars
Preview: The agency’s lander felt the ground shake during the impac

### Step 3: Store the Results

Extract the titles and preview text of the news articles that you scraped. Store the scraping results in Python data structures as follows:

* Store each title-and-preview pair in a Python dictionary, and give each dictionary two keys: `title` and `preview`. For example:

  ```python
  {'title': "NASA's MAVEN Observes Martian Light Show Caused by Major Solar Storm", 
   'preview': "For the first time in its eight years orbiting Mars, NASA’s MAVEN mission witnessed two different types of ultraviolet aurorae simultaneously, the result of solar storms that began on Aug. 27."
  }
  ```

* Store all the dictionaries in a Python list.

* Print the list in your notebook.

### Step 3: Store the Results

Extract the titles and preview text of the news articles that you scraped. Store the scraping results in Python data structures as follows:

* Store each title-and-preview pair in a Python dictionary, and give each dictionary two keys: `title` and `preview`. For example:

  ```python
  {'title': "NASA's MAVEN Observes Martian Light Show Caused by Major Solar Storm", 
   'preview': "For the first time in its eight years orbiting Mars, NASA’s MAVEN mission witnessed two different types of ultraviolet aurorae simultaneously, the result of solar storms that began on Aug. 27."
  }
  ```

* Store all the dictionaries in a Python list.

* Print the list in your notebook.

In [None]:
# Create an empty list to store the dictionaries.
# It will hold one {"title": ..., "preview": ...} dictionary per article.
mars_news = []

In [None]:
# Build the list of article dictionaries.
#
# The list is (re)created at the top of this cell instead of relying only on
# the empty list from the previous cell: otherwise every re-run of this cell
# would append the same articles again and leave duplicates in `mars_news`.
mars_news = []

# Loop through the article elements
for article in articles:
    title_tag = article.find("div", class_="content_title")
    preview_tag = article.find("div", class_="article_teaser_body")

    # find() returns None when no matching tag exists; skip such incomplete
    # entries instead of crashing with AttributeError on .get_text().
    if title_tag is None or preview_tag is None:
        continue

    # Extract the title and preview text from the elements
    title = title_tag.get_text()
    preview = preview_tag.get_text()

    # Store each title-and-preview pair in a dictionary with the two required
    # keys, then add the dictionary to the list.
    article_dict = {"title": title, "preview": preview}
    mars_news.append(article_dict)


In [None]:
# Print the list to confirm success
# pprint ("pretty-print") lays the list of dictionaries out in a more readable
# form than a plain print() would.
# NOTE: pprint sorts dictionary keys by default, so each entry is displayed
# with 'preview' before 'title' even though 'title' was inserted first.
import pprint
pprint.pprint(mars_news)

[{'title': "NASA's MAVEN Observes Martian Light Show Caused by Major Solar Storm",
  'preview': 'For the first time in its eight years orbiting Mars, NASA’s MAVEN mission witnessed two different types of ultraviolet aurorae simultaneously, the result of solar storms that began on Aug. 27.'},
 {'title': "NASA Prepares to Say 'Farewell' to InSight Spacecraft",
  'preview': 'A closer look at what goes into wrapping up the mission as the spacecraft’s power supply continues to dwindle.'},
 {'title': 'NASA and ESA Agree on Next Steps to Return Mars Samples to Earth',
  'preview': 'The agency’s Perseverance rover will establish the first sample depot on Mars.'},
 {'title': "NASA's InSight Lander Detects Stunning Meteoroid Impact on Mars",
  'preview': 'The agency’s lander felt the ground shake during the impact while cameras aboard the Mars Reconnaissance Orbiter spotted the yawning new crater from space.'},
 {'title': 'NASA To Host Briefing on InSight, Mars Reconnaissance Orbiter Findings',


In [None]:
# Close the automated browser session now that scraping is finished, so the
# Chrome instance opened at the start of the notebook does not keep running
# and holding system resources.
browser.quit()
    