1.  Scrape news headlines from a chosen website and return them as a list of dictionaries.

In [1]:
import requests
from bs4 import BeautifulSoup



In [2]:
# Helper that tidies scraped text so the output is easier to read.
def clean_text(str):
    """Return ``str`` with every non-breaking space (``\\xa0``) turned into a plain space."""
    # NOTE: the parameter name shadows the built-in ``str``; it is kept as-is
    # because it is part of the function's signature.
    non_breaking_space = "\xa0"
    pieces = str.split(non_breaking_space)
    return " ".join(pieces)

In [3]:
def get_news_headlines(url):
    """Scrape the ``<h3>`` headlines of a web page.

    Parameters
    ----------
    url : str
        Address of the page to download.

    Returns
    -------
    list of dict
        One ``{'title': <headline text>}`` dictionary per ``<h3>`` element, in
        document order. The text is stripped of surrounding whitespace and
        passed through ``clean_text``. An empty list is returned (and the
        status code is printed) when the response status is not 200.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (connection problems, timeout, ...).
    """
    # A browser-like User-Agent is sent along with the request.
    HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    # Bound the wait so an unresponsive server cannot block the notebook forever.
    response = requests.get(url, headers=HEADERS, timeout=10)

    # Anything other than status 200 is treated as a failed request.
    if response.status_code != 200:
        print("The request failed. Status : ", response.status_code)
        return []

    # Parse the page's HTML and collect every <h3> element.
    soup = BeautifulSoup(response.text, "html.parser")

    # The list is returned rather than printed so callers can reuse it;
    # in a notebook the value is still displayed when the call ends a cell.
    return [
        {'title': clean_text(heading.text.strip())}
        for heading in soup.find_all("h3")
    ]

In [4]:
# Apply the scraper to The Guardian's international front page.
# `url` is a notebook-level variable: the CSV-export cell further down reuses it.
url = "https://www.theguardian.com/international"

get_news_headlines(url)



2. a. Store scraped headlines in a CSV file.

In [5]:
import csv

def store_headlines_to_csv(url, csv_file_name) :
    """Download a page and write the text of its ``<h3>`` headlines to a CSV file.

    The file contains a single column: a ``News headlines`` header row followed
    by one row per headline. The file is created (or overwritten) only when the
    response status is 200; otherwise the status code is printed and nothing is
    written.

    Parameters
    ----------
    url : str
        Address of the page to download.
    csv_file_name : str
        Path of the CSV file to write.

    Returns
    -------
    None

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (connection problems, timeout, ...).
    OSError
        Propagated from ``open`` if the file cannot be written.
    """
    # A browser-like User-Agent is sent along with the request.
    HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    # Bound the wait so an unresponsive server cannot block the notebook forever.
    response = requests.get(url, headers=HEADERS, timeout=10)

    if response.status_code != 200:
        print("The request failed. Status : ", response.status_code)
        return

    soup = BeautifulSoup(response.text, "html.parser")
    titles = soup.find_all("h3")

    # `with` guarantees the file is closed even if writing fails.
    # newline='' lets the csv module manage line endings itself.
    # The platform's default text encoding is used, which is also what
    # search_headlines uses when it reads the file back.
    with open(csv_file_name, 'w', newline='') as csvfile:
        # csv.writer turns each list passed to writerow()/writerows() into one CSV line.
        csv_writer = csv.writer(csvfile)

        csv_writer.writerow(['News headlines'])

        # One row per headline, cleaned the same way get_news_headlines does.
        # No separator row is written between headlines: writerow('\n') stores a
        # row whose only cell is a newline character, which pollutes the data.
        csv_writer.writerows(
            [clean_text(title.text.strip())] for title in titles
        )

    print(f"The titles have been stored in the file : {csv_file_name}" )

In [6]:
# Export the headlines of The Guardian page to a CSV file.
# `url` is the address defined in the earlier scraping cell.
csv_file_name = "latest_news.csv"

store_headlines_to_csv(url, csv_file_name)

The titles have been stored in the file : latest_news.csv


2. b. Search headlines based on a keyword.

In [7]:
def search_headlines(keyword, csv_file_name) :
    """Find the headlines stored in a CSV file that contain a keyword.

    The first row of the file is treated as a header and skipped. Only the
    first column of each remaining row is examined. Matching headlines are
    printed, or a "no title found" message if there are none.

    Parameters
    ----------
    keyword : str
        Text to look for; the comparison is case-insensitive.
    csv_file_name : str
        Path of the CSV file to read.

    Returns
    -------
    list of str
        The matching headlines, in file order (empty if nothing matches or the
        file has no data rows).

    Raises
    ------
    OSError
        Propagated from ``open`` if the file cannot be read.
    """
    needle = keyword.lower()

    # Headlines that contain the keyword.
    results = []

    # newline='' is what the csv module asks for, so that newlines embedded in
    # quoted fields are handed to the reader untouched.
    with open(csv_file_name, 'r', newline='') as csvfile:
        csv_reader = csv.reader(csvfile)

        # Skip the header; the default keeps an empty file from raising StopIteration.
        next(csv_reader, None)

        for row in csv_reader:
            # Blank lines come back as empty rows, and the file may contain
            # whitespace-only separator rows: neither is a headline.
            if not row or not row[0].strip():
                continue

            title = row[0]

            # Case-insensitive containment check.
            if needle in title.lower():
                results.append(title)

    if results:
        print(f"Search results for the keyword : '{keyword}':")
        print('')
        for title in results:
            print(title)
            print('-----------------')
    else:
        print(f"No title found for the keyword :  '{keyword}'.")

    return results

In [8]:
# Search the headlines stored in the CSV file for a keyword.
# The matching titles are printed by the function and also kept in `results`.

csv_file_name = "latest_news.csv"
keyword = 'gaza'  # matched case-insensitively by search_headlines
results = search_headlines(keyword, csv_file_name)

Search results for the keyword : 'gaza':

Full reportTwo major Gaza hospitals close to new patients as heavy fighting rages
-----------------
‘It was nearly sunset when I saw death up close’: a Gaza diary
-----------------
CartoonNicola Jennings on Macron’s Gaza ceasefire call
-----------------
People in GazaHow have you been affected by the Israel-Hamas war?
-----------------
Full reportNetanyahu sets out uncompromising postwar vision as Israel pounds Gaza
-----------------
