1.  Scrape news headlines from a chosen website and return them as a list of dictionaries.

In [11]:
import requests
from bs4 import BeautifulSoup



In [12]:
# we define a clean_text function to clean the output of unwanted characters for better readability
def clean_text(str):
    """Return the given text with every non-breaking space turned into a plain space."""
    # NOTE: the parameter name shadows the built-in `str`; it is kept so that
    # existing keyword callers keep working.
    pieces = str.split("\xa0")
    return " ".join(pieces)

In [13]:
def get_news_headlines(url):
    """Scrape the headlines of a web page and return them.

    Every ``<h3>`` element of the page is treated as one headline.

    Args:
        url: Address of the page to scrape.

    Returns:
        A list with one ``{'title': <headline text>}`` dictionary per ``<h3>``
        element, in page order. The list is empty when the server does not
        answer with status 200.

    Raises:
        requests.exceptions.RequestException: if the request cannot be
            completed (connection error, timeout, ...).
    """
    # Sent with the request so that it carries a desktop-browser User-Agent.
    HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    # Without a timeout, requests may wait indefinitely on an unresponsive server.
    response = requests.get(url, headers=HEADERS, timeout=10)

    # Anything other than status 200 is reported and yields no headlines.
    if response.status_code != 200:
        print("The request failed. Status : ", response.status_code)
        return []

    # Analyze the page's HTML content
    soup = BeautifulSoup(response.text, "html.parser")

    # Only <h3> tags are collected: for the Guardian working example, also
    # collecting "a", "h1" and "h2" retrieved unwanted elements.
    # clean_text replaces the "\xa0" characters for better readability.
    return [
        {'title': clean_text(heading.text.strip())}
        for heading in soup.find_all("h3")
    ]

In [14]:
# Apply the function to The Guardian's international front page.
# `url` is a notebook-level variable; the CSV-export cell below reuses it.
url = "https://www.theguardian.com/international"

get_news_headlines(url)



2. a. Store scraped headlines in a CSV file.

In [15]:
import csv

def store_headlines_to_csv(url, csv_file_name):
    """Scrape the ``<h3>`` headlines of a page and store them in a CSV file.

    The file has a single column: a ``NEWS HEADLINES`` header row followed by
    one row per headline. An existing file with the same name is overwritten.
    Nothing is written when the server does not answer with status 200.

    Args:
        url: Address of the page to scrape.
        csv_file_name: Path of the CSV file to create.

    Raises:
        requests.exceptions.RequestException: if the request cannot be
            completed (connection error, timeout, ...).
        OSError: if the CSV file cannot be opened for writing.
    """
    # Sent with the request so that it carries a desktop-browser User-Agent.
    HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    # Without a timeout, requests may wait indefinitely on an unresponsive server.
    response = requests.get(url, headers=HEADERS, timeout=10)

    if response.status_code != 200:
        print("The request failed. Status : ", response.status_code)
        return

    soup = BeautifulSoup(response.text, "html.parser")

    # Same cleaning as get_news_headlines ("\xa0" -> " "); headings without
    # any text are dropped because they carry no headline.
    headlines = [clean_text(heading.text.strip()) for heading in soup.find_all("h3")]
    headlines = [headline for headline in headlines if headline]

    # newline='' lets the csv module write its own line endings.
    # NOTE(review): the file uses the platform default encoding, like the
    # cells that read it back; change both sides together if needed.
    with open(csv_file_name, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)

        # The first row is the header of the file
        csv_writer.writerow(['NEWS HEADLINES'])

        # One row per headline. No "spacer" rows are written: writerow('\n')
        # does not produce a blank line, it stores a one-field row containing
        # a newline character, which readers then see as a bogus headline.
        csv_writer.writerows([headline] for headline in headlines)

    print(f"The titles have been stored in the file : {csv_file_name}" )

In [16]:
# Export the headlines of The Guardian page (`url` comes from the scraping cell above).
# `csv_file_name` is a notebook-level variable that later cells read the file back from.
csv_file_name = "latest_news.csv"

store_headlines_to_csv(url, csv_file_name)

The titles have been stored in the file : latest_news.csv


2. b. Search headlines based on a keyword.

In [17]:
def search_headlines(keyword, csv_file_name):
    """Find the headlines of a CSV file that contain a keyword.

    The first row of the file is treated as a header and skipped; the
    headline is read from the first column of every following row. The
    comparison is case-insensitive. The matches (or a "not found" message)
    are also printed.

    Args:
        keyword: Text to look for inside each headline.
        csv_file_name: Path of the CSV file holding the headlines.

    Returns:
        The list of matching headlines, in file order (empty if none match).

    Raises:
        OSError: if the CSV file cannot be opened.
    """
    # Lower-case the keyword once instead of once per row.
    needle = keyword.lower()

    # Titles found with the given keyword
    results = []

    # newline='' is the mode the csv module expects, so that newlines embedded
    # in quoted fields are handed to the reader untouched.
    with open(csv_file_name, 'r', newline='') as csvfile:
        csv_reader = csv.reader(csvfile)

        # Skip the header; the default keeps an empty file from raising StopIteration.
        next(csv_reader, None)

        for row in csv_reader:
            # csv.reader returns [] for a blank line, and "spacer" rows hold
            # only whitespace: neither is a headline.
            if not row or not row[0].strip():
                continue

            title = row[0]

            # Check if the keyword is present in the title (case-insensitive)
            if needle in title.lower():
                results.append(title)

    # For each result obtained, the title is displayed
    if results:
        print(f"Search results for the keyword : '{keyword}':")
        print('')
        for title in results:
            print(title)
            print('-----------------')
    else:
        print(f"No title found for the keyword :  '{keyword}'.")

    return results

In [18]:
# Use function to search for titles in CSV file

csv_file_name = "latest_news.csv"
keyword = 'gaza'  
results = search_headlines(keyword, csv_file_name)

Search results for the keyword : 'gaza':

Full reportTwo major Gaza hospitals close to new patients as heavy fighting rages
-----------------
GazaIsraeli forces at gates of main hospital with hundreds trapped
-----------------
People in GazaHow have you been affected by the Israel-Hamas war?
-----------------
Full reportNetanyahu sets out uncompromising postwar vision as Israel pounds Gaza
-----------------


3. Implement a graphical interface to browse and search the headlines.

In [20]:
import tkinter as tk

# Callback of the "Exit application" button
def exit():
    """Destroy the notebook-level Tk ``window``.

    NOTE(review): this name shadows the built-in ``exit``; it is left as is
    because the exit button is wired to it by name.
    """
    window.destroy() 

# Callback of the "Search" button: look up the typed keyword and list the matches
def search():
    """Search the headlines file for the typed keyword and show the matches.

    Reads the keyword from ``keyword_entry``, searches the file named by the
    notebook-level ``csv_file_name`` (the same file the title list is loaded
    from, instead of a second hard-coded copy of its name) and replaces the
    content of ``result_listbox`` with the matches.
    """
    # Keyword typed by the user
    keyword = keyword_entry.get()

    # All titles found using the keyword
    results = search_headlines(keyword, csv_file_name)

    # delete(0, END) empties the Listbox before the new results are shown
    result_listbox.delete(0, tk.END)

    if not results:
        result_listbox.insert(tk.END, "No results found.")
        return

    for title in results:
        result_listbox.insert(tk.END, title)
        # A blank entry after each title, for better readability
        result_listbox.insert(tk.END, ' ')
        
        


# Create main window
window = tk.Tk()

# Size of the window
window.geometry("1000x1000")

# Title of the window
window.title("HEADLINES OF THE DAY ")


# Read the list of news headlines.
# newline='' is the mode the csv module expects when reading.
with open(csv_file_name, 'r', newline='') as csvfile:
    csv_reader = csv.reader(csvfile)

    # Skip the header; the default keeps an empty file from raising StopIteration.
    next(csv_reader, None)

    # Keep the first column of every real row: csv.reader returns [] for a
    # blank line (row[0] would raise IndexError) and "spacer" rows hold only
    # whitespace, so both are left out of the list.
    titles = [row[0] for row in csv_reader if row and row[0].strip()]

# Create a listbox to display titles
title_listbox = tk.Listbox(window, height=20, width=100)

# Insert every title in title_listbox
for title in titles:
    title_listbox.insert(tk.END, title)
# The pack() method declares the position of widgets in relation to each other.
title_listbox.pack()

# Search label and input field
keyword_label = tk.Label(window, text="search by keyword:")
keyword_label.pack()

# Accepts the text typed by the user as the keyword
keyword_entry = tk.Entry(window)
keyword_entry.pack()

# Search button, wired to the search() callback
rechercher_bouton = tk.Button(window, text="Search", command=search)
rechercher_bouton.pack()

# List of results, filled by search()
result_listbox = tk.Listbox(window, width=100)
result_listbox.pack()

# Button to exit the window using the exit() function
btn_exit = tk.Button(window, text="Exit application", command=exit)
btn_exit.pack()

# Window execution
window.mainloop()


Search results for the keyword : 'evacuation':

LiveEvacuation attempt at al-Quds hospital fails due to ‘continuing shelling’, Palestine Red Crescent Society says
-----------------
