In [5]:
import csv
import datetime
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

The script:

1. Fetches the Hacker News homepage using `requests`
2. Parses the HTML using BeautifulSoup
3. Extracts all titles from the story listings
4. Saves them to a CSV file with a timestamp in the filename


When you run the script, it will:

- Shows how many titles were found
- Saves them to a CSV file in your current directory
- Displays the filename of the saved CSV


In [2]:
def scrape_hackernews(url="https://news.ycombinator.com/", timeout=10):
    """Fetch a Hacker News listing page and return its story titles.

    Parameters
    ----------
    url : str, optional
        Page to scrape. Defaults to the Hacker News front page.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout,
        ``requests.get`` can block indefinitely on an unresponsive host.

    Returns
    -------
    list of str or None
        Titles in page order (an empty list when the page contains no
        ``<span class="titleline">`` elements), or ``None`` when the request
        raised a network error or returned a status code other than 200.
    """
    # Network-level failures (DNS, refused connection, timeout, ...) are
    # reported the same way as a bad status code: print and return None, so
    # callers only have to handle one failure signal.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the webpage: {exc}")
        return None

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve the webpage: Status code {response.status_code}")
        return None

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Each story title is the first <a> inside a <span class="titleline">
    titles = []
    for title_span in soup.find_all('span', class_='titleline'):
        a_tag = title_span.find('a')
        if a_tag is not None:
            titles.append(a_tag.text)

    return titles

def save_to_csv(titles):
    """Write ``titles`` to a timestamped one-column CSV in the current directory.

    The file is named ``hackernews_titles_<YYYYMMDD_HHMMSS>.csv`` using the
    local time at the moment of the call. It starts with a ``Title`` header
    row, followed by one row per title.

    Returns the name of the file that was written.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"hackernews_titles_{timestamp}.csv"

    def _rows():
        # Header first, then each title wrapped as a single-cell row.
        yield ['Title']
        for entry in titles:
            yield [entry]

    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        csv.writer(out_file).writerows(_rows())

    return filename

In [3]:
def main():
    """Scrape the Hacker News titles and save them to a CSV, reporting progress.

    Prints a start message, then either the number of titles found and the
    name of the CSV they were written to, or a single failure message when
    the scraper returned nothing (``None`` or an empty list).
    """
    print("Scraping Hacker News titles...")
    titles = scrape_hackernews()

    # Nothing to save: covers both a failed request and an empty result.
    if not titles:
        print("No titles were found or there was an error.")
        return

    print(f"Found {len(titles)} titles.")
    filename = save_to_csv(titles)
    print(f"Titles saved to {filename}")

if __name__ == "__main__":
    main()

Scraping Hacker News titles...
Found 30 titles.
Titles saved to hackernews_titles_20250407_104106.csv


In [8]:
# Every scrape writes a new file named after its run time, so a hard-coded
# filename only exists for the one run that produced it. Load the newest
# export instead: the YYYYMMDD_HHMMSS stamp sorts chronologically as text.
csv_paths = sorted(Path('.').glob('hackernews_titles_*.csv'))
if not csv_paths:
    raise FileNotFoundError(
        "No hackernews_titles_*.csv file found in the current directory; run main() first."
    )
df = pd.read_csv(csv_paths[-1])

In [9]:
# Bare last expression: the notebook displays the loaded titles as a table.
df

Unnamed: 0,Title
0,Rsync replaced with openrsync on macOS Sequoia
1,AI masters Minecraft: DeepMind program finds d...
2,Glamorous Toolkit
3,Dark Mirror Ideologies
4,We asked camera companies why their RAW format...
5,Writing C for Curl
6,A Multiwavelength Look at Proxima Centauri's F...
7,New Theoretical Research Trends in Cartography...
8,"Standard Ebooks: liberated ebooks, carefully p..."
9,How the Atlantic's Jeffrey Goldberg Got Added ...
