In [9]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [10]:
# Front page of Hacker News; this is the only page the notebook scrapes.
url = "https://news.ycombinator.com/"

# requests has no default timeout, so without one a stalled connection
# would block this cell (and "Run All") indefinitely.
response = requests.get(url, timeout=10)

# check status code
# Fail fast on anything other than 200: the cells below would otherwise
# parse an error page and silently produce an empty or misleading table.
if response.status_code == 200:
    print('status_code is 200 --> continue')
else:
    raise RuntimeError(
        f"there is a problem: GET {url} returned status {response.status_code}"
    )

status_code is 200 --> continue


In [11]:
# Build the parse tree from the downloaded HTML using Python's built-in parser.
soup = BeautifulSoup(response.text, features="html.parser")

# Show the <title> text as a quick check that the expected page was fetched.
page_title = soup.title.text
print("Web title:\n", page_title)

Web title:
 Hacker News


In [12]:
# Every story is a <tr class="athing"> row; the <tr> that immediately follows
# it is read as the metadata row (score, author, age, comment link).
main_rows = soup.find_all("tr", class_="athing")

# Base against which relative hrefs (e.g. "item?id=123") are resolved.
HN_BASE_URL = "https://news.ycombinator.com/"

# Parallel lists, one entry per story, later assembled into a DataFrame.
ranks = []
titles = []
links = []
scores = []
authors = []
ages = []
comments_list = []


for row in main_rows:

    # Rank ("12." -> "12")
    rank_tag = row.find("span", class_="rank")
    rank = rank_tag.text.strip().replace(".", "") if rank_tag else None

    # Title + Link
    titleline = row.find("span", class_="titleline")
    a_tag = titleline.find("a") if titleline else None
    title = a_tag.text.strip() if a_tag else None
    link = a_tag.get("href") if a_tag else None

    # Standardize link: urljoin leaves absolute URLs untouched and resolves
    # relative ("item?id=1"), root-relative ("/x") and protocol-relative
    # ("//host/x") hrefs correctly, which plain string concatenation does not.
    if link:
        link = urljoin(HN_BASE_URL, link)

    # Meta row (next row)
    meta_row = row.find_next_sibling("tr")

    # Score (rows without a score span keep the "0 points" placeholder)
    score_tag = meta_row.find("span", class_="score") if meta_row else None
    score_text = score_tag.text.strip() if score_tag else "0 points"

    # Author
    author_tag = meta_row.find("a", class_="hnuser") if meta_row else None
    author = author_tag.text.strip() if author_tag else None

    # Age
    age_tag = meta_row.find("span", class_="age") if meta_row else None
    age = age_tag.text.strip() if age_tag else None

    # Comments: the last link is not always the comment link (it can be
    # "discuss", or the age link when a row has no comment link at all), so
    # accept only a label shaped like "<number> comment(s)" and otherwise fall
    # back to "0 comments". Searching from the end prefers the trailing link.
    a_links = meta_row.find_all("a") if meta_row else []
    comments_text = "0 comments"
    for candidate in reversed(a_links):
        words = candidate.text.split()  # split() also breaks on non-breaking spaces
        if (
            len(words) == 2
            and words[0].isdigit()
            and words[1] in ("comment", "comments")
        ):
            # Re-join with a plain space so the CSV holds "23 comments".
            comments_text = " ".join(words)
            break

    # store
    ranks.append(rank)
    titles.append(title)
    links.append(link)
    scores.append(score_text)
    authors.append(author)
    ages.append(age)
    comments_list.append(comments_text)

# Sanity check: all lists must have the same length before building the frame.
print("Lengths:", len(ranks), len(titles), len(links), len(scores), len(authors), len(ages), len(comments_list))




# Pair each output column with the list collected for it; the order of the
# pairs determines the column order of the resulting table.
column_data = [
    ("Rank", ranks),
    ("Title", titles),
    ("Link", links),
    ("Score", scores),
    ("Author", authors),
    ("Age", ages),
    ("Comments", comments_list),
]
df = pd.DataFrame(dict(column_data))

# Preview the first five stories as the cell's rich output.
df.head()


Lengths: 30 30 30 30 30 30 30


Unnamed: 0,Rank,Title,Link,Score,Author,Age,Comments
0,1,Project Patchouli: Open-source electromagnetic...,https://patchouli.readthedocs.io/en/latest/,245 points,ffin,6 hours ago,23 comments
1,2,A closer look at a BGP anomaly in Venezuela,https://blog.cloudflare.com/bgp-route-leak-ven...,159 points,ChrisArchitect,5 hours ago,61 comments
2,3,The Napoleon Technique: Postponing things to i...,https://effectiviology.com/napoleon/,101 points,Khaine,5 hours ago,43 comments
3,4,Kernel bugs hide for 2 years on average. Some ...,https://pebblebed.com/blog/kernel-bugs,187 points,kmavm,9 hours ago,72 comments
4,5,Open Infrastructure Map,https://openinframap.org,198 points,efskap,8 hours ago,44 comments


In [13]:
# Write the scraped table next to the notebook; index=False omits the
# DataFrame's positional index column from the file.
df.to_csv(path_or_buf="hackernews_frontpage.csv", index=False)

# Confirm the output file name to the reader.
print("Saved: hackernews_frontpage.csv")

Saved: hackernews_frontpage.csv
