In [1]:
import requests

url = "https://github.com/topics"
headers = {"User-Agent": "Mozilla/5.0"}

r = requests.get(url, headers=headers, timeout=30)
print("Status code:", r.status_code) 
print("HTML preview:", r.text[:100]) 


with open("webpage.html", "w", encoding=r.encoding or "utf-8") as f:
    f.write(r.text)
print("Saved to webpage.html")


Status code: 200
HTML preview: 

<!DOCTYPE html>
<html
  lang="en"
  
  data-color-mode="auto" data-light-theme="light" data-dark-t
Saved to webpage.html


In [2]:
from bs4 import BeautifulSoup

with open("webpage.html", "r", encoding="utf-8") as f:
    html = f.read()

soup = BeautifulSoup(html, "html.parser")


In [3]:
title_selectors = [
    'a.no-underline.d-flex.flex-column', 
    'h3 a[href^="/topics/"]', 
    'a[href^="/topics/"][data-view-component="true"]'
]

desc_selectors = [
    'p.color-fg-muted',
    'div p',
]

def pick_first_nonempty(selectors):
    for css in selectors:
        found = soup.select(css)
        if found:
            return found
    return []

title_nodes = pick_first_nonempty(title_selectors)
desc_nodes  = pick_first_nonempty(desc_selectors)

titles = [t.get_text(strip=True) for t in title_nodes]

seen = set()
clean_titles = []
for t in titles:
    if t and t.lower() not in {"topics", "explore topics"} and t not in seen:
        clean_titles.append(t)
        seen.add(t)

descriptions = [d.get_text(strip=True) for d in desc_nodes if d.get_text(strip=True)]

print("Titles count:", len(clean_titles))
print(clean_titles[:10])

print("Descriptions count:", len(descriptions))
print(descriptions[:10])


Titles count: 16
['React NativeReact Native is a JavaScript mobile framework developed by Facebook.', 'ChromeChrome is a web browser from the tech company Google.', 'Node.jsNode.js is a tool for executing JavaScript in a variety of environments.', 'Awesome ListsAn awesome list is a list of awesome things curated by the community.', 'Code qualityAutomate your code review with style, quality, security, and test‑coverage checks when you need them.', 'CompilerCompilers are software that translate higher-level programming languages to lower-level languages (e.g. machine code).', 'CSSCascading Style Sheets (CSS) is a language used most often to style and improve upon the appearance of views.', 'DatabaseA database is a structured set of data held in a computer, usually a server.', 'Front endFront end is the programming and layout that people see and interact with.', 'JavaScriptJavaScript (JS) is a lightweight interpreted programming language with first-class functions.']
Descriptions count: 2

In [7]:
import pandas as pd

n = min(len(clean_titles), len(descriptions))
data = {
    "title": clean_titles[:n],
    "description": descriptions[:n]
}

df = pd.DataFrame(data)
print("Shape:", df.shape)
df.head(10)


Shape: (16, 2)


Unnamed: 0,title,description
0,React NativeReact Native is a JavaScript mobil...,"To see all available qualifiers, see ourdocume..."
1,ChromeChrome is a web browser from the tech co...,Browse popular topics on GitHub.
2,Node.jsNode.js is a tool for executing JavaScr...,React Native is a JavaScript mobile framework ...
3,Awesome ListsAn awesome list is a list of awes...,Chrome is a web browser from the tech company ...
4,Code qualityAutomate your code review with sty...,Node.js is a tool for executing JavaScript in ...
5,CompilerCompilers are software that translate ...,An awesome list is a list of awesome things cu...
6,CSSCascading Style Sheets (CSS) is a language ...,Chrome is a web browser from the tech company ...
7,DatabaseA database is a structured set of data...,"Automate your code review with style, quality,..."
8,Front endFront end is the programming and layo...,Compilers are software that translate higher-l...
9,JavaScriptJavaScript (JS) is a lightweight int...,Cascading Style Sheets (CSS) is a language use...


In [8]:
n = min(len(clean_titles), len(descriptions))
data = {
    "title": clean_titles[:n],
    "description": descriptions[:n]
}

df = pd.DataFrame(data)
print("Shape:", df.shape)
df.head(10)

Shape: (16, 2)


Unnamed: 0,title,description
0,React NativeReact Native is a JavaScript mobil...,"To see all available qualifiers, see ourdocume..."
1,ChromeChrome is a web browser from the tech co...,Browse popular topics on GitHub.
2,Node.jsNode.js is a tool for executing JavaScr...,React Native is a JavaScript mobile framework ...
3,Awesome ListsAn awesome list is a list of awes...,Chrome is a web browser from the tech company ...
4,Code qualityAutomate your code review with sty...,Node.js is a tool for executing JavaScript in ...
5,CompilerCompilers are software that translate ...,An awesome list is a list of awesome things cu...
6,CSSCascading Style Sheets (CSS) is a language ...,Chrome is a web browser from the tech company ...
7,DatabaseA database is a structured set of data...,"Automate your code review with style, quality,..."
8,Front endFront end is the programming and layo...,Compilers are software that translate higher-l...
9,JavaScriptJavaScript (JS) is a lightweight int...,Cascading Style Sheets (CSS) is a language use...
