# Get data

## Get ipinfo domains

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the paginated hosting ranking at ipinfo.io into a list of
# {"ASN": int, "domains": int} records and build `domains_df` from it.
# Pages are requested in ascending order starting at 1. Redirects are not
# followed; a 302 response ends the loop (presumably it signals that the
# requested page is past the last one -- confirm against the site).
url_template = "https://ipinfo.io/hosting/{number}"
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can block indefinitely
number = 1
rows = []
while True:
    url = url_template.format(number=number)
    print(f"Fetching page {number} from {url}")
    resp = requests.get(url, allow_redirects=False, timeout=REQUEST_TIMEOUT)
    if resp.status_code == 302:
        break
    if resp.status_code != 200:
        print(f"Error: {resp.status_code}")
        break
    soup = BeautifulSoup(resp.text, 'html.parser')
    # NOTE: a multi-class string is matched against the exact value of the
    # class attribute, so this lookup breaks if the site reorders the classes.
    table = soup.find("table", class_="table table-responsive")
    if table is None:
        # Layout change or an unexpected page: stop instead of crashing with
        # an AttributeError on `None.find_all`.
        print(f"Error: no data table found on page {number}")
        break
    rows_before = len(rows)
    for tr in table.find_all("tr"):
        tds = tr.find_all("td")
        if len(tds) < 2:  # skip the header (rows with fewer than two <td> cells)
            continue
        asn_link = tds[0].find("a")
        if asn_link is None:
            # The ASN is read from the link text; a cell without a link
            # cannot be parsed, so report it and move on.
            print(f"Skipping row without ASN link on page {number}")
            continue
        asn_text = asn_link.get_text(strip=True)  # e.g. "AS13335"
        asn_num = int(asn_text.removeprefix("AS"))
        count_text = tds[1].get_text(strip=True)  # e.g. "63,477,595"
        domains = int(count_text.replace(",", ""))
        rows.append({"ASN": asn_num, "domains": domains})
    if len(rows) == rows_before:
        # A 200 page that contributes nothing would otherwise keep the
        # `while True` loop running forever.
        print(f"No rows found on page {number}; stopping.")
        break
    number += 1
    print(f"Collected {len(rows)} rows so far.")
    print("Last row:", rows[-1])

# Fix the column set explicitly so the frame has the same schema even when
# no rows were collected.
domains_df = pd.DataFrame(rows, columns=["ASN", "domains"])
domains_df.head()

Fetching page 1 from https://ipinfo.io/hosting/1
Collected 2500 rows so far.
Last row: {'ASN': 21472, 'domains': 2540}
Fetching page 2 from https://ipinfo.io/hosting/2
Collected 5000 rows so far.
Last row: {'ASN': 11595, 'domains': 605}
Fetching page 3 from https://ipinfo.io/hosting/3
Collected 7500 rows so far.
Last row: {'ASN': 201201, 'domains': 248}
Fetching page 4 from https://ipinfo.io/hosting/4
Collected 10000 rows so far.
Last row: {'ASN': 41811, 'domains': 128}
Fetching page 5 from https://ipinfo.io/hosting/5
Collected 12500 rows so far.
Last row: {'ASN': 399358, 'domains': 75}
Fetching page 6 from https://ipinfo.io/hosting/6
Collected 15000 rows so far.
Last row: {'ASN': 9555, 'domains': 48}
Fetching page 7 from https://ipinfo.io/hosting/7
Collected 17500 rows so far.
Last row: {'ASN': 264562, 'domains': 32}
Fetching page 8 from https://ipinfo.io/hosting/8
Collected 20000 rows so far.
Last row: {'ASN': 150788, 'domains': 22}
Fetching page 9 from https://ipinfo.io/hosting/9
Co

Unnamed: 0,ASN,domains
0,16509,139276485
1,13335,63477595
2,52925,32915972
3,396982,24543491
4,47846,17833760


# Export data

In [3]:
# Write the collected ASN/domain table to a CSV file in the working directory;
# index=False leaves the DataFrame's row index out of the file.
domains_df.to_csv(path_or_buf="ipinfo_domains.csv", index=False)