In [None]:
# scrape data and save to csv

# Import necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the page to scrape
url = "https://rowlandward.org/rwrb-table/"

# Send a GET request to the webpage.
# Without an explicit timeout, requests waits indefinitely on a server that
# never answers and the cell would never finish; with one, a stalled request
# raises requests.exceptions.Timeout instead.
response = requests.get(url, timeout=30)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the table - find() returns the first <table> in the document,
    # or None when the page contains no table at all.
    table = soup.find('table')

    if table is None:
        # Guard against a changed page layout: calling find_all() on None
        # would otherwise fail with an unhelpful AttributeError.
        print("No <table> element found on the page; nothing was scraped.")
    else:
        # Extract table headers (text of every <th> cell, whitespace-trimmed)
        headers = [th.text.strip() for th in table.find_all('th')]

        # Extract table rows, skipping the first <tr> (the header row).
        # Rows that contain no <td> cells yield an empty list and are dropped.
        parsed_rows = (
            [td.text.strip() for td in tr.find_all('td')]
            for tr in table.find_all('tr')[1:]
        )
        rows = [cells for cells in parsed_rows if cells]

        # Create a DataFrame
        df = pd.DataFrame(rows, columns=headers)

        # Display the first few rows
        print("First 5 rows of the scraped data:")
        display(df.head())

        # Display basic info about the dataset
        print("\nDataset Info:")
        print(f"Number of rows: {len(df)}")
        print(f"Number of columns: {len(df.columns)}")

        # Optional: Save to CSV file
        df.to_csv('rowlandward_data.csv', index=False)
        print("\nData saved to 'rowlandward_data.csv'")

else:
    print(f"Failed to retrieve the webpage. Status code: {response.status_code}")

First 5 rows of the scraped data:


Unnamed: 0,L-left,L-right,C-left,C-right,T to T,Locality,Date,Hunter/Owner,Current Owner,Method
0,73 7/8,71 5/8,11,11,27 7/8,"Mozambique, Save R.",1963,Dr. Carlo Caldesi,Dr. Carlo Caldesi,PU
1,62 5/8,72 5/8,10 1/8,0,23 5/8,"Namibia, Hochfeld",2001,Johann Rohrer,Johann Rohrer,R
2,72 4/8,57 7/8,9 3/8,10 1/8,33,"RSA, Limpopo Prov., Hoedspruit",2010,Dewald Joubert,Dewald Joubert,R
3,66 2/8,69 2/8,11,0,40,"RSA, E. Transvaal",1916,James Cole Rous,James Cole Rous,R
4,68 7/8,68,11 4/8,11 3/8,22 7/8,"RSA, Mpumalanga, Marble Hall",1998,Nico Coetzee,Nico Coetzee,R



Dataset Info:
Number of rows: 1991
Number of columns: 10

Data saved to 'rowlandward_data.csv'
