In [9]:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Scrape the skyscraper center website
url = "https://www.skyscrapercenter.com/buildings"

# Download the page exactly once. The timeout keeps the cell from hanging on an
# unresponsive server, and raise_for_status() stops us from silently parsing an
# HTTP error page as if it were the buildings listing.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# Let pandas extract the HTML tables from the body we already downloaded rather
# than passing the URL (which would fetch the page a second time). The text is
# wrapped in StringIO because pandas expects a file-like object for literal HTML.
tables = pd.read_html(StringIO(response.text))

# The first table on the page is the one used for the rest of the analysis.
df = tables[0]

# Check actual column names
print(df.columns.tolist())


def _find_column(columns, prefix):
    """Return the first label in *columns* whose text starts with *prefix*.

    Labels are compared via ``str()`` so non-string labels do not break the
    lookup. Raises ``KeyError`` naming the missing prefix and the available
    columns when nothing matches.
    """
    for col in columns:
        if str(col).startswith(prefix):
            return col
    raise KeyError(
        f"No column starting with {prefix!r}; available columns: {list(columns)}"
    )


# The scraped headers carry long tooltip text after the real title, so match
# each needed column by the word its header STARTS WITH.
name_col = _find_column(df.columns, 'Name')
city_col = _find_column(df.columns, 'City')
height_col = _find_column(df.columns, 'Height')
floors_col = _find_column(df.columns, 'Floors')

# Select just those columns (copy so renaming does not touch the original df)
df_clean = df[[name_col, city_col, height_col, floors_col]].copy()

# Rename to simple names
df_clean.columns = ['building_name', 'city', 'height', 'floors']

# Remove rows with missing data
df_tidy = df_clean.dropna()

# Keep top 20 (head(n) returns the first n rows, so n must be 20, not 21)
df_tidy = df_tidy.head(20)

# Save to CSV
df_tidy.to_csv('tallest_buildings.csv', index=False)

# Display to verify
print(df_tidy)


['Rank', 'Name', 'City', 'Status  Completed  Architecturally Topped Out  Structurally Topped Out  Under Construction  On Hold  Proposed  Vision  Never Completed  Demolished  Competition Entry  Canceled  Proposed Renovation  Under Renovation  Renovated  Under Demolition  See all status definitions', 'Completion  A Completed building must fulfill all the following criteria:  Topped out structurally and architecturally  Fully clad  Open for business, or at least partially occupiable', 'Height  The level of the lowest, significant, open-air, pedestrian entrance to the architectural top of the building, including spires, but not including antennae, signage, flagpoles or other functional-technical equipment.', 'Floors  Includes all above-ground floors, including the ground floor itself, and significant mezzanine floors / major mechanical plant floors, unless they have a significantly smaller floor area than the major floors below. Mechanical penthouses or plant rooms above the general roof a