# Extract

In [1]:
import pandas as pd

In [2]:
# Wikipedia page whose HTML tables are parsed in the next cell.
url = (
    "https://en.wikipedia.org/wiki/"
    "List_of_tallest_structures_in_Thailand"
)

In [3]:
# read_html returns a *list* of DataFrames, one per <table> found on the page;
# a single table is picked out of that list in a later cell.
df = pd.read_html(io=url)

In [4]:
# How many tables were parsed from the page (df is still a list here).
len(df)

16

In [5]:
# Keep only the first parsed table (index 0); from here on df is a DataFrame.
df = df[0]

In [6]:
# Preview the first rows of the raw table.
df.head()

Unnamed: 0,Rank,Building,Image,Height,Height.1,Floors,Year,Location,Notes
0,1,Magnolias Waterfront Residences Iconsiam,,316 m,"1,037 ft",70,2018,"Bangkok .mw-parser-output .geo-default,.mw-par...",Tallest building in Bangkok and Thailand.[9][10]
1,2,King Power MahaNakhon,,314 m,"1,030 ft",79,2016,Bangkok 13°43′24″N 100°31′42″E﻿ / ﻿13.72340°N ...,Tallest building in Thailand by height to roof...
2,3,Baiyoke Tower II,,304 m,997 ft,85,1997,Bangkok 13°45′17″N 100°32′25″E﻿ / ﻿13.75459°N ...,Tallest building in Thailand from 1997 to 2016...
3,4,Four Seasons Private Residences,,301 m,988 ft,74,2019,Bangkok 13°42′45″N 100°30′37″E﻿ / ﻿13.71257°N ...,Official height is 301.0m (measured to top of ...
4,5,One City Centre,,275.8 m,905 ft,61,2023,Bangkok 13°44′36″N 100°32′48″E﻿ / ﻿13.74327°N ...,Tallest office building in Thailand.[19][20]


In [7]:
# Inspect dtypes and non-null counts to decide which columns need cleaning.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180 entries, 0 to 179
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Rank      180 non-null    object 
 1   Building  180 non-null    object 
 2   Image     0 non-null      float64
 3   Height    180 non-null    object 
 4   Height.1  180 non-null    object 
 5   Floors    180 non-null    int64  
 6   Year      180 non-null    int64  
 7   Location  180 non-null    object 
 8   Notes     151 non-null    object 
dtypes: float64(1), int64(2), object(6)
memory usage: 12.8+ KB


# Transform 

In [8]:
# Delete the Image column: it carries no data (0 non-null values).
# Reassigning instead of inplace=True, together with errors='ignore', keeps
# this cell re-runnable: a second run no longer raises KeyError once the
# column is already gone.
df = df.drop(columns='Image', errors='ignore')

In [9]:
# Confirm the Image column is gone.
df.head()

Unnamed: 0,Rank,Building,Height,Height.1,Floors,Year,Location,Notes
0,1,Magnolias Waterfront Residences Iconsiam,316 m,"1,037 ft",70,2018,"Bangkok .mw-parser-output .geo-default,.mw-par...",Tallest building in Bangkok and Thailand.[9][10]
1,2,King Power MahaNakhon,314 m,"1,030 ft",79,2016,Bangkok 13°43′24″N 100°31′42″E﻿ / ﻿13.72340°N ...,Tallest building in Thailand by height to roof...
2,3,Baiyoke Tower II,304 m,997 ft,85,1997,Bangkok 13°45′17″N 100°32′25″E﻿ / ﻿13.75459°N ...,Tallest building in Thailand from 1997 to 2016...
3,4,Four Seasons Private Residences,301 m,988 ft,74,2019,Bangkok 13°42′45″N 100°30′37″E﻿ / ﻿13.71257°N ...,Official height is 301.0m (measured to top of ...
4,5,One City Centre,275.8 m,905 ft,61,2023,Bangkok 13°44′36″N 100°32′48″E﻿ / ﻿13.74327°N ...,Tallest office building in Thailand.[19][20]


In [10]:
# Clean Rank column: strip every non-digit character, then cast to int.
# - r'\D' is a raw string; '\D' in a plain literal is an invalid escape
#   sequence (SyntaxWarning on Python 3.12+).
# - astype(str) lets the cell be re-run after the column is already numeric
#   (the .str accessor rejects non-string columns).
df["Rank"] = (
    df["Rank"]
    .astype(str)
    .str.replace(r'\D', '', regex=True)
    .astype('int')
)

In [11]:
# Clean Height column: parse the metre value out of strings such as "275.8 m".
# - astype(str) lets the cell be re-run after the column is already float.
# - Thousands separators are removed, then only the FIRST number is taken.
#   Deleting every non-digit character instead would fuse any later digits in
#   the cell (e.g. a footnote marker like "[9]") into the height.
df["Height"] = (
    df["Height"]
    .astype(str)
    .str.replace(',', '', regex=False)
    .str.extract(r'(\d+(?:\.\d+)?)', expand=False)
    .astype('float')
)

In [12]:
# Clean Height.1 column: parse the feet value out of strings such as "1,037 ft".
# - astype(str) lets the cell be re-run after the column is already int.
# - Thousands separators are removed, then only the FIRST run of digits is
#   taken. The previous '\D' pattern was an invalid escape sequence in a
#   non-raw string, and deleting all non-digits would also fuse a decimal part
#   or footnote digits into the value.
df["Height.1"] = (
    df["Height.1"]
    .astype(str)
    .str.replace(',', '', regex=False)
    .str.extract(r'(\d+)', expand=False)
    .astype('int')
)

In [13]:
# Add Province column.
# The place name is the leading text of Location, which is followed either by
# the coordinates (first digit) or by leaked ".mw-parser-output" CSS (first
# '.'). Taking everything before the first digit or dot handles both cases.
# Splitting on the literal '13' only worked for rows whose latitude starts
# with 13; any other latitude left the coordinates in Province.
# strip() removes the separator whitespace left after the name.
df['Province'] = (
    df['Location']
    .str.extract(r'^([^\d.]+)', expand=False)
    .str.strip()
)

In [14]:
# Clean Notes column: remove bracketed citation markers such as "[9]".
# Only the markers are deleted; splitting on the first '[' also discarded any
# note text that came after the first marker.
# Missing notes stay NaN, so the isna() check in the next cell still works.
df['Notes'] = (
    df['Notes']
    .str.replace(r'\[[^\]]*\]', '', regex=True)
    .str.strip()
)

In [15]:
# Show a few of the rows that have no note at all.
df[df["Notes"].isna()].head()

Unnamed: 0,Rank,Building,Height,Height.1,Floors,Year,Location,Notes,Province
59,60,Park Origin Phrom Phong Tower II,186.0,611,50,2017,Bangkok 13°43′33″N 100°33′56″E﻿ / ﻿13.72574°N ...,,Bangkok
61,62,Park Origin Phrom Phong Tower V,184.0,603,48,2018,Bangkok 13°43′29″N 100°33′57″E﻿ / ﻿13.72464°N ...,,Bangkok
67,66,Bhiraj Tower at EmQuartier,180.0,591,45,2015,Bangkok 13°43′52″N 100°34′11″E﻿ / ﻿13.73117°N ...,,Bangkok
88,89,Ashton Asok-Rama 9 Omega Tower,168.0,552,49,2020,Bangkok 13°45′19″N 100°33′52″E﻿ / ﻿13.75540°N ...,,Bangkok
101,100,The Waterford Diamond,165.0,541,46,1999,Bangkok 13°43′38″N 100°34′25″E﻿ / ﻿13.72709°N ...,,Bangkok


In [16]:
# Replace missing notes with a single-space placeholder so the column has no nulls.
notes_missing = df["Notes"].isna()
df["Notes"] = df["Notes"].mask(notes_missing, ' ')

In [17]:
# Re-check dtypes and non-null counts after cleaning.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180 entries, 0 to 179
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Rank      180 non-null    int32  
 1   Building  180 non-null    object 
 2   Height    180 non-null    float64
 3   Height.1  180 non-null    int32  
 4   Floors    180 non-null    int64  
 5   Year      180 non-null    int64  
 6   Location  180 non-null    object 
 7   Notes     180 non-null    object 
 8   Province  180 non-null    object 
dtypes: float64(1), int32(2), int64(2), object(4)
memory usage: 11.4+ KB


In [18]:
# Use Rank as the row label instead of the default RangeIndex.
df = df.set_index(keys='Rank')

In [19]:
# Put the unit into the two height column names.
df = df.rename(
    columns={
        'Height': 'Height_m',
        'Height.1': 'Height_ft',
    }
)

In [20]:
# Preview the frame with Rank as index and the renamed height columns.
df.head()

Unnamed: 0_level_0,Building,Height_m,Height_ft,Floors,Year,Location,Notes,Province
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Magnolias Waterfront Residences Iconsiam,316.0,1037,70,2018,"Bangkok .mw-parser-output .geo-default,.mw-par...",Tallest building in Bangkok and Thailand.,Bangkok
2,King Power MahaNakhon,314.0,1030,79,2016,Bangkok 13°43′24″N 100°31′42″E﻿ / ﻿13.72340°N ...,Tallest building in Thailand by height to roof.,Bangkok
3,Baiyoke Tower II,304.0,997,85,1997,Bangkok 13°45′17″N 100°32′25″E﻿ / ﻿13.75459°N ...,Tallest building in Thailand from 1997 to 2016...,Bangkok
4,Four Seasons Private Residences,301.0,988,74,2019,Bangkok 13°42′45″N 100°30′37″E﻿ / ﻿13.71257°N ...,Official height is 301.0m (measured to top of ...,Bangkok
5,One City Centre,275.8,905,61,2023,Bangkok 13°44′36″N 100°32′48″E﻿ / ﻿13.74327°N ...,Tallest office building in Thailand.,Bangkok


In [21]:
# Reorder columns: identification first, then measurements, then free text.
df = df.loc[
    :,
    [
        'Building',
        'Province',
        'Year',
        'Height_m',
        'Height_ft',
        'Floors',
        'Location',
        'Notes',
    ],
]

In [22]:
# Final check of the column order before export.
df.head()

Unnamed: 0_level_0,Building,Province,Year,Height_m,Height_ft,Floors,Location,Notes
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Magnolias Waterfront Residences Iconsiam,Bangkok,2018,316.0,1037,70,"Bangkok .mw-parser-output .geo-default,.mw-par...",Tallest building in Bangkok and Thailand.
2,King Power MahaNakhon,Bangkok,2016,314.0,1030,79,Bangkok 13°43′24″N 100°31′42″E﻿ / ﻿13.72340°N ...,Tallest building in Thailand by height to roof.
3,Baiyoke Tower II,Bangkok,1997,304.0,997,85,Bangkok 13°45′17″N 100°32′25″E﻿ / ﻿13.75459°N ...,Tallest building in Thailand from 1997 to 2016...
4,Four Seasons Private Residences,Bangkok,2019,301.0,988,74,Bangkok 13°42′45″N 100°30′37″E﻿ / ﻿13.71257°N ...,Official height is 301.0m (measured to top of ...
5,One City Centre,Bangkok,2023,275.8,905,61,Bangkok 13°44′36″N 100°32′48″E﻿ / ﻿13.74327°N ...,Tallest office building in Thailand.


# Load

In [23]:
# Write the cleaned table to disk; the Rank index is written as the first column.
df.to_csv(path_or_buf='list_tallest_buildings.csv')