# Scrape VEI Pandas Table from Wikipedia's Volcanic explosivity index article

In [127]:
import pandas as pd

In [128]:
# Read in HTML tables into a DataFrame
df = pd.read_html('https://en.wikipedia.org/wiki/Volcanic_explosivity_index',encoding='utf-8')

In [129]:
# Select the first table to get the VEI table
vei_df = df[0]

In [130]:
# remove the second level of columns (i.e., the 'Examples' columns)
vei_df.columns = vei_df.columns.droplevel(level=1)

In [131]:
# rename the Stratospheric injection[2] column to remove the footnote hyperlink
vei_df.rename(columns={'Stratospheric injection[2]': 'Stratospheric injection'}, inplace=True)

In [132]:
# drop the last Unnamed: 8_level_0 full of NaN values
vei_df = vei_df.iloc[:, :-1]

In [133]:
# Remove the duplicate VEI rows with links to specific volcanos from the DataFrame
vei_df = vei_df.iloc[[0, 2, 4, 6, 8, 10, 12, 14]]

In [134]:
vei_df.head(10)

Unnamed: 0,VEI,Ejecta volume (bulk),Classification,Description,Plume,Periodicity,Tropospheric injection,Stratospheric injection
0,0,< 104 m3,Hawaiian,Effusive,< 100 m,constant,negligible,none
2,1,> 104 m3,Hawaiian / Strombolian,Gentle,100 m – 1 km,daily,minor,none
4,2,> 106 m3,Strombolian / Vulcanian,Explosive,1–5 km,2 weeks,moderate,none
6,3,> 107 m3,Strombolian / Vulcanian / Peléan / Sub-Plinian,Severe,3–15 km,3 months,substantial,possible
8,4,> 0.1 km3,Peléan / Plinian / Sub-Plinian,Catastrophic,> 10 km,18 months,substantial,definite
10,5,> 1 km3,Peléan / Plinian,Cataclysmic,> 10 km,12 years,substantial,significant
12,6,> 10 km3,Plinian / Ultra-Plinian,Colossal,> 20 km,50–100 years,substantial,substantial
14,7,> 100 km3,Ultra-Plinian,Super-colossal,> 20 km,"500–1,000 years",substantial,substantial


In [137]:
# Adjust some of the cells that need to be reformatted 
vei_df.loc[16, 'Periodicity'] = "> 50,000 years"
new_values = ['< 10^4 m^3', '> 10^4 m^3', '> 10^6 m^3','> 10^7 m^3','> 0.1 km^3','> 1 km3','> 10 km^3','> 100 km^3','> 1,000 km^3']
vei_df['Ejecta  volume (bulk)']=new_values

In [138]:
vei_df.head(10)

Unnamed: 0,VEI,Ejecta volume (bulk),Classification,Description,Plume,Periodicity,Tropospheric injection,Stratospheric injection
0,0.0,< 10^4 m^3,Hawaiian,Effusive,< 100 m,constant,negligible,none
2,1.0,> 10^4 m^3,Hawaiian / Strombolian,Gentle,100 m – 1 km,daily,minor,none
4,2.0,> 10^6 m^3,Strombolian / Vulcanian,Explosive,1–5 km,2 weeks,moderate,none
6,3.0,> 10^7 m^3,Strombolian / Vulcanian / Peléan / Sub-Plinian,Severe,3–15 km,3 months,substantial,possible
8,4.0,> 0.1 km^3,Peléan / Plinian / Sub-Plinian,Catastrophic,> 10 km,18 months,substantial,definite
10,5.0,> 1 km3,Peléan / Plinian,Cataclysmic,> 10 km,12 years,substantial,significant
12,6.0,> 10 km^3,Plinian / Ultra-Plinian,Colossal,> 20 km,50–100 years,substantial,substantial
14,7.0,> 100 km^3,Ultra-Plinian,Super-colossal,> 20 km,"500–1,000 years",substantial,substantial
16,,"> 1,000 km^3",,,,"> 50,000 years",,


In [112]:
vei_df.to_csv('vei_df.csv', index=False, encoding='utf-8')