# Scrape VEI Pandas Table from Wikipedia's Volcanic explosivity index article

In [1]:
import pandas as pd

In [2]:
# Read in HTML tables into a DataFrame
df = pd.read_html('https://en.wikipedia.org/wiki/Volcanic_explosivity_index',encoding='utf-8')

In [3]:
# Select the first table to get the VEI table
vei_df = df[0]

In [4]:
# remove the second level of columns (i.e., the 'Examples' columns)
vei_df.columns = vei_df.columns.droplevel(level=1)

In [5]:
# rename the Stratospheric injection[2] column to remove the footnote hyperlink
vei_df.rename(columns={'Stratospheric injection[2]': 'Stratospheric injection'}, inplace=True)

In [6]:
# drop the last Unnamed: 8_level_0 full of NaN values
vei_df = vei_df.iloc[:, :-1]

In [7]:
# Remove the duplicate VEI rows with links to specific volcanos from the DataFrame
vei_df = vei_df.iloc[[0, 2, 4, 6, 8, 10, 12, 14,16]]

In [8]:
vei_df.head(11)

Unnamed: 0,VEI,Ejecta volume (bulk),Classification,Description,Plume,Periodicity,Tropospheric injection,Stratospheric injection
0,0,< 104 m3,Hawaiian,Effusive,< 100 m,constant,negligible,none
2,1,> 104 m3,Hawaiian / Strombolian,Gentle,100 m – 1 km,daily,minor,none
4,2,> 106 m3,Strombolian / Vulcanian,Explosive,1–5 km,2 weeks,moderate,none
6,3,> 107 m3,Strombolian / Vulcanian / Peléan / Sub-Plinian,Severe,3–15 km,3 months,substantial,possible
8,4,> 0.1 km3,Peléan / Plinian / Sub-Plinian,Catastrophic,> 10 km,18 months,substantial,definite
10,5,> 1 km3,Peléan / Plinian,Cataclysmic,> 10 km,12 years,substantial,significant
12,6,> 10 km3,Plinian / Ultra-Plinian,Colossal,> 20 km,50–100 years,substantial,substantial
14,7,> 100 km3,Ultra-Plinian,Super-colossal,> 20 km,"500–1,000 years",substantial,substantial
16,8,"> 1,000 km3",Ultra-Plinian,Mega-colossal,> 20 km,"> 50,000 years[3][4]",vast,vast


In [9]:
# Adjust some of the cells that need to be reformatted 
vei_df.loc[16, 'Periodicity'] = "> 50,000 years"
new_values = ['< 10^4 m^3', '> 10^4 m^3', '> 10^6 m^3','> 10^7 m^3','> 0.1 km^3','> 1 km3','> 10 km^3','> 100 km^3','> 1,000 km^3']
vei_df['Ejecta  volume (bulk)']=new_values

In [10]:
vei_df.head(10)

Unnamed: 0,VEI,Ejecta volume (bulk),Classification,Description,Plume,Periodicity,Tropospheric injection,Stratospheric injection
0,0,< 10^4 m^3,Hawaiian,Effusive,< 100 m,constant,negligible,none
2,1,> 10^4 m^3,Hawaiian / Strombolian,Gentle,100 m – 1 km,daily,minor,none
4,2,> 10^6 m^3,Strombolian / Vulcanian,Explosive,1–5 km,2 weeks,moderate,none
6,3,> 10^7 m^3,Strombolian / Vulcanian / Peléan / Sub-Plinian,Severe,3–15 km,3 months,substantial,possible
8,4,> 0.1 km^3,Peléan / Plinian / Sub-Plinian,Catastrophic,> 10 km,18 months,substantial,definite
10,5,> 1 km3,Peléan / Plinian,Cataclysmic,> 10 km,12 years,substantial,significant
12,6,> 10 km^3,Plinian / Ultra-Plinian,Colossal,> 20 km,50–100 years,substantial,substantial
14,7,> 100 km^3,Ultra-Plinian,Super-colossal,> 20 km,"500–1,000 years",substantial,substantial
16,8,"> 1,000 km^3",Ultra-Plinian,Mega-colossal,> 20 km,"> 50,000 years",vast,vast


In [12]:
vei_df.to_csv('Output/vei_df.csv', index=False, encoding='utf-8')

In [14]:
# Create dataframe from image from opulations around Holocene volcanoes and development of a Population Exposure IndexPublisher: Cambridge University PressEditors: Susan C. Loughlin, Steve Sparks, Sarah K. Brown, Susanna F. Jenkins, Charlotte Vye-Brown
#https://www.researchgate.net/publication/280714829_Populations_around_Holocene_volcanoes_and_development_of_a_Population_Exposure_Index
data = {
    'Population Exposure Index': [1, 2, 3, 4, 5, 6, 7],
    'Number of Volcanoes': [197, 642, 157, 178, 188, 128, 61],  # Add None for missing data
    'Percentage of Volcanoes': [12.7, 41.4, 10.1, 11.5, 12.1, 8.3, 3.9],
    'Percentage of Total Weighted Population': [0, 0.4, 1.0, 3.5, 11.4, 23.8, 59.9]
}

pei_df = pd.DataFrame(data)
pei_df

Unnamed: 0,Population Exposure Index,Number of Volcanoes,Percentage of Volcanoes,Percentage of Total Weighted Population
0,1,197,12.7,0.0
1,2,642,41.4,0.4
2,3,157,10.1,1.0
3,4,178,11.5,3.5
4,5,188,12.1,11.4
5,6,128,8.3,23.8
6,7,61,3.9,59.9


In [None]:
pei_df.to_csv('Output/pei_df.csv', index=False, encoding='utf-8')