# Scrape VEI Pandas Table from Wikipedia's Volcanic explosivity index article

In [1]:
import pandas as pd

In [2]:
# Parse every HTML table on the Wikipedia VEI article; the result is a list of DataFrames
vei_url = 'https://en.wikipedia.org/wiki/Volcanic_explosivity_index'
df = pd.read_html(vei_url, encoding='utf-8')

In [3]:
# Select the first table to get the VEI table
# (`df` is the list of tables parsed from the article; position 0 is the first table on the page)
vei_df = df[0]

In [4]:
# Flatten the two-level column header down to its top level; this discards the
# second level (i.e., the 'Examples' labels).
# get_level_values(0) is used rather than droplevel(level=1) so the cell can be
# re-run safely: droplevel raises once the columns are already a flat Index,
# whereas get_level_values(0) simply returns the existing labels.
vei_df.columns = vei_df.columns.get_level_values(0)

In [5]:
# Strip Wikipedia footnote markers such as "[2]" from the column labels, so that
# 'Stratospheric injection[2]' becomes 'Stratospheric injection'.
# Matching the marker by pattern (instead of renaming one hard-coded label) keeps
# this working when the article's footnotes are renumbered; a rename keyed on the
# exact old label would silently do nothing in that case.
# Labels without a marker are left untouched, so re-running the cell is harmless.
vei_df.columns = vei_df.columns.str.replace(r'\[\d+\]', '', regex=True)

In [6]:
# Drop the trailing placeholder column ('Unnamed: 8_level_0'), which is full of NaN values.
# The column is selected by its label prefix rather than by position so that the
# cell is idempotent: a positional `iloc[:, :-1]` removes one more (real) column
# every time the cell is re-run.
is_placeholder = vei_df.columns.str.startswith('Unnamed:')
vei_df = vei_df.loc[:, ~is_placeholder]

In [7]:
# Keep only the rows at positions 0, 2, ..., 16 (one per VEI level); the rows in
# between are the duplicate VEI rows with links to specific volcanoes.
# .copy() detaches the selection from the scraped table, so the cell-level
# assignments made later in the notebook modify an independent frame instead of
# triggering pandas' SettingWithCopyWarning.
vei_rows = list(range(0, 17, 2))
vei_df = vei_df.iloc[vei_rows].copy()

In [8]:
# Preview the cleaned table; head(11) exceeds the 9 selected rows, so every row is shown
vei_df.head(11)

Unnamed: 0,VEI,Ejecta volume (bulk),Classification,Description,Plume,Periodicity,Tropospheric injection,Stratospheric injection
0,0,< 104 m3,Hawaiian,Effusive,< 100 m,constant,negligible,none
2,1,> 104 m3,Hawaiian / Strombolian,Gentle,100 m – 1 km,daily,minor,none
4,2,> 106 m3,Strombolian / Vulcanian,Explosive,1–5 km,2 weeks,moderate,none
6,3,> 107 m3,Strombolian / Vulcanian / Peléan / Sub-Plinian,Severe,3–15 km,3 months,substantial,possible
8,4,> 0.1 km3,Peléan / Plinian / Sub-Plinian,Catastrophic,> 10 km,18 months,substantial,definite
10,5,> 1 km3,Peléan / Plinian,Cataclysmic,> 10 km,12 years,substantial,significant
12,6,> 10 km3,Plinian / Ultra-Plinian,Colossal,> 20 km,50–100 years,substantial,substantial
14,7,> 100 km3,Ultra-Plinian,Super-colossal,> 20 km,"500–1,000 years",substantial,substantial
16,8,"> 1,000 km3",Ultra-Plinian,Mega-colossal,> 20 km,"> 50,000 years[3][4]",vast,vast


In [9]:
# Adjust some of the cells that need to be reformatted

# VEI 8 periodicity: remove the trailing "[3][4]" footnote markers
vei_df.loc[16, 'Periodicity'] = "> 50,000 years"

# Ejecta volumes: the scraped text has lost its superscripts (e.g. "104 m3"), so
# the exponents are spelled out with "^". One entry per VEI level 0-8, in row order.
new_values = [
    '< 10^4 m^3',
    '> 10^4 m^3',
    '> 10^6 m^3',
    '> 10^7 m^3',
    '> 0.1 km^3',
    '> 1 km^3',
    '> 10 km^3',
    '> 100 km^3',
    '> 1,000 km^3',
]

# NOTE(review): the label below contains TWO spaces between "Ejecta" and "volume".
# The saved output shows the new values landing in the existing column, so the
# scraped header presumably contains them too — confirm before normalising it.
vei_df['Ejecta  volume (bulk)'] = new_values

In [10]:
# Re-display the table to check the reformatted Periodicity and ejecta-volume cells
vei_df.head(10)

Unnamed: 0,VEI,Ejecta volume (bulk),Classification,Description,Plume,Periodicity,Tropospheric injection,Stratospheric injection
0,0,< 10^4 m^3,Hawaiian,Effusive,< 100 m,constant,negligible,none
2,1,> 10^4 m^3,Hawaiian / Strombolian,Gentle,100 m – 1 km,daily,minor,none
4,2,> 10^6 m^3,Strombolian / Vulcanian,Explosive,1–5 km,2 weeks,moderate,none
6,3,> 10^7 m^3,Strombolian / Vulcanian / Peléan / Sub-Plinian,Severe,3–15 km,3 months,substantial,possible
8,4,> 0.1 km^3,Peléan / Plinian / Sub-Plinian,Catastrophic,> 10 km,18 months,substantial,definite
10,5,> 1 km3,Peléan / Plinian,Cataclysmic,> 10 km,12 years,substantial,significant
12,6,> 10 km^3,Plinian / Ultra-Plinian,Colossal,> 20 km,50–100 years,substantial,substantial
14,7,> 100 km^3,Ultra-Plinian,Super-colossal,> 20 km,"500–1,000 years",substantial,substantial
16,8,"> 1,000 km^3",Ultra-Plinian,Mega-colossal,> 20 km,"> 50,000 years",vast,vast


In [11]:
# Save the cleaned VEI table as a UTF-8 CSV, leaving the DataFrame index out of the file
vei_csv_path = 'Output/vei_df.csv'
vei_df.to_csv(vei_csv_path, encoding='utf-8', index=False)

In [18]:
# Population Exposure Index (PEI) summary table. The values are taken from an image in
# "Populations around Holocene volcanoes and development of a Population Exposure Index"
# Publisher: Cambridge University Press
# Editors: Susan C. Loughlin, Steve Sparks, Sarah K. Brown, Susanna F. Jenkins, Charlotte Vye-Brown
# https://www.researchgate.net/publication/280714829_Populations_around_Holocene_volcanoes_and_development_of_a_Population_Exposure_Index
pei_columns = [
    'Population Exposure Index',
    'Number of Volcanoes',
    'Percentage of Volcanoes (%)',
    'Percentage of Total Weighted Population (%)',
]

# One tuple per PEI level, with fields in the same order as pei_columns
pei_rows = [
    (1, 197, 12.7, 0),
    (2, 642, 41.4, 0.4),
    (3, 157, 10.1, 1.0),
    (4, 178, 11.5, 3.5),
    (5, 188, 12.1, 11.4),
    (6, 128, 8.3, 23.8),
    (7, 61, 3.9, 59.9),
]

# Transpose the rows into a {column name: list of values} mapping
data = {name: list(values) for name, values in zip(pei_columns, zip(*pei_rows))}

pei_df = pd.DataFrame(data)
pei_df

Unnamed: 0,Population Exposure Index,Number of Volcanoes,Percentage of Volcanoes (%),Percentage of Total Weighted Population (%)
0,1,197,12.7,0.0
1,2,642,41.4,0.4
2,3,157,10.1,1.0
3,4,178,11.5,3.5
4,5,188,12.1,11.4
5,6,128,8.3,23.8
6,7,61,3.9,59.9


In [19]:
# Save the PEI summary table (pei_df) as a UTF-8 CSV, leaving the DataFrame index out of the file
pei_csv_path = 'Output/pei_df.csv'
pei_df.to_csv(pei_csv_path, encoding='utf-8', index=False)

In [17]:
# Load the volcano dataset from the Output folder and preview its first ten rows.
# NOTE(review): the stored preview shows "Cha?ne des Puys", so accented characters
# look to have been lost before this CSV was written — confirm the upstream export.
volcano_csv_path = 'Output/volcano_data.csv'
volcano_df = pd.read_csv(volcano_csv_path, encoding='utf-8')
volcano_df.head(10)

Unnamed: 0,VolcanoID,V_Name,Country,Region,Subregion,Latitude,Longitude,PEI,H_active,VEI_Holoce,hazard,class,risk
0,210010,West Eifel Volcanic Field,Germany,Mediterranean and W Asia,Western Europe,50.17,6.85,6,0,Unknown VEI,,U-HR,
1,210020,Cha?ne des Puys,France,Mediterranean and W Asia,Western Europe,45.775,2.97,7,0,Unknown VEI,,U-HR,
2,210030,Olot Volcanic Field,Spain,Mediterranean and W Asia,Western Europe,42.17,2.53,5,0,No confirmed eruptions,,U-NHHR,
3,210040,Calatrava Volcanic Field,Spain,Mediterranean and W Asia,Western Europe,38.87,-4.02,6,0,Unknown VEI,,U-HR,
4,211001,Larderello,Italy,Mediterranean and W Asia,Italy,43.25,10.87,4,0,3,,U-HR,
5,211003,Vulsini,Italy,Mediterranean and W Asia,Italy,42.6,11.93,5,0,Unknown VEI,,U-HR,
6,211004,Alban Hills,Italy,Mediterranean and W Asia,Italy,41.73,12.7,7,0,No confirmed eruptions,,U-NHHR,
7,211010,Campi Flegrei,Italy,Mediterranean and W Asia,Italy,40.827,14.139,7,1,5,3.0,,3.0
8,211020,Vesuvius,Italy,Mediterranean and W Asia,Italy,40.821,14.426,7,1,5,3.0,,3.0
9,211030,Ischia,Italy,Mediterranean and W Asia,Italy,40.73,13.897,5,0,3,,U-HR,


In [29]:
# Parse every HTML table on the Wikipedia death-toll list into a list of DataFrames,
# then keep the table at position 1 (the second table on the page)
death_toll_url = 'https://en.wikipedia.org/wiki/List_of_volcanic_eruptions_by_death_toll'
df = pd.read_html(death_toll_url, encoding='utf-8')
volcanic_eruptions_by_death_toll_df = df[1]

In [30]:
# Keep every column except the last one (positional slice).
# NOTE(review): not idempotent — each re-run of this cell removes one more column.
n_cols = volcanic_eruptions_by_death_toll_df.shape[1]
volcanic_eruptions_by_death_toll_df = volcanic_eruptions_by_death_toll_df.iloc[:, :n_cols - 1]

In [31]:
# Preview the first ten rows of the death-toll table
volcanic_eruptions_by_death_toll_df.head(10)

Unnamed: 0,Human death toll,Volcano,VEI,Location,Year,Eruption
0,"71,000 to 250,100+",Mount Tambora,7,Indonesia,1815,"1815 eruption of Mount Tambora, Year Without a..."
1,"36,000+",Krakatoa,6,Indonesia,1883,1883 eruption of Krakatoa
2,30000,Mount Pelée,4,Martinique,1902,1902 eruption of Mount Pelée
3,23000,Nevado del Ruiz,3,Colombia,1985,Armero tragedy
4,"20,000~ (estimated)",Santorini,7,Greece,c. 1600 BC,Minoan eruption
5,"15,000 to 20,000",Mount Samalas,7,Indonesia,1257,1257 Samalas eruption
6,15000,Mount Unzen,2,Japan,1792,1792 Unzen earthquake and tsunami
7,"13,000+ (estimated)",Mount Vesuvius,5,Italy,79,Eruption of Mount Vesuvius in 79 AD
8,"10,000+",Laki and Grímsvötn,4,Iceland,1783,Laki#1783 eruption
9,10000,Kelud,5,Indonesia,1586,


In [32]:
# Save the death-toll table as a UTF-8 CSV, leaving the DataFrame index out of the file
death_toll_csv_path = 'Output/volcanic_eruptions_by_death_toll_df.csv'
volcanic_eruptions_by_death_toll_df.to_csv(death_toll_csv_path, encoding='utf-8', index=False)