**NARGEOT** Melvine
**PATTE** Mathys

# Projet Outil Informatique

https://www.sciencedirect.com/topics/earth-and-planetary-sciences/volcanic-aerosol

The Greenhouse Effect, Aerosols, and Climate Change

Daniel Kirk-Davidoff, in Green Chemistry, 2018

"Volcanoes emit sulfur dioxide gas (SO2), which reacts with water in the atmosphere to form sulfuric acid (H2SO4). When volcanic plumes are emitted powerfully enough to reach the stratosphere,a the H2SO4 can form a persistent haze of liquid droplets, reflecting away sunlight and cooling the earth for a year or two."

In [None]:
# Install the third-party packages used by this notebook.
%pip install numpy matplotlib pandas # Data handling
%pip install xarray netcdf4 h5netcdf # NetCDF file handling
%pip install folium # World map
%pip install beautifulsoup4 # Web scraping
%pip install geopandas
%pip install slugify

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
import folium as fl
from bs4 import BeautifulSoup
from urllib.request import urlopen
import geopandas as gpd

## Données

### Sources

https://www.ngdc.noaa.gov/hazel/hazard-service/api/v1/volcanoes?page=1

https://volcano.si.edu/search_eruption.cfm

https://www.kaggle.com/datasets/jessemostipak/volcano-eruptions/

https://data.cerdi.uca.fr/erup-vol/

https://public.opendatasoft.com/explore/dataset/significant-volcanic-eruption-database/export/

https://www.ngdc.noaa.gov/hazel/view/hazards/volcano/event-search/

https://berkeleyearth.org/data/

https://disc.gsfc.nasa.gov/datasets?keywords=%22msvolso2l4%22%7C%22tomsn7so2%22%7C%22OMPS_NPP_NMSO2_PCA_L2%22%7C%22MSAQSO2L4%22&page=1&subject=Aerosols

https://measures.gesdisc.eosdis.nasa.gov/data/SO2/MSVOLSO2L4.4/MSVOLSO2L4_v04-00-2022m0505.txt

### Import

In [None]:
from urllib.request import urlopen
from io import StringIO

# Download the SO2 table and skip its first 47 lines, which are comments.
# Original source of the mirrored file:
# https://measures.gesdisc.eosdis.nasa.gov/data/SO2/MSVOLSO2L4.4/MSVOLSO2L4_v04-00-2022m0505.txt
# The response is used as a context manager so the connection is released
# as soon as the body has been read.
with urlopen("https://melvin-klein.github.io/outinfo/so2.txt") as response:
    data = response.readlines()[47:]

# Two lines have too many cells: cut their last 4 bytes and terminate them
# with a newline again so that they parse like the other rows.
for line_number in (1172, 2033):
    data[line_number] = data[line_number][:-4] + b'\n'

# Merge the byte lines into one UTF-8 string and expose it as a file-like
# object, which is what pandas expects when parsing a tab-separated table.
data = StringIO(b''.join(data).decode('utf-8'))
df_raw_so2 = pd.read_csv(data, sep="\t")
df_raw_so2

In [None]:
# Columns of the SO2 table that the rest of the notebook relies on.
columns = ['volcano', 'lat', 'lon', 'yyyy', 'mm', 'dd', 'so2(kt)']

# Restrict the table to those columns, then discard every row that has a
# NaN in any of them.
df_cleared_so2 = df_raw_so2[columns].dropna(subset=columns)

df_cleared_so2

In [None]:
# Load the significant volcanic eruption database (GeoJSON export).
gdf_raw_eruptions = gpd.read_file(
    "https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/significant-volcanic-eruption-database/exports/geojson?lang=fr&timezone=Europe%2FBerlin"
)

# Columns of interest; rows with a NaN in any of them are removed before
# the frame is narrowed down to these columns.
columns = ['name', 'country', 'elevation', 'type', 'year', 'geometry']
gdf_cleared_eruptions = gdf_raw_eruptions.dropna(subset=columns)[columns]

gdf_cleared_eruptions

In [None]:
from shapely.geometry import Point

# Turn the SO2 dataframe into a geo dataframe so that it can later be joined
# spatially with the significant eruption geo dataframe.
# Each record becomes a Point with x = longitude and y = latitude.
longitudes = df_cleared_so2['lon']
latitudes = df_cleared_so2['lat']
geometry = [Point(lon, lat) for lon, lat in zip(longitudes, latitudes)]

gdf_cleared_so2 = gpd.GeoDataFrame(
    df_cleared_so2,
    geometry=geometry,
    crs='EPSG:4326',
)
gdf_cleared_so2

In [None]:
# Reproject both geo dataframes to the same CRS (EPSG:3857) before the
# spatial join.
gdf_cleared_so2 = gdf_cleared_so2.to_crs(epsg=3857)
gdf_cleared_eruptions = gdf_cleared_eruptions.to_crs(epsg=3857)

# Pair each SO2 record with its nearest eruption record; candidates farther
# away than max_distance (expressed in CRS units) are not matched.
joined_gdf = gdf_cleared_so2.sjoin_nearest(
    gdf_cleared_eruptions,
    max_distance=4000,
)

# Keep only the pairs whose SO2 year and eruption year agree.
same_year = joined_gdf['yyyy'] == joined_gdf['year']
filtered_gdf = joined_gdf[same_year]

filtered_gdf

In [None]:
# Narrow the joined table down to the relevant columns and renumber the
# rows from 0.
kept_columns = [
    'name', 'country', 'lat', 'lon', 'yyyy', 'mm', 'dd',
    'elevation', 'type', 'geometry', 'so2(kt)',
]
filtered_gdf = filtered_gdf[kept_columns].reset_index(drop=True)

# Expose the result under the project's dataframe name.
df_nargeot_patte = filtered_gdf

df_nargeot_patte

In [None]:
# Eruption table consumed by the analysis cells, which read
# `volcano_weather` with 'year', 'month' and 'eruption_so2' columns.
# It was previously only sketched as commented-out code, so those cells
# failed with a NameError.
# NOTE(review): the mapping below (yyyy -> year, mm -> month,
# name -> volcano_name, so2(kt) -> eruption_so2, lon -> lng) is inferred from
# the column list of the commented-out draft -- confirm it is the intended one.
volcano_weather = pd.DataFrame(
    df_nargeot_patte.rename(columns={
        'yyyy': 'year',
        'mm': 'month',
        'name': 'volcano_name',
        'so2(kt)': 'eruption_so2',
        'lon': 'lng',
    })[['year', 'month', 'volcano_name', 'country', 'eruption_so2', 'lat', 'lng']]
)

# Integer year/month so that the keys compare equal to the integer
# year/month columns of the temperature table when both are merged.
volcano_weather = volcano_weather.astype({'year': int, 'month': int})

volcano_weather

In [None]:
# Map on which every matched volcano is pinned.
volcano_map = fl.Map(zoom_start=10)

# A volcano can appear in several rows: keep its first row only so that
# each name gets a single marker.
unique_volcanoes = filtered_gdf.drop_duplicates(subset='name', keep='first')

for _, volcano in unique_volcanoes.iterrows():
    marker = fl.Marker(
        location=[volcano['lat'], volcano['lon']],
        popup=volcano['name'],
    )
    marker.add_to(volcano_map)

volcano_map

In [None]:
# Monthly statistics page served by data.jma.go.jp for station block 47807.
url = "https://www.data.jma.go.jp/obd/stats/etrn/view/monthly_s3_en.php?block_no=47807&view=1"

# Read the page inside a context manager so the HTTP connection is closed
# once the body has been downloaded.
with urlopen(url) as page:
    html = page.read().decode("utf-8")

# Parse the HTML with the parser shipped with the standard library.
soup = BeautifulSoup(html, "html.parser")

In [None]:
# Table rows of the scraped page that carry the "mtx" class.
rows = soup.find_all("tr", {"class": "mtx"})

# One column for the year, one per month, and one for the annual value.
temperature_columns = ['year', 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec', 'ann']

# Collect the well-formed rows first and build the dataframe once at the end:
# growing a dataframe one `.loc` row at a time reallocates it on every insert.
kept_positions = []
kept_cells = []
for idx, row in enumerate(rows):
    cells = row.get_text(" ").split(' ')
    # Only rows with exactly one cell per expected column are kept.
    if len(cells) == len(temperature_columns):
        kept_positions.append(idx)
        kept_cells.append(cells)

# The index keeps the position of each row in `rows`, as before.
df_japan_temperature = pd.DataFrame(
    kept_cells,
    index=kept_positions,
    columns=temperature_columns,
)

df_japan_temperature

## Analyse

In [None]:
# Read the JSON file of global monthly temperature anomalies.
monthly_t = pd.read_json("https://pkgstore.datahub.io/core/global-temp/monthly_json/data/4c7af7363a20648a68b8f2038a6765d6/monthly_json.json")

# Only keep the values from GISTEMP (NASA). The explicit copy gives an
# independent frame, so the column assignments below do not write into a
# filtered slice of the raw table.
monthly_t = monthly_t.loc[monthly_t["Source"] == "GISTEMP"].copy()

# Split the 'Date' timestamps into 'year' and 'month' columns so that the
# table can be combined with the other datasets later.
dates = pd.to_datetime(monthly_t["Date"])
monthly_t["year"] = dates.dt.year
monthly_t["month"] = dates.dt.month

# Keep the date columns first and the anomaly last (this drops the columns
# that are no longer needed), sort by ascending date, renumber the rows and
# give the anomaly column a readable name.
monthly_t = (
    monthly_t[['year', 'month', 'Mean']]
    .sort_values(by=['year', 'month'])
    .reset_index(drop=True)
    .rename(columns={"Mean": "t_anomaly"})
)
monthly_t


In [None]:
# Renumber both tables from 0 before combining them.
monthly_t.reset_index(inplace=True, drop=True)
volcano_weather.reset_index(inplace=True, drop=True)

# Build a year+month text key on both tables (column-wise instead of an
# index loop, which also avoids reusing the name `date` for two things).
volcano_weather["date"] = volcano_weather["year"].astype(str) + volcano_weather["month"].astype(str)
monthly_t["Date"] = monthly_t["year"].astype(str) + monthly_t["month"].astype(str)

# Keep only the months in which an eruption was recorded. The copy makes the
# filtered table independent, so re-running this cell does not assign into
# a slice.
monthly_t = monthly_t[monthly_t['Date'].isin(volcano_weather["date"])].copy()

# One output row per eruption record, with the temperature anomaly of its
# month when one is available (right join on year and month).
volcano_temperature = pd.merge(monthly_t, volcano_weather, on=["year", "month"], how="right")
print(volcano_temperature)


In [None]:
# Temperature anomaly and SO2 emission drawn on the same axes.
plt.figure(figsize=(9, 4), dpi=100)

# Each curve carries its own label: passing a bare string to plt.legend()
# makes matplotlib treat every character as a separate label, and a second
# legend() call replaces the first legend instead of extending it.
plt.plot(
    volcano_temperature["year"],
    volcano_temperature["t_anomaly"],
    label='Anomaly of temperature for each year',
)
# The SO2 values are divided by 10000 so that both curves share one y-axis.
plt.plot(
    volcano_temperature["year"],
    volcano_temperature["eruption_so2"] / 10000,
    label="Sulfure emission for each year",
)

plt.xlabel('Year')
plt.ylabel('Temperature anomaly')
plt.title('Comparison of temperature and sulfure emission', pad=10, fontsize=14, style='italic')

# Single legend listing both curves.
plt.legend(loc='upper left')

In [None]:
# Temperature anomaly (x) against the natural logarithm of the SO2 emission (y).
plt.figure(figsize=(8, 4), dpi=100)
axes = plt.gca()

log_so2 = np.log(volcano_temperature["eruption_so2"])
axes.plot(volcano_temperature["t_anomaly"], log_so2)

# Axis labels
axes.set_xlabel('temperature anomaly')
axes.set_ylabel('sulfure emission')

# Title (the trailing semicolon hides the returned object in the notebook output)
axes.set_title('Visualisation de données éparses', pad=10, fontsize=14, style='italic');