Converts event catalogs into an events-per-day CSV. Originally set up for Mt St Helens (created December 2022); the events-per-day and plotting cells below are currently set up for Mt Hood.

Additional event plots (clusters with new events, years with new events, etc.) are further below - created May 2024

In [None]:
import pandas as pd
import yaml
import numpy as np
import csv
import matplotlib.pyplot as plt
import matplotlib
from glob import glob

In [None]:
# Load the run configuration from config.yaml and unpack the values used by later cells.
# NOTE(review): the path is absolute and user-specific -- adjust when running elsewhere.
with open('/home/smocz/expand_redpy/scripts/config.yaml') as file:
    # NOTE(review): FullLoader is less restrictive than yaml.safe_load;
    # only point this at a config file you trust.
    config = yaml.load(file, Loader=yaml.FullLoader)

# vv is the index into volc_list_names; volc is the entry it selects
vv, volc_list_names = config['vv'], config['volc_list_names']
volc = volc_list_names[vv]

# remaining settings, pulled out in the same order as they are named
readdir, homedir, years = (config[key] for key in ('readdir', 'homedir', 'years'))

---

In [None]:
# Read the three St Helens catalogs and stack them into a single St_Helens table.

St_Helens = pd.read_csv(readdir+'MountStHelens_catalog.csv')

# Borehole catalog: shift cluster IDs by 2000,
# so Cluster 0 in Helens_Borehole becomes Cluster 2000 in St_Helens.
Helens_Borehole = pd.read_csv(readdir+'MSHborehole_catalog.csv').assign(
    Clustered=lambda catalog: catalog['Clustered'] + 2000
)

# Local catalog: shift cluster IDs by 3000,
# so Cluster 0 in Helens_Local becomes Cluster 3000 in St_Helens.
Helens_Local = pd.read_csv(readdir+'MSHlocal_catalog.csv').assign(
    Clustered=lambda catalog: catalog['Clustered'] + 3000
)

# NOTE(review): the offsets presumably keep IDs from colliding; that only holds if the
# main catalog has fewer than 2000 clusters and the borehole one fewer than 1000 -- confirm.

# From here on, St_Helens gives access to all three St Helens catalogs.
# Each frame keeps its own row index, so index labels repeat in the combined table.
St_Helens = pd.concat([St_Helens, Helens_Borehole, Helens_Local])

In [None]:
# Load the Hood catalog from the read directory set in the config cell
Hood = pd.read_csv(f'{readdir}Hood_catalog.csv')

In [None]:
#make events_per_day csv
# Builds df with one row per date that has at least one event:
# columns are 'Date' and 'Number_of_Events'.

###################
# SET UP FOR HOOD #
###################

dt_list = Hood['datetime'].values.tolist() #get a list of datetimes

# Keep the first 10 characters of each datetime as the date.
# NOTE(review): assumes every entry is a string that starts with the date -- confirm.
d_list = [dt[:10] for dt in dt_list]

# Sorted unique dates and how many events fall on each, found in a single pass.
# (Calling d_list.count() once per unique date rescans the whole list every time.)
uni_d_list, day_counts = np.unique(d_list, return_counts=True)
num_list = day_counts.tolist() #number of events on that date, index is the same as uni_d_list

# Building from a dict also works when the catalog has no rows.
df = pd.DataFrame({'Date': uni_d_list, 'Number_of_Events': num_list})
print(df)

# The plotting cell further down reads Mt_Hood_events_per_day.csv back from homedir,
# so uncomment the next line to (re)write that file from the table above.
# df.to_csv(homedir+'Mt_Hood_events_per_day.csv',index=False)

Plot

In [None]:
#read pnsn dates FOR HOOD
# Builds pnsn_df with one row per date that has at least one PNSN event:
# columns are 'Date' and 'Number_of_Events'.
pnsn = pd.read_csv(homedir+'pnsn_Hood.csv')

dt_list = pnsn['Time UTC'].values.tolist() #get a list of datetimes

# Keep the first 10 characters of each datetime as the date.
# NOTE(review): assumes every 'Time UTC' entry is a string that starts with the date -- confirm.
d_list = [dt[:10] for dt in dt_list]

# Sorted unique dates and how many events fall on each, found in a single pass.
# (Calling d_list.count() once per unique date rescans the whole list every time.)
uni_d_list, day_counts = np.unique(d_list, return_counts=True)
num_list = day_counts.tolist() #number of events on that date, index is the same as uni_d_list

# Building from a dict also works when the file has no rows.
pnsn_df = pd.DataFrame({'Date': uni_d_list, 'Number_of_Events': num_list})
print(pnsn_df)

In [None]:
#read new events (from backfilling)
# Builds new_df with one row per date that has at least one backfilled event:
# columns are 'Date' and 'Number_of_Events'.

# NOTE(review): absolute, user-specific path; a later cell reads the same kind of file
# via homedir + 'final_catalogs/...' -- confirm whether this one should do the same.
new_e = pd.read_csv('/home/smocz/expand_redpy_new_files/final_catalogs/Hood_updated_catalog.csv')

dt_list = new_e['Earliest_Detection_Time'].values.tolist() #get a list of datetimes

# Keep the first 10 characters of each datetime as the date.
# NOTE(review): assumes every entry is a string that starts with the date -- confirm.
d_list = [dt[:10] for dt in dt_list]

# Sorted unique dates and how many events fall on each, found in a single pass.
# (Calling d_list.count() once per unique date rescans the whole list every time.)
uni_d_list, day_counts = np.unique(d_list, return_counts=True)
num_list = day_counts.tolist() #number of events on that date, index is the same as uni_d_list

# Building from a dict also works when the catalog has no rows.
new_df = pd.DataFrame({'Date': uni_d_list, 'Number_of_Events': num_list})
print(new_df)

In [None]:
# Global matplotlib font settings, applied to every figure drawn after this cell
font = dict(family='DejaVu Sans', weight='bold', size=14)
matplotlib.rc('font', **font)

# NOTE(review): empty figure titled 'Test' -- looks like a quick check of the font
# settings; confirm whether it is still wanted.
fig, ax = plt.subplots()
ax.set_title('Test')

In [None]:
# Scatter plot of events per day on Mt Hood from three tables (PNSN, REDPy, backfilled),
# drawn on a log-scaled count axis.

# REDPy counts are read back from the events-per-day csv on disk
df = pd.read_csv(homedir+'Mt_Hood_events_per_day.csv')

# For each table: dates are cast to str and parsed to datetimes for plotting;
# counts are plain lists with the same index as the dates.
x = pd.to_datetime([str(day) for day in df['Date'].values.tolist()])
y = df['Number_of_Events'].values.tolist()

px = pd.to_datetime([str(day) for day in pnsn_df['Date'].values.tolist()])
py = pnsn_df['Number_of_Events'].values.tolist()

nx = pd.to_datetime([str(day) for day in new_df['Date'].values.tolist()])
ny = new_df['Number_of_Events'].values.tolist()

fig, ax = plt.subplots(figsize=(15,7.5))

ax.grid(which='major')
ax.grid(which='minor',color='#EEEEEE')

# (dates, counts, legend label, colour, alpha) listed in draw order.
# alpha=None is scatter's default, i.e. no transparency set for the backfilled points.
layers = [
    (px, py, 'PNSN Events', 'purple', 0.75),
    (x, y, 'REDPy Events', 'red', 0.75),
    (nx, ny, 'Backfilled Events', 'black', None),
]
for dates, counts, label, colour, alpha in layers:
    ax.scatter(dates, counts, s=40, alpha=alpha, marker='.', label=label, color=colour)

ax.set_xlabel('Date')
ax.set_ylabel('Number of Events')
ax.set_yscale('log')
ax.set_title('Number of Events Each Day on Mt Hood')
ax.legend(loc='upper center')



Additional event analysis plots - May 2024

In [None]:
#new events per year

#clusters of new events

In [None]:
##########################
# INCLUDES REDPY OVERLAP #
##########################


##read in event csvs for volcano selected in config.yaml
# event_csvs = glob(f'{homedir}events/{volc_list_names[vv]}_*_events.csv')
# print(event_csvs[0])

# event_dfs = []
# for csv_path in event_csvs:  # loop variable renamed from `csv`, which would shadow the imported csv module
#     event_dfs.append(pd.read_csv(csv_path))
    
# event_df = pd.concat(event_dfs,ignore_index=True)

In [None]:
##########################
# EXCLUDES REDPY OVERLAP #
##########################

# Updated catalog for the volcano selected by vv in the config cell
event_df = pd.read_csv(f'{homedir}final_catalogs/{volc_list_names[vv]}_updated_catalog.csv')

In [None]:
#show df
# Bare expression on the last line of the cell, so the notebook displays the frame.
event_df

In [None]:
#get info
# Summarises event_df two ways: events per cluster ID and events per year.

#count of cl
# Sorted unique IDs and their counts in one pass. The previous per-cluster
# list(...).count(cl) rebuilt and rescanned the whole column for every cluster.
cluster_ids, cluster_sizes = np.unique(event_df["Cluster_ID"], return_counts=True)
cl_unique = list(cluster_ids) #ordered, non-repeating list of cluster IDs
cl_count = cluster_sizes.tolist() #number of instances of each cluster ID, same index as cl_unique

#get count of events per year
# Each time is parsed on its own, so entries do not have to share one format.
year_list = [pd.to_datetime(i).year for i in event_df["Earliest_Detection_Time"]]
year_unique, yearly_totals = np.unique(year_list, return_counts=True) #ordered, non-repeating years with at least one detection
year_count = yearly_totals.tolist() #how many detections in each year, same index as year_unique

In [None]:
# Bar-style chart: one vertical line per cluster ID, as tall as that cluster's event count

fig, ax = plt.subplots()

# cl_unique and cl_count share an index, so walk them as pairs
for cluster_id, n_events in zip(cl_unique, cl_count):
    ax.vlines(cluster_id, 0, n_events, lw=2)

ax.set_xlabel('Cluster ID')
ax.set_ylabel('Count')
ax.set_title(f'{volc_list_names[vv]} New Events by Cluster')

In [None]:
#plot cluster count as pie chart
# Only clusters with more than 5 events get a slice; slices are labelled with the cluster ID.

plot_cl_unique = []
plot_cl_count = []
#pick out clusters with more than so many events
for cluster_id, n_events in zip(cl_unique, cl_count):
    if n_events > 5:
        plot_cl_unique.append(cluster_id)
        plot_cl_count.append(n_events)

# Total is computed once instead of once per slice.
# The values are fractions of the plotted events (0-1), despite the name.
plotted_total = sum(plot_cl_count)
count_percent = [n_events/plotted_total for n_events in plot_cl_count]

fig, ax = plt.subplots()
ax.pie(count_percent, labels=plot_cl_unique)

In [None]:
# Bar-style chart: one thick vertical line per year, as tall as that year's detection count

fig, ax = plt.subplots()

# year_unique and year_count share an index, so walk them as pairs
for year, n_events in zip(year_unique, year_count):
    ax.vlines(year, 0, n_events, lw=8)

ax.set_xlabel('Year')
ax.set_ylabel('Count')
# NOTE(review): the year range in the title is hard-coded -- confirm it matches the data shown
ax.set_title(f'{volc_list_names[vv]} New Events per Year (2002-2021)')
# ticks on every second year from 2000 through 2022
ax.set_xticks(list(range(2000, 2024, 2)))