# Tutorial

In [None]:
# import all packages you will need
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
import scipy.signal
import datetime
import glob
import math
import sys

In [None]:
import os
import sys

# Put the parent of the current working directory at the front of sys.path
# (presumably where the mtsthelens package lives) before importing from it.
current_directory = os.getcwd()
parent_directory = os.path.abspath(os.path.join(current_directory, os.pardir))
sys.path.insert(0, parent_directory)

from mtsthelens import (
    manipulation_functions,
    plotting_functions,
    preprocessing_functions,
)

## Read & preprocess data

In [None]:
# Read input data
df = preprocessing_functions.read_data('../example/example_data/example_data_eruption.csv')

# Data smoothing: centred rolling median over a 6-hour window.
# The lowercase 'h' alias is used because the uppercase 'H' offset alias is
# deprecated since pandas 2.2 and triggers a FutureWarning.
df = df.rolling('6h', center=True).median()

# Remove outliers: mask_df is applied to every column (axis=0)  # peak detection
df = df.apply(preprocessing_functions.mask_df, axis=0)
df

## Data manipulation

### Stack in Time

In [None]:
# Find the seasonal trends in the data, and create a new dataframe with the seasonality removed.
# stackInTime returns two frames, unpacked here in that order.
df_seasonal_trends, df_seasonality_removed = manipulation_functions.stackInTime(df)
# Show the seasonal trends as the cell output
df_seasonal_trends
# Optional: save those dataframes as output csv files
# manipulation_functions.export_csv('example_data_eruption_stacktime', df_seasonal_trends)
# manipulation_functions.export_csv('example_data_eruption_seasonality_removed', df_seasonality_removed)

### Stack in Space

In [None]:
# Find the differences between the stations, and the average.
# stackInSpace returns two frames, unpacked here in that order.
df_median_stackSpace, df_stackSpace_year = manipulation_functions.stackInSpace(df)
# Derive the yearly parameters from the second frame returned above
df_yearlyParam = manipulation_functions.stackSpace_yearParam(df_stackSpace_year)
# Show the per-year stack as the cell output
df_stackSpace_year
# Optional: save those dataframes as output csv files
# manipulation_functions.export_csv('example_data_eruption_stackspaceYear',df_stackSpace_year)
# manipulation_functions.export_csv('example_data_eruption_stackspaceParam',df_yearlyParam)

### Apply Filter

In [None]:
# Replace missing values with 0 before filtering.
# NOTE(review): this rebinds `df` itself, so every later cell that uses `df`
# (plots, statistics, df2dict) works on the zero-filled frame rather than the
# frame with missing values — confirm that this is intended.
df = df.fillna(0)
# Filter the zero-filled data with filter_data's default settings
df_filter = manipulation_functions.filter_data(df)
# manipulation_functions.export_csv('example_data_eruption_filter',df_filter)
print(df_filter)
# Plot the filtered data on a new figure
plt.figure()
plt.plot(df_filter)


## Data plotting

In [None]:
# Load the extrusion rate table and index it by the date of photography
df_dome = pd.read_csv(
    '../example/example_data/dome_extrusion.txt',
    header=0,
).set_index('Date of photography')
# Parse the index as datetimes and drop any timezone information
df_dome.index = pd.to_datetime(df_dome.index).tz_localize(None)
# df_dome['diff'] = df_dome['Total volume change(x 106 m3)']-df_dome['Total volume change(x 106 m3)'].shift(1)
df_dome.head()

In [None]:
# df_dome.plot(marker='o')

In [None]:
# Load the station coordinates and drop the stations which are not of interest.
# This cell must run before the cells that look up `df_sta` to attach
# latitude/longitude to the statistics; without it they raise NameError.
sta_list = ['BLIS', 'CDF', 'EDM', 'ELK', 'FL2', 'HOA', 'HSR', 'JRO', 'JUN',  # specify the stations you want to use
            'LOO', 'MIDE', 'NED', 'RAFT', 'REM', 'SEP', 'SHW', 'SOS', 'SPN5',
            'STD', 'SUG', 'SWFL', 'TDL', 'USFR', 'VALT', 'YEL']

# NOTE(review): the other cells read their data from '../example/example_data/';
# confirm that this relative path points at the station log.
df_sta = pd.read_csv('./example_data/sta_log_long.txt', sep='|', header=0)  # coordinates
# Keep only the stations of interest. The result is assigned to df_sta (not df)
# so that the time-series dataframe `df` is not overwritten by the station table.
df_sta = df_sta[df_sta['Station'].isin(sta_list)]
df_sta = df_sta.drop_duplicates(subset=['Station'])  # keep a single row per station
df_sta = df_sta.reset_index(drop=True)
df_sta.head()

In [None]:
# # get the distance between the stations and sort them in increasing order (relative to station SEP -> crater center)
# ref_sta = 'SEP' # define the reference station, we will get distance from all other stations to this station
# df_sta['dist'] = df_sta.apply(lambda x: preprocessing_functions.calculate_distance(x['latitude'],df_sta.latitude[df_sta['Station']==ref_sta] , x['longitude'],df_sta.longitude[df_sta['Station']==ref_sta] ), axis=1)
# df_sta = df_sta.sort_values(by=['dist'])
# sta_sorted = df_sta['Station'].to_list()
# df_sta.head()

In [None]:
# Plot each processed frame against the raw data:
# first the time stack (seasonality removed), then the filtered data
for df_processed in (df_seasonality_removed, df_filter):
    plotting_functions.plot_stack_vs_raw(df_processed, df)
# Plot the min, max, mean and median values
plotting_functions.plot_space_params(df_yearlyParam)
# Plot the dome extrusion data together with the raw, seasonality-removed and filtered data
plotting_functions.plot_extrusion(df_dome, df, df_seasonality_removed, df_filter)


### Bring data into the required shape

In [None]:
# Preview the first rows of the working, seasonality-removed and filtered frames (shown as a tuple)
tuple(frame.head() for frame in (df, df_seasonality_removed, df_filter))

In [None]:
# Display the working dataframe as the cell output
df


In [None]:
# Extract the statistical values for the whole dataframe
df_stat = manipulation_functions.stackSpace_yearParam(df)

# Append the latitude and longitude of each station (one per column) as extra rows for plotting.
# NOTE: relies on `df_sta` (the station coordinate table) having been loaded by an earlier cell.
for coord in ('latitude', 'longitude'):
    df_stat.loc[coord] = [
        df_sta.loc[df_sta['Station'] == sta, coord].values[0]
        for sta in df_stat.columns
    ]
df_stat

In [None]:
import os

# Create a dictionary of DataFrames keyed by date (df2dict is called with 'year')
dict_test = manipulation_functions.df2dict(df, 'year')
dict_stat = {}
for key, value in dict_test.items():
    df_stat = manipulation_functions.stackSpace_yearParam(value)  # extract statistical values

    # Append the latitude and longitude of each station (one per column) as extra rows for plotting.
    # NOTE: relies on `df_sta` (the station coordinate table) having been loaded by an earlier cell.
    for coord in ('latitude', 'longitude'):
        df_stat.loc[coord] = [
            df_sta.loc[df_sta['Station'] == sta, coord].values[0]
            for sta in df_stat.columns
        ]
    dict_stat[key] = df_stat

# Save the dictionary as npy. np.save does not create missing directories,
# so make sure the output folder exists first.
output_path = 'output/data/my_file.npy'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
np.save(output_path, dict_stat)

### Create Plots for Animation

In [None]:
# Load the dictionary of statistics back from disk. A dict saved with np.save is
# stored as a pickled 0-d object array, so pickling must be allowed and .item()
# unwraps the dict again. allow_pickle is passed as a real boolean: a string such
# as 'TRUE' only works because any non-empty string is truthy.
# NOTE: unpickling can execute arbitrary code; only load files produced by this notebook.
read_dictionary = np.load('output/data/my_file.npy', allow_pickle=True).item()
read_dictionary

In [None]:
# Create the animation plots from the loaded dictionary.
# NOTE(review): 'median' and 'inferno' presumably select the statistic to plot and the
# matplotlib colormap — confirm against plotting_functions.animation.
plotting_functions.animation(read_dictionary, 'median', 'inferno')