# Lab 5. Abstraction and reusability
#### Computational Methods for Geoscience - EPS 400/522
#### Instructor: Eric Lindsey

Due: Oct. 5, 2023

---------

Adrian Marziliano

In [27]:
# some useful imports and settings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os

# better looking figures on high-resolution screens
%config InlineBackend.figure_format = 'retina'

# reload modules if they have changed - necessary when you are editing your own module
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### 1. Using glob to find files

The folder 'timeseries' (you will have to unzip it first) contains a set of GNSS timeseries from the UNR MAGNET site. Let's explore how 'glob' can interact with these files.

1. Use glob to get a list of all the files, and print out each filename.

2. The sites starting with a letter 'P' were installed under a single project called the 'Plate Boundary Observatory'. Suppose we wanted to list only those files - can you use 'glob' with wildcards to return only the list of names starting with P?

In [28]:
# Verify that the current working directory has changed
print("Current working directory:", os.getcwd())

Current working directory: /Users/Adrian/Documents/VS CODE/CompMethods_EPS522/Labs/Lab 5


In [29]:
# Make sure directory is set to 'timeseries' folder
#os.chdir('/home/jovyan/CompMethods_EPS522/Labs/Lab 5/timeseries/')
os.chdir('../Lab 5/timeseries')

In [30]:
# Get the list of tenv3 files from the "timeseries" folder
tenv3_files = glob.glob('*.tenv3')
#print('All files: ',tenv3_files)

# Get a list of the tenv3 files
tenv3_Pfiles = glob.glob('P*.tenv3')
print(f'1. All files: {tenv3_files}\n2. Site files with letter P: {tenv3_Pfiles}')

1. All files: ['RG01.NA.tenv3', 'SC01.NA.tenv3', 'AZCN.NA.tenv3', 'P034.NA.tenv3', 'P028.NA.tenv3', 'P029.NA.tenv3', 'NMLG.NA.tenv3', 'CTI4.NA.tenv3', 'MC10.NA.tenv3', 'TC01.NA.tenv3']
2. Site files with letter P: ['P034.NA.tenv3', 'P028.NA.tenv3', 'P029.NA.tenv3']


In [31]:
file_path ='../timeseries/AZCN.NA.tenv3'
tenv3_sample = pd.read_csv(file_path, delim_whitespace=True )
tenv3_sample

Unnamed: 0,site,YYMMMDD,yyyy.yyyy,__MJD,week,d,reflon,_e0(m),__east(m),____n0(m),...,_ant(m),sig_e(m),sig_n(m),sig_u(m),__corr_en,__corr_eu,__corr_nu,_latitude(deg),_longitude(deg),__height(m)
0,AZCN,99MAY10,1999.3539,51308,1009,1,-107.9,-977,-0.744686,4078731,...,0.0,0.000894,0.001071,0.003738,0.001207,0.067395,-0.150295,36.839793,-107.910961,1862.93747
1,AZCN,99MAY11,1999.3566,51309,1009,2,-107.9,-977,-0.741628,4078731,...,0.0,0.000838,0.001050,0.003623,0.021724,-0.031813,-0.130306,36.839793,-107.910961,1862.93534
2,AZCN,99MAY12,1999.3593,51310,1009,3,-107.9,-977,-0.742445,4078731,...,0.0,0.000868,0.001055,0.003642,-0.001512,-0.034516,-0.086356,36.839793,-107.910961,1862.93970
3,AZCN,99MAY13,1999.3621,51311,1009,4,-107.9,-977,-0.744588,4078731,...,0.0,0.001016,0.001200,0.004131,0.036789,0.025548,-0.159457,36.839793,-107.910961,1862.93930
4,AZCN,99MAY14,1999.3648,51312,1009,5,-107.9,-977,-0.746577,4078731,...,0.0,0.001342,0.001580,0.005565,-0.067832,-0.069363,-0.131309,36.839793,-107.910961,1862.93748
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4105,AZCN,11JUN15,2011.4524,55727,1640,3,-107.9,-977,-0.757917,4078731,...,0.0,0.000828,0.001068,0.003440,0.024802,-0.068325,-0.079274,36.839793,-107.910963,1862.93348
4106,AZCN,11AUG10,2011.6057,55783,1648,3,-107.9,-977,-0.761025,4078731,...,0.0,0.001333,0.001489,0.005270,0.178436,-0.087887,-0.207460,36.839793,-107.910963,1862.93815
4107,AZCN,11AUG11,2011.6085,55784,1648,4,-107.9,-977,-0.757568,4078731,...,0.0,0.002455,0.002242,0.006725,0.080286,-0.065957,-0.107477,36.839793,-107.910963,1862.92224
4108,AZCN,11AUG12,2011.6112,55785,1648,5,-107.9,-977,-0.761642,4078731,...,0.0,0.008280,0.003808,0.008938,0.032072,-0.074917,-0.182729,36.839793,-107.910963,1862.92939


### 2. Write a module to interact with the GNSS timeseries

The module should have (at a minimum) the following four functions with their definitions:

fit_timeseries(tlist,ylist) - accepts two lists: t (decimal year) and y (displacement timeseries)  as 1-D numpy arrays, and returns the least-squares velocity and uncertainty for that timeseries. If possible, try to re-use the line-fitting code you wrote for Lab 3 for this purpose.

fit_velocities(filename) - accepts a filename, reads in the data, and uses fit_timeseries() to estimate the E, N and U components of velocity for that site.

get_coordinates(filename) - accepts a filename and returns the average latitude, longitude, and elevation for that site over the time period.

fit_all_velocities(folder,pattern) - accepts a folder name and a 'glob' pattern and returns a pandas data frame with the site name, coordinates, velocities and uncertainties.

Finally, import your module and demonstrate each function below to show how it works and what it returns.

### Module:

In [22]:
# velocity_analysis.py

import numpy as np
import pandas as pd
import glob

def import_tenv3(filename):
    data = pd.read_csv(filename, delimiter='\s+')
    return data

def fit_timeseries(tlist, ylist):
    coeffs = np.polyfit(tlist, ylist, 1)
    residuals = ylist - np.polyval(coeffs, tlist)
    sigma = np.sqrt(np.sum(residuals**2) / (len(tlist) - 2))
    sigma_coeffs = sigma / np.sqrt(np.sum((tlist - np.mean(tlist))**2))
    return coeffs[0], sigma_coeffs

def fit_velocities(filename, direction):
    data = import_tenv3(filename)
    tlist = data['yyyy.yyyy'].values
    ylist = data[direction].values
    velocity, uncertainty = fit_timeseries(tlist, ylist)
    return velocity, uncertainty

def get_coordinates(filename):
    data = import_tenv3(filename)
    latitude = np.mean(data['_latitude(deg)'].values)
    longitude = np.mean(data['_longitude(deg)'].values)
    elevation = np.mean(data['__height(m)'].values)
    return latitude, longitude, elevation

def fit_all_velocities(folder, pattern):
    filenames = glob.glob(f"{folder}/{pattern}")
    sites = []
    coordinates = []
    elevation = []
    velocities_up = []
    uncertainties_up = []
    velocities_north = []
    uncertainties_north = []
    velocities_east = []
    uncertainties_east = []
    for filename in filenames:
        site = filename.split('/')[-1].split('.')[0]
        lat, lon, elev = get_coordinates(filename)
        coordinates.append([lat, lon])
        elevation.append(elev)
        
        vel_up, unc_up = fit_velocities(filename, '____up(m)')
        velocities_up.append(vel_up)
        uncertainties_up.append(unc_up)

        vel_north, unc_north = fit_velocities(filename, '_north(m)')
        velocities_north.append(vel_north)
        uncertainties_north.append(unc_north)

        vel_east, unc_east = fit_velocities(filename, '__east(m)')
        velocities_east.append(vel_east)
        uncertainties_east.append(unc_east)

        sites.append(site)

    df = pd.DataFrame({
        'Site': sites,
        'Coordinates': coordinates,
        'Elevation': elevation,
        'Velocity_Up': velocities_up,
        'Uncertainty_Up': uncertainties_up,
        'Velocity_North': velocities_north,
        'Uncertainty_North': uncertainties_north,
        'Velocity_East': velocities_east,
        'Uncertainty_East': uncertainties_east
    })
    return df


### Demonstration: Fit_timeseries function
Calculates the least_squares velocity and uncertainty for a decimal year ('yyyy.yyyy') and displacement value.

In [79]:
import velocity_analysis

# Example for AZCN file:
folder = "/Users/Adrian/Documents/VS CODE/CompMethods_EPS522/Labs/Lab 5/timeseries"
pattern = "AZCN.NA.tenv3"

filenames = glob.glob(f"{folder}/{pattern}")

for filename in filenames:
    data = pd.read_csv(filename, delimiter='\s+')
    data = import_tenv3(filename)
    tlist = data['yyyy.yyyy'].values
    ylist = data['____up(m)'].values

    fit_timeseries = velocity_analysis.fit_timeseries(tlist, ylist)

    print(f'Imput of tlist: e.g., {tlist[0]} and displacement (up [m]): e.g., {ylist[0]} returns uncertainty: {fit_timeseries[0]} and velocity: {fit_timeseries[1]}')



Imput of tlist: e.g., 1999.3539 and displacement (up [m]): e.g., 0.937473 returns uncertainty: -0.0008614640076356199 and velocity: 2.843238017220529e-05


### Demonstration: fit_velocities function

Uses the 'fit_timeseries function to pull velocity and uncertainty calculations for each direction (Up, North, East).

In [91]:
import velocity_analysis

# Example for AZCN file:
folder = "/Users/Adrian/Documents/VS CODE/CompMethods_EPS522/Labs/Lab 5/timeseries"
pattern = "AZCN.NA.tenv3"

filenames = glob.glob(f"{folder}/{pattern}")

for filename in filenames:
    data = pd.read_csv(filename, delimiter='\s+')
    direction = '____up(m)'
    fit_velocities = velocity_analysis.fit_velocities(filename, direction)

print(f'Uses fit_timeseries function for each file, e.g., {pattern}, and for each direction, e.g., {direction}, to pull velocity: {fit_velocities[0]} and uncertainty: {fit_velocities[1]}')

Uses fit_timeseries function for each file, e.g., AZCN.NA.tenv3, and for each direction, e.g., ____up(m), to pull velocity: -0.0008614640076356199 and uncertainty: 2.843238017220529e-05


### Demonstration: get_coordinates function
Pulls the latitude, longitude, and elevation data for each site (file), and calculates the mean latitude, longitude, and elevation.

In [67]:
import velocity_analysis

# Example for AZCN file:
folder = "/Users/Adrian/Documents/VS CODE/CompMethods_EPS522/Labs/Lab 5/timeseries"
pattern = "AZCN.NA.tenv3"

filenames = glob.glob(f"{folder}/{pattern}")

for filename in filenames:
    data = pd.read_csv(filename, delimiter='\s+')
    coordinates = velocity_analysis.get_coordinates(filename)

print(f'From file {pattern}, pull latitude: {coordinates[0]}, longitude: {coordinates[1]}, and height (elevation) [m]: {coordinates[2]}')

From file AZCN.NA.tenv3, pull latitude: 36.839792950604185, longitude: -107.910962404209, and height (elevation) [m]: 1862.9388360243308


### Demonstration: fit_all_velocities function
Utilizes 'glob' to look through all the .tenv3 files to get the velocity and uncertainty calculations for each direction (North, East, Up), the mean coordinates (latidue, longitude), and mean elevation for each site (file)

In [76]:
# Example usage
import velocity_analysis

folder = "/Users/Adrian/Documents/VS CODE/CompMethods_EPS522/Labs/Lab 5/timeseries"
pattern = "*.tenv3"
result_df = velocity_analysis.fit_all_velocities(folder, pattern)

# Display the result
print(result_df)


   Site                                Coordinates    Elevation  Velocity_Up  \
0  RG01   [34.667072418745406, -108.0438129456121]  2157.544590    -0.001694   
1  SC01    [34.0679525903593, -106.96654343614426]  2097.379776     0.000436   
2  AZCN    [36.839792950604185, -107.910962404209]  1862.938836    -0.000861   
3  P034     [34.94561936925882, -106.459267663182]  1810.912904    -0.000354   
4  P028   [36.03168469970933, -107.90840970082284]  1933.112591    -0.000650   
5  P029  [38.439190420106776, -107.63804444204796]  2455.374920     0.002059   
6  NMLG   [35.03995261508493, -107.37233835135558]  1763.225418    -0.000454   
7  CTI4   [37.15291819162043, -107.75609089748136]  2017.964552    -0.002306   
8  MC10  [38.455598434474396, -107.87845670801819]  1808.589875    -0.001166   
9  TC01   [37.93803350458362, -107.81333275154333]  2677.537224    -0.000501   

   Uncertainty_Up  Velocity_North  Uncertainty_North  Velocity_East  \
0        0.000022        0.001949           0.00

### 3. Upload the module to GitHub, along with a README.md file explaining briefly how to use it.

Enter a link to your GitHub repository here for me to check out: 

GitHub: [AdrianMarzil](https://github.com/AdrianMarzil)
<br>Lab 5 Link: [Lab5_Abstraction](https://github.com/AdrianMarzil/CompMethods_EPS522/tree/672690f5eae0104104de0fa46089611b3882a693/Labs/Lab%205)

### 4. Use the timeseries calculation module you created

Using at most 5 lines of code, import the module you created above and use it to estimate the timeseries for all 10 of the sites, print them out, and save the results to a new file 'site_velocities.csv'. Feel free to download more sites as well and put them in the folder too!


In [90]:
import velocity_analysis

# Specify the folder and pattern for your timeseries files
folder_path = '/Users/Adrian/Documents/VS CODE/CompMethods_EPS522/Labs/Lab 5/timeseries'
file_pattern = '*.NA.tenv3'

# Use the module to estimate velocities for all sites
df = velocity_analysis.fit_all_velocities(folder_path, file_pattern)

# Print the results
print(df)

# Save the results to a CSV file
df.to_csv('site_velocities.csv', index=False)


   Site                                Coordinates    Elevation  Velocity_Up  \
0  RG01   [34.667072418745406, -108.0438129456121]  2157.544590    -0.001694   
1  SC01    [34.0679525903593, -106.96654343614426]  2097.379776     0.000436   
2  AZCN    [36.839792950604185, -107.910962404209]  1862.938836    -0.000861   
3  P034     [34.94561936925882, -106.459267663182]  1810.912904    -0.000354   
4  P028   [36.03168469970933, -107.90840970082284]  1933.112591    -0.000650   
5  P029  [38.439190420106776, -107.63804444204796]  2455.374920     0.002059   
6  NMLG   [35.03995261508493, -107.37233835135558]  1763.225418    -0.000454   
7  CTI4   [37.15291819162043, -107.75609089748136]  2017.964552    -0.002306   
8  MC10  [38.455598434474396, -107.87845670801819]  1808.589875    -0.001166   
9  TC01   [37.93803350458362, -107.81333275154333]  2677.537224    -0.000501   

   Uncertainty_Up  Velocity_North  Uncertainty_North  Velocity_East  \
0        0.000022        0.001949           0.00

### 5. Re-use your module to estimate sea level rise rates

Go to the following page and download at least 5 monthly sea level timeseries spanning at least 100 years: https://psmsl.org/products/gloss/glossmap.html. Place them in a new folder.

(To download the data: click a station icon on the map, then click the station number/name (first link in the pop-up, e.g. "155: Honolulu". Then right-click the link next to the plot of monthly data ("Download monthly mean sea level data.") and save it as a file.)

Now, create a new function "fit_tide_gauge" in your module that re-uses your function "fit_timeseries" to return the relative sea level rate of change for a given station. 

Next, modify your function "fit_all_velocities" to accept a "type" parameter (GNSS or tide), and re-use it to estimate the rates for all the tide gauges you downloaded. Print out the results below.

Finally, update your github repository with this new version of the module.


In [None]:
# tidal_analysis.py

import numpy as np
import pandas as pd
import glob

def import_rlrdata(filename):
    data = pd.read_csv(filename, delimiter=';', header=None, usecols=[0, 1], names=['yyyy.yyyy', 'elevation'])
    data.replace(-9999, np.nan, inplace=True)  # Replace -9999 with NaN
    return data

def fit_timeseries(tlist, ylist):
    coeffs = np.polyfit(tlist, ylist, 1)
    residuals = ylist - np.polyval(coeffs, tlist)
    sigma = np.sqrt(np.sum(residuals**2) / (len(tlist) - 2))
    sigma_coeffs = sigma / np.sqrt(np.sum((tlist - np.mean(tlist))**2))
    return coeffs[0], sigma_coeffs

def fit_tide_gauge(filename):
    data = import_rlrdata(filename)
    tlist = data['yyyy.yyyy'].values
    ylist = data['elevation'].values + 3.096  # Adding 3.096 to all elevation values
    rate, uncertainty = fit_timeseries(tlist, ylist)
    return rate, uncertainty

def fit_all_rates(folder, pattern):
    filenames = glob.glob(f"{folder}/{pattern}")
    stations = []
    rates = []
    uncertainties = []
    for filename in filenames:
        station = filename.split('/')[-1].split('.')[0]
        rate, uncertainty = fit_tide_gauge(filename)
        stations.append(station)
        rates.append(rate)
        uncertainties.append(uncertainty)
    df = pd.DataFrame({
        'Station': stations,
        'Rate (m)': rates,
        'Uncertainty': uncertainties
    })
    return df


In [102]:
import tidal_analysis

# Specify the folder and pattern for your timeseries files
folder_path = '/Users/Adrian/Documents/VS CODE/CompMethods_EPS522/Labs/Lab 5/tidal_timeseries'
file_pattern = '*.rlrdata'

# Use the module to estimate velocities for all sites
df = tidal_analysis.fit_all_rates(folder_path, file_pattern)

# Print the results
print(df)

# Save the results to a CSV file
df.to_csv('site_tides.csv', index=False)

  Station        Rate  Uncertainty
0     167  584.196581    29.364620
1     165  648.266070    32.474093
2      10   -1.822044     1.538391
3     256   79.665016    18.556294
4    1355 -746.889132   265.745177
