# Notes on abstraction in python
#### Computational Methods for Geoscience - EPS 400/522
#### Instructor: Eric Lindsey

---------

## Example 1

A series of code blocks that do the same thing, but make use of increasing levels of abstraction

In [None]:
# Every sample gets its own hand-written stanza: the two measurements first,
# then the density derived from them (mass divided by volume).

# Sample 1 (mass in grams, volume in cubic centimeters)
mass1, volume1 = 10.0, 2.0
density1 = mass1 / volume1

# Sample 2 (mass in grams, volume in cubic centimeters)
mass2, volume2 = 20.0, 4.0
density2 = mass2 / volume2

# Sample 3 (mass in grams, volume in cubic centimeters)
mass3, volume3 = 30.0, 6.0
density3 = mass3 / volume3

# Report the results, one line per sample
print(f"Density of sample 1: {density1} g/cm^3")
print(f"Density of sample 2: {density2} g/cm^3")
print(f"Density of sample 3: {density3} g/cm^3")

Let's improve this with a for loop instead of writing out the code multiple times:

In [None]:
# Parallel lists: masses[k] and volumes[k] belong to the same sample
masses = [10.0, 20.0, 30.0]
volumes = [2.0, 4.0, 6.0]
densities = []

# A single loop body replaces the three copy-pasted calculations
for i, mass in enumerate(masses):
    density = mass / volumes[i]
    densities.append(density)

print(f"Densities: {densities} g/cm^3")


A function can help clarify where the calculations are happening and separate that part from the lists:

In [None]:
def calculate_density(mass, volume):
    """Return the density of a sample, i.e. its mass divided by its volume."""
    density = mass / volume
    return density

# Parallel lists: entry k of each list describes sample k
masses = [10.0, 20.0, 30.0]
volumes = [2.0, 4.0, 6.0]
densities = []

# The loop only pairs up the inputs; the arithmetic lives in calculate_density()
for index in range(len(masses)):
    mass, volume = masses[index], volumes[index]
    densities.append(calculate_density(mass, volume))

print(f"Densities: {densities} g/cm^3")


We can use a list comprehension in place of a for loop:

In [None]:
def calculate_density(mass, volume):
    """
    Compute a density as the ratio of mass to volume.

    :param mass: The mass of the sample.
    :param volume: The volume of the sample.
    :return: mass divided by volume.
    """
    ratio = mass / volume
    return ratio

masses = [10.0, 20.0, 30.0]
volumes = [2.0, 4.0, 6.0]

# Note: zip() takes several lists and pairs up their items position by position,
# producing one tuple per position. In Python 3 it returns a lazy iterator rather than a list,
# e.g. list(zip([1,2,3],[4,5,6])) gives [(1,4),(2,5),(3,6)]. Think of it like a zipper - merging two lists!
densities = [
    calculate_density(m, v)
    for (m, v) in zip(masses, volumes)
]

print(f"Densities: {densities} g/cm^3")


Finally, at the most "abstract" level we could define a custom class and hide all the calculations inside the class definition.

In [None]:
class RockSample:
    """A rock sample described by its mass and its volume."""

    def __init__(self, mass, volume):
        # Store the raw measurements; the density is derived on request
        self.mass, self.volume = mass, volume

    def density(self):
        """Return this sample's mass divided by its volume."""
        mass, volume = self.mass, self.volume
        return mass / volume

# Each RockSample object carries its own (mass, volume) measurements
measurements = [(10.0, 2.0), (20.0, 4.0), (30.0, 6.0)]
samples = [RockSample(mass, volume) for mass, volume in measurements]

# The calculation itself is hidden behind the density() method
densities = [rock.density() for rock in samples]

print(f"Densities: {densities} g/cm^3")


## Example 2: A "real" example fitting GNSS velocities

Here is another case where we've started out copying and pasting...

In [None]:
import pandas as pd
import numpy as np

# Read TENV_timeseries.csv and fit a line to E, N, U.
# np.polyfit(x, y, 1) returns the coefficients highest power first, so element 0 is the slope.
df1 = pd.read_csv('TENV_timeseries.csv')
time1 = df1['Time']
coeffs_E1 = np.polyfit(time1, df1['E'], 1)
coeffs_N1 = np.polyfit(time1, df1['N'], 1)
coeffs_U1 = np.polyfit(time1, df1['U'], 1)

# Read PXYZ_timeseries.csv and fit a line to E, N, U (same steps, copied by hand)
df2 = pd.read_csv('PXYZ_timeseries.csv')
time2 = df2['Time']
coeffs_E2 = np.polyfit(time2, df2['E'], 1)
coeffs_N2 = np.polyfit(time2, df2['N'], 1)
coeffs_U2 = np.polyfit(time2, df2['U'], 1)

# Repeat ad nauseam for every site in the dataset
# ...

# Report the slope of each fitted line as that component's velocity
print(f"TENV: E_velocity = {coeffs_E1[0]}, N_velocity = {coeffs_N1[0]}, U_velocity = {coeffs_U1[0]}")
print(f"PXYZ: E_velocity = {coeffs_E2[0]}, N_velocity = {coeffs_N2[0]}, U_velocity = {coeffs_U2[0]}")
# ...


Again, let's improve this with a for loop instead of writing out the code multiple times:

In [None]:
import pandas as pd
import numpy as np

site_names = ['TENV', 'PXYZ']
velocities = []

# One loop body now handles every site; only the list of names has to grow
for site in site_names:
    df = pd.read_csv(f"{site}_timeseries.csv")
    time_col = df['Time']
    # Degree-1 fit per component; element 0 of each result is the slope
    slope_E, slope_N, slope_U = (
        np.polyfit(time_col, df[component], 1)[0]
        for component in ('E', 'N', 'U')
    )
    velocities.append((site, slope_E, slope_N, slope_U))

# Each entry of velocities is (site, E slope, N slope, U slope)
for v in velocities:
    print(f"{v[0]}: E_velocity = {v[1]}, N_velocity = {v[2]}, U_velocity = {v[3]}")


A function makes good sense here, since we might want to do this in different situations:

In [None]:
import pandas as pd
import numpy as np

def fit_velocity(site):
    """
    Fit a straight line to the E, N and U time series of one site.

    :param site: Site name; the data are read from "<site>_timeseries.csv".
    :return: Tuple (site, E slope, N slope, U slope), each slope taken from a
        degree-1 polynomial fit against the 'Time' column.
    """
    df = pd.read_csv(f"{site}_timeseries.csv")
    time_col = df['Time']
    # np.polyfit returns the highest power first, so [0] picks the slope
    slopes = [np.polyfit(time_col, df[component], 1)[0] for component in ('E', 'N', 'U')]
    return (site, *slopes)

site_names = ['TENV', 'PXYZ']

# Collect one (site, E, N, U) result per site by calling the function in a loop
velocities = []
for site in site_names:
    velocities.append(fit_velocity(site))

for v in velocities:
    print(f"{v[0]}: E_velocity = {v[1]}, N_velocity = {v[2]}, U_velocity = {v[3]}")


Collecting the results with a list comprehension probably does not make much difference in this case. However, we now also store the results in a DataFrame and write them out to a file, which is a very useful addition.

In [None]:
import pandas as pd
import numpy as np

def fit_velocity(site):
    """
    Estimate the E, N and U velocities of one site from its time series file.

    :param site: Site name; the data are read from "<site>_timeseries.csv".
    :return: Tuple (site, E slope, N slope, U slope) from degree-1 fits of
        each component against the 'Time' column.
    """
    data = pd.read_csv(f"{site}_timeseries.csv")

    def slope(column):
        # The leading coefficient of a degree-1 fit is the rate of change
        return np.polyfit(data['Time'], data[column], 1)[0]

    return site, slope('E'), slope('N'), slope('U')

site_names = ['TENV', 'PXYZ']
velocities = [fit_velocity(name) for name in site_names]

# One row per site, one column per element of the tuples returned by fit_velocity
column_names = ['Site', 'E_velocity', 'N_velocity', 'U_velocity']
velocity_df = pd.DataFrame(velocities, columns=column_names)

# Show the table, then save it (without the row index) for later use
print(velocity_df)
velocity_df.to_csv('velocities.csv', index=False)


Here's a big improvement: instead of manually entering our site names, we just loop over all the files in a certain folder:

In [None]:
import pandas as pd
import numpy as np
import glob

def fit_velocity_file(filename):
    """
    Fit a straight line to the E, N and U time series stored in one CSV file.

    :param filename: Path to a CSV file with 'Time', 'E', 'N' and 'U' columns.
        The site name is taken from the file's base name, up to the first
        underscore (e.g. "timeseries/TENV_timeseries.csv" gives "TENV").
    :return: Tuple (site, E slope, N slope, U slope), each slope taken from a
        degree-1 polynomial fit against the 'Time' column.
    """
    import os  # local import so this function does not depend on the cell's import list

    df = pd.read_csv(filename)
    # Get the site name from the file name. Use the function's own parameter
    # (not a variable from the calling code) and let os.path strip the folders,
    # so this works with any path separator.
    site = os.path.basename(filename).split('_')[0]
    # np.polyfit returns the highest power first, so [0] below is the slope
    coeffs_E = np.polyfit(df['Time'], df['E'], 1)
    coeffs_N = np.polyfit(df['Time'], df['N'], 1)
    coeffs_U = np.polyfit(df['Time'], df['U'], 1)
    return site, coeffs_E[0], coeffs_N[0], coeffs_U[0]

# Get the list of CSV files from the "timeseries" folder.
# glob does not promise any particular order, so sort the paths to get the
# same row order in the output every time.
csv_files = sorted(glob.glob('timeseries/*.csv'))

# Fit every file that was found (iterate over csv_files, the list built above)
velocities = [fit_velocity_file(csv_file) for csv_file in csv_files]
velocity_df = pd.DataFrame(velocities, columns=['Site', 'E_velocity', 'N_velocity', 'U_velocity'])

# Show the table, then save it (without the row index) for later use
print(velocity_df)
velocity_df.to_csv('velocities.csv', index=False)


Finally, we can create a custom module and hide the functions in there. This cleans things up but isn't actually shorter!

In [None]:
# this will not work as-is - you have to create the module my_gnss_functions yourself first!
import my_gnss_functions

# find the velocities
# NOTE: fit_velocities is assumed to accept a glob pattern and to return a DataFrame
# (it is printed and saved with to_csv below) - confirm when you write the module
velocity_df = my_gnss_functions.fit_velocities('timeseries/*.csv')

# print and save the velocities
print(velocity_df)
velocity_df.to_csv('velocities.csv', index=False)


### Making code readable for others

Here are some best practices for code formatting. This is not an exhaustive list - find a ton more examples here: https://peps.python.org/pep-0008/

In [None]:
# Use UpperCamelCase (every word capitalized, no underscores) for class names
class SampleClass:
    """Empty placeholder class; only its name matters for this example."""
    pass


# Use ALL_CAPS (with underscores between words) for constant values
MAX_SIZE = 100


# Use snake_case (lowercase words joined by underscores) for function, method, variable, and module names
def sample_function():
    """Empty placeholder function; only its name matters for this example."""
    pass


# Use self as the first parameter to instance methods
class AnotherClass:
    """Minimal class that stores a single value on each instance."""

    def __init__(self, value):
        # Use instance variables to store the object's state
        self.value = value
        
    def get_value(self):
        """Return the value stored on this instance."""
        return self.value


# Import standard libraries first, followed by third-party libraries, and then your own modules.
# Separate the three groups with a blank line.
import os
import sys

import numpy as np

from my_module import my_function


# Use inline comments sparingly and make them meaningful
total = 0  # This is an inline comment that is not very useful
# A more meaningful comment would explain why we need to initialize total to zero
# before starting the accumulation process.


# Use docstrings to describe the purpose of functions and classes
def add_numbers(a, b):
    """
    Add two numbers together.

    :param a: Left-hand operand of the addition.
    :param b: Right-hand operand of the addition.
    :return: The value of a + b.
    """
    result = a + b
    return result


# Avoid using single-character variable names, except for short-lived loop counters
# or the loop variable of a list comprehension
for i in range(10):
    print(i)

# Instead of:
s = 'Hello, World!'
# Use a descriptive name, written in snake_case like every other variable:
my_string = 'Hello, World!'

# Use list comprehensions for concise and expressive list manipulations
# (a single-character loop variable such as x is acceptable here)
squares = [x * x for x in range(10)]

# Use spaces around operators and after commas for readability
x = 1 + 2
my_list = [1, 2, 3]

# Use blank lines to separate logical sections of the code
def another_function():
    """Add two fixed numbers; the layout shows a blank line between logical steps."""
    # First section: set up the inputs
    first, second = 10, 20

    # Second section (set off by the blank line above): combine them
    return first + second


# Break long expressions before binary operators, instead of after.
# NOTE: gross_wages, taxable_interest, etc. are placeholder names that this cell does
# not define; the two snippets below only illustrate layout.

# Wrong:
# operators sit far away from their operands
income = (gross_wages +
          taxable_interest +
          (dividends - qualified_dividends) -
          ira_deduction -
          student_loan_interest)

# Correct:
# easy to match operators with operands
income = (gross_wages
          + taxable_interest
          + (dividends - qualified_dividends)
          - ira_deduction
          - student_loan_interest)

