# Notes on data structures in Python
#### Computational Methods for Geoscience - EPS 400/522
#### Instructor: Eric Lindsey

---------

## Lists

In [1]:
# A list is an ordered, changeable collection written with square brackets.
# This one could hold a short sequence of measurements.
a = [3, 6, 11, 15]
print("List a:", a)

# Pick out a single value by its index.
# Python counts from 0, so index 1 is the second element of the list.
second_element = a[1]
print("Second element:", second_element)


List a: [3, 6, 11, 15]
Second element: 6


In [2]:
# Modify values in the list
# Lists are mutable, so assigning to an index replaces that element in place.
# For example, if the last measurement (15) was an error, you can set it to zero.
a[3] = 0
print("List after changing last value to zero:", a)


List after changing last value to zero: [3, 6, 11, 0]


In [3]:
# Append a new value to the list
# Useful for when new data becomes available.
# Here we add the integer 4 to the end of the list.
# Note: append() changes the list in place, so re-running this cell adds another 4.
a.append(4)
print("List after appending a new value:", a)


List after appending a new value: [3, 6, 11, 0, 4]


In [4]:
# Extract a subset of the list using slicing
# This can be helpful for analyzing or plotting a portion of the data.
# The syntax is [start_index:end_index]. The start index is included and the end index is excluded,
# so the slice holds the elements from start_index to end_index - 1.
# For example, a[1:3] will return a new list containing a[1] and a[2].
subset = a[1:3]
print("Subset of the list:", subset)


Subset of the list: [6, 11]


In [5]:
# You can also skip elements by adding a 'step' value in slice [start:end:step]
# Here we take every second element among indices 0 to 3, i.e. a[0] and a[2].
# The slice stops before index 4, so a[4] is not considered; a[::2] would step through the whole list.
subset_with_step = a[0:4:2]
print("Subset with step:", subset_with_step)


Subset with step: [3, 11]


In [6]:
# Concatenating two lists
# Useful for combining data sets.
# The + operator builds a new list; a and b themselves are left unchanged.
b = [5, 7, 9]
c = a + b
print("Concatenated list:", c)


Concatenated list: [3, 6, 11, 0, 4, 5, 7, 9]


In [7]:
# Finding the length of a list
# This could help you know how many data points you have.
# len() returns the number of elements currently in the list.
length_of_list = len(a)
print("Length of the list:", length_of_list)


Length of the list: 5


In [8]:
# Summing up all elements in the list
# Useful for calculating statistics like mean (for example, sum(a) / len(a)).
sum_of_elements = sum(a)
print("Sum of all elements:", sum_of_elements)


Sum of all elements: 24


In [9]:
# Find the location of a particular value in the list
# index() returns the zero-based position of the first element equal to the target.
target = 11
iloc = a.index(target)
print(f"The value {target} is found at position {iloc} in a")

# question: what happens if the target value is not in the list?

The value 11 is found at position 2 in a


In [10]:
# You can also use min() and max() to find the smallest and largest values, respectively.
# Both work on any list whose elements can be compared with each other, such as numbers.
min_value = min(a)
max_value = max(a)
print(f"Min value: {min_value}, Max value: {max_value}")


Min value: 0, Max value: 11


In [11]:
# Using list comprehensions for data manipulation
# A list comprehension builds a new list by evaluating an expression for each element.
# For example, to square each element in the list.
print("List a:", a)

# The original list a is not modified; the squares go into a new list.
squared_values = [x**2 for x in a]
print("Squared values:", squared_values)


List a: [3, 6, 11, 0, 4]
Squared values: [9, 36, 121, 0, 16]


In [12]:
# Using list comprehensions with conditionals
# The trailing 'if' acts as a filter: only elements for which the condition is True are kept.
# For example, to keep only elements that are greater than 5.
filtered_values = [x for x in a if x > 5]
print("Filtered values (greater than 5):", filtered_values)


Filtered values (greater than 5): [6, 11]


In [13]:
# Lists can also contain mixed data types, although this is less common in numerical computations
# Here the list holds an integer, a string, and a float.
mixed_list = [1, "a", 3.5]
print("Mixed data types:", mixed_list)


Mixed data types: [1, 'a', 3.5]


In [14]:
# Lists can be nested, which can be useful for multi-dimensional data (like 2D grids in geospatial analysis)
# Each inner list is one row; use two indices to reach a single value, e.g. nested_list[1][2] is 6.
nested_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
print("Nested list:", nested_list)


Nested list: [[1, 2, 3], [4, 5, 6], [7, 8, 9]]


## Tuples

Tuples are similar to lists in many ways but are immutable, meaning you can't change their elements once they're created. This can be useful for data that should not be modified. 

In [15]:
# Define a tuple
# Tuples are written with parentheses instead of square brackets.
# For example, this could represent the latitude and longitude of a location.
coordinates = (40.7128, -74.0060)
# Note: -74.0060 is printed as -74.006 because floats do not keep trailing zeros.
print("Coordinates:", coordinates)

# Access values in a tuple
# Just like lists, tuples are zero-indexed.
print("Latitude:", coordinates[0])
print("Longitude:", coordinates[1])


Coordinates: (40.7128, -74.006)
Latitude: 40.7128
Longitude: -74.006


In [16]:
# Trying to modify a tuple will result in an error, because tuples are immutable.
# The TypeError is caught and printed so that the error message is still shown,
# but the notebook keeps running when you use "Restart Kernel -> Run All".
try:
    coordinates[0] = 41.0
except TypeError as err:
    print(f"TypeError: {err}")


TypeError: 'tuple' object does not support item assignment

In [17]:
# You can concatenate tuples
# Useful for extending data sets that should remain immutable.
# Concatenation creates a new tuple; the two original tuples are not changed.
more_coordinates = (41.8902, 12.4924)
all_coordinates = coordinates + more_coordinates
print("All coordinates:", all_coordinates)


All coordinates: (40.7128, -74.006, 41.8902, 12.4924)


In [18]:
# Length of a tuple
# This could help you know how many data points you have.
# len() works on tuples just as it does on lists.
print("Length of coordinates tuple:", len(coordinates))


Length of coordinates tuple: 2


In [19]:
# Minimum and maximum values in a tuple
# Useful for identifying range in data.
# min() and max() accept tuples as well as lists.
print(f"Min coordinate: {min(coordinates)}, Max coordinate: {max(coordinates)}")


Min coordinate: -74.006, Max coordinate: 40.7128


In [20]:
# Unpacking a tuple
# This is particularly useful when a function returns multiple values packed in a tuple.
coordinates = (40.7128, -74.0060)
print("Coordinates:", coordinates)

# now unpack the tuple
# The number of names on the left must match the number of elements in the tuple.
lat, lon = coordinates
print(f"Unpacked: Latitude: {lat}, Longitude: {lon}")


Coordinates: (40.7128, -74.006)
Unpacked: Latitude: 40.7128, Longitude: -74.006


In [21]:
# Nested tuples
# These can be used for more complex structures, like a set of 3D coordinates.
# The outer tuple holds two inner tuples, each with three values.
nested_coordinates = ((40.7128, -74.0060, 5), (41.8902, 12.4924, 10))
print("Nested coordinates:", nested_coordinates)


Nested coordinates: ((40.7128, -74.006, 5), (41.8902, 12.4924, 10))


In [22]:
# Unpacking nested tuples
# The pattern of names on the left mirrors the shape of the nested tuple on the right.
(lat1, long1, alt1), (lat2, long2, alt2) = nested_coordinates
print(f"Unpacked nested coordinates - 1st set: Latitude {lat1}, Longitude {long1}, Altitude {alt1}")
print(f"Unpacked nested coordinates - 2nd set: Latitude {lat2}, Longitude {long2}, Altitude {alt2}")


Unpacked nested coordinates - 1st set: Latitude 40.7128, Longitude -74.006, Altitude 5
Unpacked nested coordinates - 2nd set: Latitude 41.8902, Longitude 12.4924, Altitude 10


In [23]:
# Function returning multiple values as a tuple
def min_max(data):
    """Return the minimum and maximum of data as a (min, max) tuple.

    Parameters
    ----------
    data : iterable
        Values that can be compared with each other (e.g. a list or tuple
        of numbers). One-shot iterables such as generators are accepted too.

    Returns
    -------
    tuple
        (smallest value, largest value)

    Raises
    ------
    ValueError
        If data contains no values.
    """
    # Copy into a list so data is only iterated once. Calling min() and then
    # max() directly on a generator would leave nothing for max() to read.
    values = list(data)
    if not values:
        raise ValueError("min_max() requires at least one value")
    return min(values), max(values)

# Function usage: the returned tuple is unpacked directly into two variables.
data = [3, 6, 9, 12]
min_val, max_val = min_max(data)
print(f"Min: {min_val}, Max: {max_val}")

Min: 3, Max: 12


## Dictionaries

Dictionaries in Python are collections of key-value pairs and are extremely versatile for data storage and manipulation. They can be particularly useful for things like storing metadata, associating names with values, or even creating simple databases.

In [24]:
# Define a dictionary
# A dictionary is written with curly braces as key: value pairs.
# For example, this could represent the properties of a rock sample.
rock_sample = {'Name': 'Granite', 'Density': 2.75, 'Porosity': 0.1}
print("Rock sample properties:", rock_sample)

# Access values in a dictionary using keys
# The key goes in square brackets, where a list would take an index.
print("Density of the rock sample:", rock_sample['Density'])


Rock sample properties: {'Name': 'Granite', 'Density': 2.75, 'Porosity': 0.1}
Density of the rock sample: 2.75


In [25]:
# Modify values in a dictionary
# Assigning to a key that already exists overwrites its value.
rock_sample['Density'] = 2.80
print("Updated rock sample properties:", rock_sample)


Updated rock sample properties: {'Name': 'Granite', 'Density': 2.8, 'Porosity': 0.1}


In [26]:
# Add new key-value pairs to a dictionary
# Assigning to a key that does not exist yet creates it.
# For instance, adding a color property to the rock sample.
rock_sample['Color'] = 'Gray'
print("Rock sample with added color property:", rock_sample)


Rock sample with added color property: {'Name': 'Granite', 'Density': 2.8, 'Porosity': 0.1, 'Color': 'Gray'}


In [27]:
# Remove key-value pairs from a dictionary using 'del'
# If the 'Color' information is not needed, you can remove it.
# Note: 'del' raises a KeyError if the key is not in the dictionary.
del rock_sample['Color']
print("Rock sample after removing color property:", rock_sample)


Rock sample after removing color property: {'Name': 'Granite', 'Density': 2.8, 'Porosity': 0.1}


In [28]:
# Check if a key exists in a dictionary
# This is useful for avoiding errors before trying to access a key.
# The 'in' operator tests the dictionary's keys (not its values).
if 'Porosity' in rock_sample:
    print("Porosity exists:", rock_sample['Porosity'])


Porosity exists: 0.1


In [29]:
# Using the 'get()' method to access values safely
# This returns None if the key is not found, avoiding errors.
# get() also takes an optional second argument to return instead of None.
print("Safe way to get Porosity:", rock_sample.get('Porosity'))
print("Safe way to get unknown_key:", rock_sample.get('unknown_key'))


Safe way to get Porosity: 0.1
Safe way to get unknown_key: None


In [30]:
# Iterating over keys in a dictionary
# Looping directly over a dictionary gives its keys, one at a time.
# For example, to print out all properties of the rock sample.
print("Iterating over keys:")
for key in rock_sample:
    print(f"{key}: {rock_sample[key]}")


Iterating over keys:
Name: Granite
Density: 2.8
Porosity: 0.1


In [31]:
# Iterating over key-value pairs in a dictionary
# More direct than the above method when you need both key and value.
# items() yields (key, value) pairs, which the for loop unpacks into two variables.
print("Iterating over key-value pairs:")
for key, value in rock_sample.items():
    print(f"{key}: {value}")


Iterating over key-value pairs:
Name: Granite
Density: 2.8
Porosity: 0.1


In [32]:
# Nested dictionaries
# These can be used for more complex structures like a collection of rock samples.
# Each outer key (a sample label) maps to an inner dictionary of that sample's properties.
rock_samples = {
    'Sample_1': {'Name': 'Granite', 'Density': 2.75},
    'Sample_2': {'Name': 'Basalt', 'Density': 3.00}
}
print("Nested dictionary:", rock_samples)

# Accessing nested dictionaries
# Chain the keys: first the sample, then the property.
# To get the density of 'Sample_1'.
print("Density of Sample_1:", rock_samples['Sample_1']['Density'])



Nested dictionary: {'Sample_1': {'Name': 'Granite', 'Density': 2.75}, 'Sample_2': {'Name': 'Basalt', 'Density': 3.0}}
Density of Sample_1: 2.75


In [33]:
# Using dictionaries as simple databases
# For example, associating geological ages with corresponding periods.
# Keys are period names; values are numeric ages.
# NOTE(review): the values look like the start of each period in millions of years ago (Ma) - confirm and state the unit.
geological_ages = {'Cambrian': 541, 'Ordovician': 485.4, 'Silurian': 443.8}
print("Geological ages:", geological_ages)


Geological ages: {'Cambrian': 541, 'Ordovician': 485.4, 'Silurian': 443.8}


In [34]:
# Function returning multiple values as a dictionary
def min_max_mean(data):
    """Return the minimum, maximum, and mean of a collection of numbers.

    Parameters
    ----------
    data : iterable of numbers
        For example a list or tuple. One-shot iterables such as generators
        are accepted too.

    Returns
    -------
    dict
        A dictionary with the keys 'min', 'max' and 'mean'.

    Raises
    ------
    ValueError
        If data contains no values.
    """
    # Copy into a list so data is only iterated once and len() is available;
    # a generator has no length and would be used up by the first pass.
    values = list(data)
    if not values:
        raise ValueError("min_max_mean() requires at least one value")
    return {'min': min(values), 'max': max(values), 'mean': sum(values) / len(values)}

# Function usage
# The result is a dictionary, so each statistic is read back by its key.
data = [3, 6, 9, 12]
stats = min_max_mean(data)
print(f"Data stats: Min value: {stats['min']}, Max value: {stats['max']}, Mean value: {stats['mean']}")


Data stats: Min value: 3, Max value: 12, Mean value: 7.5


In [35]:
# a (sort of) practical use case example: creating a lookup table

# Lookup table pairing each depth with a seismic velocity
# Keys are depths in kilometers; values are seismic velocities in km/s
depth_velocity_lookup = {
    0: 5.8,   # At surface
    10: 6.1,  # 10 km deep
    20: 6.3,  # 20 km deep
    30: 6.6,  # 30 km deep
    50: 8.0,  # 50 km deep
}

# Function to look up the seismic velocity at a given depth
def find_seismic_velocity(depth):
    """Return the velocity stored for depth, or the string "Unknown" if depth is not a key of the table."""
    if depth in depth_velocity_lookup:
        return depth_velocity_lookup[depth]
    return "Unknown"

# Testing the lookup
# Only depths that are exact keys of the table are found; nothing is interpolated between them.
print(find_seismic_velocity(0))  # Output will be 5.8
print(find_seismic_velocity(10))  # Output will be 6.1

# Output will be 'Unknown' because 100 km is not in the dictionary
print(find_seismic_velocity(100))  


5.8
6.1
Unknown


## Additional data types to know about
We won't use these much in this class, but it's good to know they exist.

### Sets

A set is an unordered collection of unique items. Sets are useful for tasks like removing duplicates from a list, testing for membership, or finding the intersection between two data collections.



In [36]:
# Create a set
# Curly braces holding plain items (no key: value pairs) make a set, not a dictionary.
minerals = {'Quartz', 'Feldspar', 'Mica'}
print(minerals)

# Add an item to the set
minerals.add('Pyrite')
print(minerals)

# Sets never hold duplicates: adding an item that is already present changes nothing
minerals.add('Quartz')
print(minerals)

# Test for membership
print('Quartz' in minerals)


{'Mica', 'Quartz', 'Feldspar'}
{'Mica', 'Pyrite', 'Quartz', 'Feldspar'}
{'Mica', 'Pyrite', 'Quartz', 'Feldspar'}
True


### JSON Objects

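JSON (JavaScript Object Notation) objects are commonly used for storing structured data, especially when reading from or writing to files. They resemble Python dictionaries, and Python's built-in `json` module converts between the two: a dictionary is serialized to a JSON-formatted string, and a JSON string is parsed back into a dictionary.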

In [37]:
import json

# Create a JSON string from a Python dictionary
# json.dumps() returns a str, which is why the printed keys use double quotes.
rock_data_json = json.dumps({'Name': 'Granite', 'Density': 2.75})
print(rock_data_json)

# Parse the JSON string back into a Python dictionary
rock_data_dict = json.loads(rock_data_json)
print(rock_data_dict)

{"Name": "Granite", "Density": 2.75}
{'Name': 'Granite', 'Density': 2.75}


### Strings
Strings in Python are actually sequences and share some properties with lists and tuples. They are often used for storing textual data, parsing, and more.

In [46]:
# Create a string
str_data = "Geology"

# String indexing and slicing
# Indexing starts at 0, as with lists. Negative indices count from the end,
# so [-3:] runs from the third-to-last character to the end of the string.
print("First letter:", str_data[0])
print("Last 3 letters:", str_data[-3:])


First letter: G
Last 3 letters: ogy


In [47]:
# check if a string contains another string:
# For strings, 'in' tests whether the left string appears anywhere inside the right one.
# The comparison is case-sensitive.
place = '14km NW of Albuquerque, New Mexico'
state1 = 'New Mexico'
state2 = 'Arizona'
print(state1 in place)
print(state2 in place)

True
False


### Objects

Creating custom object types in Python often involves defining classes. Classes allow you to bundle data and functionalities together.

In [39]:
class RockSample:
    """A rock sample described by its name, density, porosity and (optionally) color."""

    def __init__(self, name, density, porosity, color=None):
        """Store the given properties as attributes of the new object."""
        # Rock name (string), rock density (float), rock porosity (float)
        self.name, self.density, self.porosity = name, density, porosity
        # Rock color (string); optional, stays None when not given
        self.color = color

    def update_density(self, new_density):
        """Replace the stored density with new_density."""
        self.density = new_density

    def compute_mass(self, volume):
        """Return the mass of a sample with the given volume (volume times density)."""
        mass = volume * self.density
        return mass

    def display_properties(self):
        """Print the rock's properties, one per line; the color line is skipped if no color is set."""
        print(f"Rock Name: {self.name}")
        print(f"Density: {self.density} g/cm^3")
        print(f"Porosity: {self.porosity}")
        if not self.color:
            return
        print(f"Color: {self.color}")

In [40]:
# Creating an instance of the RockSample class
# Arguments are name, density and porosity; color is left at its default of None.
sample1 = RockSample('Granite', 2.75, 0.1)

# Displaying properties of sample1
print("Properties of Sample 1:")
sample1.display_properties()

# Updating the density of sample1
# The method changes the object itself, so the next display shows the new value.
sample1.update_density(2.80)

# Displaying updated properties of sample1
print("\nUpdated Properties of Sample 1:")
sample1.display_properties()

Properties of Sample 1:
Rock Name: Granite
Density: 2.75 g/cm^3
Porosity: 0.1

Updated Properties of Sample 1:
Rock Name: Granite
Density: 2.8 g/cm^3
Porosity: 0.1


In [41]:
# Creating another instance with optional color information
# color is passed by keyword, so this object also prints a "Color" line.
sample2 = RockSample('Basalt', 3.00, 0.05, color='Black')

# Displaying properties of sample2
print("Properties of Sample 2:")
sample2.display_properties()

# compute mass of a 100 cm^3 sample
# compute_mass() returns the value rather than printing it, so we print it here.
print("\nmass (g) for a volume of 100 cm^3:", sample2.compute_mass(100))

Properties of Sample 2:
Rock Name: Basalt
Density: 3.0 g/cm^3
Porosity: 0.05
Color: Black

mass (g) for a volume of 100 cm^3: 300.0
