# High‑Performance & Cloud Workflows

**1: Parallel Processing with Joblib**  
 Here, we use `joblib.Parallel` to distribute the window‑by‑window feature extraction across all available CPU cores.


In [2]:
# 1. Imports
from obspy.clients.fdsn import Client
from obspy import UTCDateTime

# 2. Fetch a 10-minute BHZ record for IU.ANMO (location 00) from IRIS
client = Client("IRIS")
starttime = UTCDateTime("2010-02-27T06:30:00")
endtime = starttime + 600  # 600 s = 10 minutes

st = client.get_waveforms(
    "IU", "ANMO", "00", "BHZ",  # network, station, location, channel
    starttime, endtime,
)

# 3. Remove the linear trend, then keep the 0.5-5.0 Hz band.
#    Both calls are made on `st` directly; their return values are not reassigned.
st.detrend("linear")
st.filter("bandpass", freqmin=0.5, freqmax=5.0)

# 4. Work with the first trace of the stream
tr = st[0]

# 5. Window geometry in samples: 10 s long windows, advancing 5 s per step
window_size, step_size = (
    int(seconds * tr.stats.sampling_rate) for seconds in (10, 5)
)


In [3]:
from joblib import Parallel, delayed
import numpy as np
import pandas as pd

# 6. Split into overlapping windows
# `range` excludes its stop value, so the stop is `len - window_size + 1`:
# this keeps the last window that still fits entirely inside the trace, which
# would otherwise be dropped whenever `len - window_size` is an exact multiple
# of `step_size`. A trace shorter than one window yields an empty list.
windows = [
    tr.data[start : start + window_size]
    for start in range(0, len(tr.data) - window_size + 1, step_size)
]

# 7. Feature‐extraction function
def extract_features(window):
    """Compute summary statistics for one window of waveform samples.

    Parameters
    ----------
    window : array_like
        One-dimensional, non-empty sequence of numeric samples.

    Returns
    -------
    dict
        ``"mean"``, ``"std"``, ``"max"``, ``"min"``: NumPy reductions over
        the window.
        ``"energy"``: sum of squared samples, computed in float64.
        ``"zeros"``: number of sign changes between consecutive samples.

    Raises
    ------
    ValueError
        If ``window`` is empty (raised by ``np.max``).
    """
    samples = np.asarray(window)

    # Square in float64. With integer input (e.g. int32 counts) `samples**2`
    # stays in the integer dtype and silently wraps around for large values.
    squared = samples.astype(np.float64) ** 2

    # Compare "is negative" flags of neighbouring samples. A crossing that
    # passes through an exact 0 is counted once (diff-of-sign would count it
    # twice), and touching 0 without changing sign is not counted at all.
    negative = samples < 0
    sign_changes = np.count_nonzero(negative[1:] != negative[:-1])

    return {
        "mean":   np.mean(samples),
        "std":    np.std(samples),
        "max":    np.max(samples),
        "min":    np.min(samples),
        "energy": np.sum(squared),
        "zeros":  sign_changes,
    }

# 8. Fan the windows out across all CPU cores (n_jobs=-1), one job per window
results = Parallel(n_jobs=-1)(map(delayed(extract_features), windows))

# 9. Tabulate: one row per window, one column per feature; preview the first rows
df_features = pd.DataFrame(results)
df_features.head(5)


Unnamed: 0,mean,std,max,min,energy,zeros
0,-0.552111,186.660783,964.249936,-703.608559,6968511.0,12
1,2.001824,24.704553,46.803192,-56.866891,122864.4,11
2,-3.005374,35.712502,60.877856,-84.317567,256883.0,9
3,-4.655337,50.160444,80.590743,-106.886256,507548.5,7
4,2.512402,44.990815,80.590743,-106.886256,406097.1,7
