# Sliding Window

In [1]:
import obspy

In [42]:
def normalize_time(time_str):
    """Normalize an ``H:M:S`` time string into a valid ``HH:MM:SS.sss`` string.

    Overflowing components are carried upward (e.g. 60 seconds becomes one
    extra minute, 60 minutes one extra hour). Hours wrap modulo 24, so any
    carry into the next day is discarded; callers that combine the result
    with a date must account for that themselves.

    Args:
        time_str: Text of the form ``"hours:minutes:seconds"``. Hours and
            minutes must parse as integers; seconds may be fractional.

    Returns:
        The normalized time as ``"HH:MM:SS.sss"``. Every field is zero-padded
        and seconds are given to millisecond precision.

    Raises:
        ValueError: If ``time_str`` does not consist of exactly three
            colon-separated fields, or a field is not numeric.
    """
    parts = time_str.split(':')
    if len(parts) != 3:
        raise ValueError(f"Invalid time format: {time_str}")

    hours = int(parts[0])
    minutes = int(parts[1])
    seconds = float(parts[2])

    # Round to whole milliseconds *before* carrying. Rounding only at format
    # time would turn e.g. 59.9996 s into the invalid field "60.000".
    total_ms = round(seconds * 1000)
    carried_minutes, ms = divmod(total_ms, 60_000)
    carried_hours, minutes = divmod(minutes + carried_minutes, 60)

    # Keep hours within 24-hour format (the day rollover is dropped).
    hours = (hours + carried_hours) % 24

    # Seconds are zero-padded like the other fields so the result is a
    # fixed-width time string.
    return f"{hours:02d}:{minutes:02d}:{ms // 1000:02d}.{ms % 1000:03d}"


In [56]:
import obspy
from obspy import UTCDateTime
import pandas as pd

# Sliding-window configuration used by get_windowed_data_with_labels
WINDOW_LENGTH = 95  # Length of each window, in seconds
WINDOW_MOVE = 30  # Step between consecutive window start times, in seconds
SAMPLES = 7601  # Samples every channel must hold for a window to be kept (presumably 95 s at 80 Hz, both endpoints included -- TODO confirm)

def get_windowed_data_with_labels(path, event_list, start_year, end_year, days=range(1, 5)):
    """Cut the continuous three-channel record into labeled sliding windows.

    Daily files for the HHE, HHN and HHZ channels of station MN.WDD are read
    for every requested day, merged into one stream, and sliced into windows
    of ``WINDOW_LENGTH`` seconds whose start advances by ``WINDOW_MOVE``
    seconds. A window is labeled 1 when the P or S time of any event falls
    inside it (bounds inclusive), otherwise 0. Windows in which any channel
    is missing or does not hold exactly ``SAMPLES`` samples are skipped.

    Args:
        path: Directory prefix of the archive. It is concatenated directly
            with ``"<year>/MN/WDD/..."``, so it must end with a path separator.
        event_list: Table providing ``iterrows()`` whose rows have the string
            columns ``'date'``, ``'pTime'`` and ``'sTime'``. ``date`` is
            concatenated directly with the normalized time before parsing.
        start_year: First year to read (inclusive). Windowing starts at
            00:00:00 UTC on January 1st of this year.
        end_year: Last year to read (inclusive).
        days: Day-of-year numbers to read for each year. Defaults to the
            first four days, matching the previous hard-coded behavior.

    Returns:
        A tuple ``(labeled_data, extra_data)``. ``labeled_data`` is a list of
        ``(window_stream, label)`` pairs; ``extra_data`` is a list of
        ``[year, julday, window_start, window_end, label]`` rows, one per
        kept window. Both are empty when the merged stream does not contain
        exactly three traces.

    Raises:
        ValueError: If an event time is not of the form ``H:M:S`` (raised by
            ``normalize_time``). Event times are parsed once up front, so a
            malformed row is reported even if no window reaches it.
    """
    labeled_data = []
    extra_data = []
    channels = ("HHE", "HHN", "HHZ")
    days = tuple(days)  # allow one-shot iterables to be reused for every year

    def read_day_data(year, day):
        """Read and merge the three channel files of one day; None on failure."""
        day_of_year = str(day).zfill(3)
        try:
            st_day = obspy.Stream()
            for channel in channels:
                st_day += obspy.read(
                    path + f"{year}/MN/WDD/{channel}.D/MN.WDD..{channel}.D.{year}.{day_of_year}"
                )
            st_day.merge(fill_value='interpolate')  # Ensure the data is continuous
            return st_day
        except Exception as e:
            # Best effort: a missing or unreadable day is reported and skipped.
            print(f"Missing or corrupted file for {year}, {day}: {e}")
            return None

    def get_continuous_stream():
        """Concatenate every readable day into one merged stream."""
        merged = obspy.Stream()
        for year in range(start_year, end_year + 1):
            for day in days:
                st_day = read_day_data(year, day)
                if st_day is not None:
                    merged += st_day

        # Merge again so gaps between consecutive days are interpolated too.
        merged.merge(fill_value='interpolate')
        return merged

    st = get_continuous_stream()

    # Ensure we have three channels of data
    if len(st) != 3:
        print(f"Expected 3 merged channels but found {len(st)}; no windows produced")
        return labeled_data, extra_data

    # Parse the event times once; they do not depend on the window.
    event_times = [
        (
            obspy.UTCDateTime(row['date'] + normalize_time(row['pTime'])),
            obspy.UTCDateTime(row['date'] + normalize_time(row['sTime'])),
        )
        for _, row in event_list.iterrows()
    ]

    start_time = UTCDateTime(f"{start_year}-01-01T00:00:00.000000Z")
    end_time = st[0].stats.endtime

    while start_time + WINDOW_LENGTH <= end_time:
        window_end = start_time + WINDOW_LENGTH
        windowed_data = st.slice(starttime=start_time, endtime=window_end)

        # slice() drops traces with no data in the window, so check the trace
        # count before looking at the individual channels.
        complete = len(windowed_data) == 3 and all(
            len(trace.data) == SAMPLES for trace in windowed_data
        )

        if complete:
            label = int(any(
                start_time <= p_time <= window_end or start_time <= s_time <= window_end
                for p_time, s_time in event_times
            ))
            labeled_data.append((windowed_data, label))
            extra_data.append([start_time.year, start_time.julday, start_time, window_end, label])
        start_time += WINDOW_MOVE
    return labeled_data, extra_data

# Process a single year: the first and last year are the same.
START_YEAR = END_YEAR = 2011

# Root directory of the waveform archive (keep the trailing slash).
path = "../../Data/"

# Event catalogue, read from a CSV file in the working directory.
event_list = pd.read_csv('AllEventsDF.csv')

labeled_data, extra_data = get_windowed_data_with_labels(
    path=path,
    event_list=event_list,
    start_year=START_YEAR,
    end_year=END_YEAR,
)

Number of labeled windows: 11516
Label: 0, Start time: 2011-01-01T00:00:29.998335Z, End time: 2011-01-01T00:02:04.998335Z
Label: 0, Start time: 2011-01-01T00:00:59.998335Z, End time: 2011-01-01T00:02:34.998335Z
Label: 0, Start time: 2011-01-01T00:01:29.998335Z, End time: 2011-01-01T00:03:04.998335Z
Label: 0, Start time: 2011-01-01T00:01:59.998335Z, End time: 2011-01-01T00:03:34.998335Z
Label: 0, Start time: 2011-01-01T00:02:29.998335Z, End time: 2011-01-01T00:04:04.998335Z


In [58]:
# Verification: report the total window count, then list every window
# that was labeled 1.
print(f"Number of labeled windows: {len(labeled_data)}")
for data, label in labeled_data:
    if label != 1:
        continue
    print(f"Label: {label}, Start time: {data[0].stats.starttime}, End time: {data[0].stats.endtime}")

Number of labeled windows: 11516
Label: 1, Start time: 2011-01-02T14:39:59.998335Z, End time: 2011-01-02T14:41:34.998335Z
Label: 1, Start time: 2011-01-02T14:40:29.998335Z, End time: 2011-01-02T14:42:04.998335Z
Label: 1, Start time: 2011-01-02T14:40:59.998335Z, End time: 2011-01-02T14:42:34.998335Z
Label: 1, Start time: 2011-01-02T14:41:29.998335Z, End time: 2011-01-02T14:43:04.998335Z
Label: 1, Start time: 2011-01-02T14:41:59.998335Z, End time: 2011-01-02T14:43:34.998335Z
Label: 1, Start time: 2011-01-02T14:42:29.998335Z, End time: 2011-01-02T14:44:04.998335Z


In [62]:
import numpy as np
from joblib import load, dump
def apply_fft(data):
    """Return the magnitude of the discrete Fourier transform of ``data``.

    The transform is taken with ``np.fft.fft`` using its defaults, and the
    absolute value of each complex coefficient is returned.
    """
    return np.abs(np.fft.fft(data))

In [None]:
# Transform every raw sample to its FFT magnitude and store the stacked array.
fft_data = list(map(apply_fft, raw_data))
dump(np.array(fft_data), "../Dump3/X_test_fft.joblib")

In [64]:
# Load the previously saved raw_X_Train object from disk.
# NOTE: joblib.load unpickles the file, so only load files from a trusted source.
raw_X_Train = load("../../BEST SET/raw_X_Train.joblib")

In [66]:
# Inspect the dimensions of the loaded array.
raw_X_Train.shape

(1652, 3, 7601)

In [81]:
# Build the feature and label arrays from the labeled windows.
labels = []
data = []
for stream, label in labeled_data:
    # Stack the per-trace sample arrays explicitly instead of passing the
    # Stream object to NumPy, which would have to walk it element by element.
    # Assumes every window holds traces of equal length -- TODO confirm.
    window_samples = np.array([trace.data for trace in stream])
    data.append(apply_fft(window_samples))
    labels.append(label)
labels = np.array(labels)
data = np.array(data)

In [87]:
# Persist the window labels next to the notebook.
dump(labels, './Windowlabels.joblib')

['./Windowlabels.joblib']

In [88]:
# Persist the per-window FFT magnitude array next to the notebook.
dump(data, './Windowdata.joblib')

['./Windowdata.joblib']