In [2]:
from gwpy.timeseries import TimeSeries
from gwosc.datasets import run_segment
import numpy as np
import os

# === Settings ===
detector = "H1"  # Options: 'H1', 'L1', or 'V1'
sample_rate = 4096
duration = 4  # seconds
n_segments = 5000
output_dir = "./real-noise-files/"
os.makedirs(output_dir, exist_ok=True)

# === Get available time segment for O2 run ===
start, end = run_segment("O2")  # Fix: no [detector] indexing here

# Create candidate 4-second intervals
all_start_times = np.arange(start, end - duration, duration)
np.random.seed(42)
np.random.shuffle(all_start_times)

# === Download Loop ===
downloaded = 0
i = 0

while downloaded < n_segments and i < len(all_start_times):
    try:
        gps_start = int(all_start_times[i])
        gps_end = gps_start + duration
        ts = TimeSeries.fetch_open_data(detector, gps_start, gps_end, sample_rate=sample_rate)
        if len(ts) != sample_rate * duration:
            i += 1
            continue
        ts.write(os.path.join(output_dir, f"real_noise_{downloaded}.txt"))
        downloaded += 1
    except Exception as e:
        print(f"⚠️ Skipping GPS {gps_start}: {e}")
    i += 1
print(f"✅ Downloaded {downloaded} real noise segments from {detector} into: {output_dir}")

⚠️ Skipping GPS 1179896153: Cannot find a GWOSC dataset for H1 covering [1179896153, 1179896157)
⚠️ Skipping GPS 1167311297: Cannot find a GWOSC dataset for H1 covering [1167311297, 1167311301)
⚠️ Skipping GPS 1168564037: Cannot find a GWOSC dataset for H1 covering [1168564037, 1168564041)
⚠️ Skipping GPS 1179631533: Cannot find a GWOSC dataset for H1 covering [1179631533, 1179631537)
⚠️ Skipping GPS 1164998033: Cannot find a GWOSC dataset for H1 covering [1164998033, 1164998037)
⚠️ Skipping GPS 1184022745: Cannot find a GWOSC dataset for H1 covering [1184022745, 1184022749)
⚠️ Skipping GPS 1178736509: Cannot find a GWOSC dataset for H1 covering [1178736509, 1178736513)
⚠️ Skipping GPS 1168789997: Cannot find a GWOSC dataset for H1 covering [1168789997, 1168790001)
⚠️ Skipping GPS 1187476613: Cannot find a GWOSC dataset for H1 covering [1187476613, 1187476617)
⚠️ Skipping GPS 1180251369: Cannot find a GWOSC dataset for H1 covering [1180251369, 1180251373)
⚠️ Skipping GPS 1185104105: Ca

In [1]:
import os
import shutil

# === Paths ===
input_dir = "./real-noise-files/"
output_dir = "./dataset/real-noise-files/"
os.makedirs(output_dir, exist_ok=True)

# === Threshold for size check (800 KB) ===
MIN_SIZE_KB = 800

# === Move Valid Files ===
moved = 0
skipped = 0

for filename in os.listdir(input_dir):
    if not filename.endswith(".txt"):
        continue

    full_path = os.path.join(input_dir, filename)
    file_size_kb = os.path.getsize(full_path) / 1024

    if file_size_kb >= MIN_SIZE_KB:
        shutil.copy(full_path, os.path.join(output_dir, filename))
        moved += 1
    else:
        skipped += 1
        print(f"❌ Skipping {filename}: only {file_size_kb:.1f} KB")

print(f"\n✅ Done! {moved} files moved to '{output_dir}'")
print(f"⚠️ {skipped} files were too small and skipped.")


❌ Skipping real_noise_0.txt: only 464.0 KB
❌ Skipping real_noise_1009.txt: only 464.0 KB
❌ Skipping real_noise_1022.txt: only 464.0 KB
❌ Skipping real_noise_1035.txt: only 464.0 KB
❌ Skipping real_noise_1044.txt: only 464.0 KB
❌ Skipping real_noise_1049.txt: only 464.0 KB
❌ Skipping real_noise_1065.txt: only 464.0 KB
❌ Skipping real_noise_1077.txt: only 464.0 KB
❌ Skipping real_noise_1079.txt: only 464.0 KB
❌ Skipping real_noise_1093.txt: only 464.0 KB
❌ Skipping real_noise_110.txt: only 464.0 KB
❌ Skipping real_noise_1121.txt: only 464.0 KB
❌ Skipping real_noise_113.txt: only 464.0 KB
❌ Skipping real_noise_1138.txt: only 464.0 KB
❌ Skipping real_noise_1139.txt: only 464.0 KB
❌ Skipping real_noise_1167.txt: only 464.0 KB
❌ Skipping real_noise_1176.txt: only 464.0 KB
❌ Skipping real_noise_1179.txt: only 464.0 KB
❌ Skipping real_noise_1193.txt: only 464.0 KB
❌ Skipping real_noise_1196.txt: only 464.0 KB
❌ Skipping real_noise_1207.txt: only 464.0 KB
❌ Skipping real_noise_1211.txt: only 46