In [2]:
import segyio
import numpy as np
import torch
from sklearn.model_selection import train_test_split

# 读取SEGY文件
segy_file = '../SegyData/20220101_115300.sgy'

with segyio.open(segy_file, 'rb') as segy:
    # 获取地震数据的一维数组
    seismic_data = segy.trace.raw[:]

    # 获取地震道数量和每个地震道的时间采样点数量
    n_traces = segy.tracecount
    n_samples = segy.samples.size

    print("地震道数量:{}".format(n_traces))
    print("采样点数量：{}".format(n_samples))

    # 将一维数组重新形状为二维数组，形状为 (n_traces, n_samples)
    seismic_data = seismic_data.reshape((n_traces, n_samples))

# 随机划分为训练集和测试集
train_data, test_data = train_test_split(seismic_data, test_size=0.2, random_state=42)

# 将数据转换为PyTorch张量
train_data = torch.from_numpy(train_data).float()
test_data = torch.from_numpy(test_data).float()

print(f"训练集大小: {train_data.shape}")
print(f"测试集大小: {test_data.shape}")


地震道数量:42
采样点数量：15000
训练集大小: torch.Size([33, 15000])
测试集大小: torch.Size([9, 15000])


In [23]:
import segyio
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader,Subset

class SegyDataset(Dataset):
    def __init__(self, folder_path):
        self.filenames = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".sgy")]
        
    def __len__(self):
        return len(self.filenames)
    
    def __getitem__(self, index):
        with segyio.open(self.filenames[index], "r") as f:
            # 读取数据并进行裁剪和缩放
            data = f.trace.raw[:15000, :]  # 提取所有15000个采样点
            data = data / np.max(np.abs(data))  # 归一化操作
            data = data.reshape(42,1,15000)

        # 将数据转换为张量并返回
        return torch.from_numpy(data).float() # 0 表示一个空标签


# 构建数据集
dataset = SegyDataset(folder_path="../SegyData/")
print(len(dataset))


# data = torch.stack([torch.tensor(dataset[i][0]) for i in range(len(dataset))])



812


In [26]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

class SegyDataset(Dataset):
    def __init__(self, folder_path):
        self.file_list = []
        for filename in os.listdir(folder_path):
            if filename.endswith(".sgy"):
                self.file_list.append(os.path.join(folder_path, filename))

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        filename = self.file_list[idx]
        with segyio.open(filename, "r", ignore_geometry=True) as f:
            data = f.trace.raw[:, :15000]
            data = data / np.max(np.abs(data))
        return torch.from_numpy(data.astype(np.float32)).unsqueeze(0)

dataset = SegyDataset(folder_path="../SegyData/")
data_tensor = torch.cat([dataset[i] for i in range(len(dataset))], dim=0)

# train_size = int(0.7 * len(data_tensor))
# test_size = len(data_tensor) - train_size
# train_data, test_data = torch.utils.data.random_split(data_tensor, [train_size, test_size])

# train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
# test_loader = DataLoader(test_data, batch_size=16, shuffle=False)


TypeError: trace indices must be integers or slices, not tuple

In [45]:
with segyio.open("../SegyData/20221004_071900.sgy", "rb", ignore_geometry=True) as f:
            # print("输出")
            data = f.trace.raw[:]
            print("输出")
            print(data.shape)
            # data = data / np.max(np.abs(data))

输出
(81, 15000)


In [1]:
import segyio
import numpy as np
import glob
import os

# 定义文件夹路径和文件名格式
folder_path = "../testData/"
file_pattern = "*.sgy"

# 获取所有匹配的文件名
file_list = glob.glob(os.path.join(folder_path, file_pattern))

# 定义张量的维度
num_files = len(file_list)
num_samples = 15000
num_channels = 42

# 创建一个空张量来保存所有数据
data = np.zeros((num_files, num_samples, num_channels))

count = 0
# 遍历所有文件并将它们保存到张量中
for i, file_name in enumerate(file_list):
    with segyio.open(file_name, "r", ignore_geometry=True) as segyfile:
        trace_data = segyfile.trace.raw[:]
        print(trace_data.shape)

        #  保存每一个台站的张量  42 x 15000的形状
        data[i, :, :] = np.reshape(trace_data, (num_samples,num_channels))
        # print("执行力")
        count += 1
        print(count)

# 打印张量的形状
print(data.shape)
print(count)


(42, 15000)
1
(42, 15000)
2
(42, 15000)
3
(42, 15000)
4
(42, 15000)
5
(42, 15000)
6
(42, 15000)
7
(42, 15000)
8
(42, 15000)
9
(42, 15000)
10
(42, 15000)
11
(42, 15000)
12
(42, 15000)
13
(42, 15000)
14
(42, 15000)
15
(42, 15000)
16
(42, 15000)
17
(42, 15000)
18
(42, 15000)
19
(42, 15000)
20
(42, 15000)
21
(42, 15000)
22
(42, 15000)
23
(42, 15000)
24
(42, 15000)
25
(42, 15000)
26
(42, 15000)
27
(42, 15000)
28
(42, 15000)
29
(42, 15000)
30
(42, 15000)
31
(42, 15000)
32
(42, 15000)
33
(42, 15000)
34
(42, 15000)
35
(42, 15000)
36
(42, 15000)
37
(42, 15000)
38
(42, 15000)
39
(42, 15000)
40
(42, 15000)
41
(42, 15000)
42
(42, 15000)
43
(42, 15000)
44
(42, 15000)
45
(42, 15000)
46
(42, 15000)
47
(42, 15000)
48
(42, 15000)
49
(42, 15000)
50
(42, 15000)
51
(42, 15000)
52
(42, 15000)
53
(42, 15000)
54
(42, 15000)
55
(42, 15000)
56
(42, 15000)
57
(42, 15000)
58
(42, 15000)
59
(42, 15000)
60
(42, 15000)
61
(42, 15000)
62
(42, 15000)
63
(42, 15000)
64
(42, 15000)
65
(42, 15000)
66
(42, 15000)
67
(42,