In [56]:
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

#  定义自编码器网络模型
#  压缩比  15 
class MyConvNet(nn.Module):
    def __init__(self, compression_ratio):
        super(MyConvNet, self).__init__()
        #  输入1  输出 16  15000 x 1 -> 15000 x 16
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1) # 1D convolutional layer with 16 filters
        #  15000 x 16 -> 5000 x 16
        self.maxpool1 = nn.MaxPool2d(3) # Max pooling layer with compression_ratio stride
       
        # 5000 x 16  -> 5000 x 32
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1) # 1D convolutional layer with 64 filters

        #  5000 x 32 -> 5000 x 64
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1) # 1D convolutional layer with 64 filters

        # 5000 x 64-> 1000 x 64
        self.maxpool2 = nn.MaxPool2d(5)

        # 1000 x 64 -> 1000 x 128
        self.conv4 = nn.Conv2d(64,128,kernel_size=3,padding=1)

        # 最后一层卷积 1000 x 128 -> 1000 x 1
        self.conv5 = nn.Conv2d(128,1,kernel_size=3,padding=1)

    # encoder forward 前向传播
    def forward(self, x):
        alpha = 0.1
        elu = nn.ELU(alpha=alpha)  # 创建elu激活函数
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = elu(x)
        x = self.conv2(x)
        x = elu(x)
        x = self.conv3(x)
        x = self.maxpool2(x)
        x = elu(x)
        x = self.conv4(x)
        x = self.conv5(x)
        return x


#  定义网络模型
# Instantiate the model with a compression ratio of 15   15000 -> 1000
compression_ratio = 15
model = MyConvNet(compression_ratio)

# Print the model architecture
print(model)


MyConvNet(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (maxpool1): MaxPool2d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (maxpool2): MaxPool2d(kernel_size=5, stride=5, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(128, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)


In [57]:
#  定义损失函数  均方根
# loss_fn = nn.CrossEntropyLoss()
loss_fn = nn.MSELoss()

#  学习率 
learning_rate = 0.002

#  定义优化器  随机梯度下降
optimizer = torch.optim.SGD(model.parameters(),lr = learning_rate)


In [58]:
# 加载数据集
import numpy as np
import segyio

# 读取SEGY文件
segy_file = '../SegyData/20220101_115300.sgy'

with segyio.open(segy_file, 'rb') as segy:
    # 获取地震数据的一维数组
    seismic_data = segy.trace.raw[:]

    # 获取地震道数量和每个地震道的时间采样点数量
    n_traces = segy.tracecount
    n_samples = segy.samples.size

    print("地震道数量:{}".format(n_traces))
    print("采样点数量：{}".format(n_samples))

    # 将一维数组重新形状为二维数组，形状为 (n_traces, n_samples)
    seismic_data = seismic_data.reshape((n_traces, n_samples))

# 打印地震数据的形状  42 x 15000  42个地震道  15000个采样点
print("Seismic data shape:", seismic_data.shape)

# 可以使用NumPy进行进一步的处理和分析
# 例如，获取第一条地震道的前10个采样点
trace_0 = seismic_data[0, :10]
print("Trace 0 data:", trace_0)


地震道数量:42
采样点数量：15000
Seismic data shape: (42, 15000)
Trace 0 data: [-0.02413988 -0.02503395 -0.02622605 -0.02384186 -0.02413988 -0.01996756
 -0.02026558 -0.02384186 -0.01996756 -0.01817942]


In [59]:

# 处理数据集
# 如果数据是 594 x 1500 将数据处理成 594  x 1 x 1500 按照batch_size 分批

import segyio
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# 读取SEGY文件
segy_file = '../SegyData/20220101_115300.sgy'

with segyio.open(segy_file, 'rb') as segy:
    # 获取地震数据的一维数组
    seismic_data = segy.trace.raw[:]

    # 获取地震道数量和每个地震道的时间采样点数量
    n_traces = segy.tracecount
    n_samples = segy.samples.size

    print("地震道数量:{}".format(n_traces))
    print("采样点数量：{}".format(n_samples))

    # 将一维数组重新形状为二维数组，形状为 (n_traces, n_samples)
    seismic_data = seismic_data.reshape((n_traces, n_samples))

# 随机划分为训练集和测试集
train_data, test_data = train_test_split(seismic_data, test_size=0.2, random_state=42)
#  分割成 训练集和测试集
# 将数据转换为PyTorch张量  
train_data = torch.from_numpy(train_data).float()
test_data = torch.from_numpy(test_data).float()

print(f"训练集大小: {train_data.shape}")
print(f"测试集大小: {test_data.shape}")


# reshape 42 x 15000 x 1
train_data = train_data.reshape(33,15000,1)
test_data = test_data.reshape(9,15000,1)

#  数据预处理  归一化 0，1 之间

# 上面的数据是 42 x 15000 然后reshape 42 x 1 x 15000

#  dataloader加载数据集
train_dataloader = DataLoader(train_data,batch_size = 3)
test_dataloader = DataLoader(test_data,batch_size = 3)

地震道数量:42
采样点数量：15000
训练集大小: torch.Size([33, 15000])
测试集大小: torch.Size([9, 15000])


In [60]:
#  设置训练网络的一些参数
total_train_step = 0
# 记录测试的次数
total_test_step = 0
# 训练的轮数
epoch = 200
# writer  = SummaryWriter("../encoder_train")


In [61]:
import torch.nn as nn
# 定义自编码器网络
class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder,self).__init__()

        #  编码器 最后输出 1000 x 1 的结果
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1) ,# 1D convolutional layer with 16 filters

            #  15000 x 16 -> 5000 x 16
            nn.MaxPool2d(3) ,# Max pooling layer with compression_ratio stride
       
            # 5000 x 16  -> 5000 x 32
            nn.Conv2d(16, 32, kernel_size=3, padding=1) ,# 1D convolutional layer with 64 filters

            #  5000 x 32 -> 5000 x 64
            nn.Conv2d(32, 64, kernel_size=3, padding=1) ,# 1D convolutional layer with 64 filters

            # 5000 x 64-> 1000 x 64
            nn.MaxPool2d(5),

            # 1000 x 64 -> 1000 x 128
            nn.Conv2d(64,128,kernel_size=3,padding=1),

            # 最后一层卷积 1000 x 128 -> 1000 x 1
            nn.Conv2d(128,1,kernel_size=3,padding=1)
        )

        # 解码器网络
        self.decoder = nn.Sequential(
            # 100 x 1 -> 100 x 128
            nn.ConvTranspose2d(1,128,kernel_size=3,padding=1),

            # 100 x 128 -> 500 x 128
            nn.Upsample(scale_factor=5),

            #  500 x 128 -> 500 x 64
            nn.ConvTranspose2d(128,64,kernel_size=3,padding=1),

            # 500 x 64 -> 500 x 32
            nn.ConvTranspose2d(64,32,kernel_size=3,padding=1),
            
            # 500 x 32 -> 1500 x 32
            nn.Upsample(scale_factor=3),

            #  1500 x 32 -> 1500 x 16
            nn.ConvTranspose2d(32,16,kernel_size=3,padding=1),

            #  1500 x 16 -> 1500 x 1
            nn.ConvTranspose2d(16,1,kernel_size=3,padding=1)
        )
    
    def forward(self,x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    

In [62]:
#  处理数据集
import segyio
import numpy as np
import glob
import os

# 定义文件夹路径和文件名格式
folder_path = "../testData/"
file_pattern = "*.sgy"

# 获取所有匹配的文件名
file_list = glob.glob(os.path.join(folder_path, file_pattern))

# 定义张量的维度
num_files = len(file_list)
num_samples = 15000
num_channels = 42
num_components = 1 # 新增维度 表示数据的分量 

# 创建一个空张量来保存所有数据
data = np.zeros((num_files, num_samples,num_channels))

count = 0
# 遍历所有文件并将它们保存到张量中
for i, file_name in enumerate(file_list):
    with segyio.open(file_name, "r", ignore_geometry=True) as segyfile:
        trace_data = segyfile.trace.raw[:]
        print(trace_data.shape)

        #  保存每一个台站的张量  42 x 15000的形状
        data[i,:,:] = np.reshape(trace_data, (num_samples,num_channels))

        #  扩展第二个维度为1
        # data[i] = np.expand_dims(data[i],axis=0)
        # data[i,:,:,:] = data[i,:,:].reshape()
        # print("执行力")
        count += 1
        print(count)

# 打印张量的形状
print(data.shape)
print(count)

data = data.reshape(680,42,15000,1)


(42, 15000)
1
(42, 15000)
2
(42, 15000)
3
(42, 15000)
4
(42, 15000)
5
(42, 15000)
6
(42, 15000)
7
(42, 15000)
8
(42, 15000)
9
(42, 15000)
10
(42, 15000)
11
(42, 15000)
12
(42, 15000)
13
(42, 15000)
14
(42, 15000)
15
(42, 15000)
16
(42, 15000)
17
(42, 15000)
18
(42, 15000)
19
(42, 15000)
20
(42, 15000)
21
(42, 15000)
22
(42, 15000)
23
(42, 15000)
24
(42, 15000)
25
(42, 15000)
26
(42, 15000)
27
(42, 15000)
28
(42, 15000)
29
(42, 15000)
30
(42, 15000)
31
(42, 15000)
32
(42, 15000)
33
(42, 15000)
34
(42, 15000)
35
(42, 15000)
36
(42, 15000)
37
(42, 15000)
38
(42, 15000)
39
(42, 15000)
40
(42, 15000)
41
(42, 15000)
42
(42, 15000)
43
(42, 15000)
44
(42, 15000)
45
(42, 15000)
46
(42, 15000)
47
(42, 15000)
48
(42, 15000)
49
(42, 15000)
50
(42, 15000)
51
(42, 15000)
52
(42, 15000)
53
(42, 15000)
54
(42, 15000)
55
(42, 15000)
56
(42, 15000)
57
(42, 15000)
58
(42, 15000)
59
(42, 15000)
60
(42, 15000)
61
(42, 15000)
62
(42, 15000)
63
(42, 15000)
64
(42, 15000)
65
(42, 15000)
66
(42, 15000)
67
(42,

In [63]:
#  处理数据集
# data = data.reshape()
print(data.shape)
# print(data[0])

(680, 42, 15000, 1)


In [64]:
#  使用dataLoader进行加载
train_data ,test_data = train_test_split(data,test_size=0.2,random_state=42)


#  分割成训练集和测试集
train_data = torch.from_numpy(train_data).float()
test_data = torch.from_numpy(test_data).float()

print("训练及大小：{}".format(train_data.shape))
print("测试集大小:{}".format(test_data.shape))


# 使用dataloader加载数据集

train_data_loader = DataLoader(train_data,batch_size=16)
test_data_loader = DataLoader(test_data,batch_size=16)


训练及大小：torch.Size([544, 42, 15000, 1])
测试集大小:torch.Size([136, 42, 15000, 1])


In [65]:

# 处理数据集
# 如果数据是 594 x 1500 将数据处理成 594  x 1 x 1500 按照batch_size 分批

import segyio
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# 读取SEGY文件
segy_file = '../SegyData/20220101_115300.sgy'

with segyio.open(segy_file, 'rb') as segy:
    # 获取地震数据的一维数组
    seismic_data = segy.trace.raw[:]

    # 获取地震道数量和每个地震道的时间采样点数量
    n_traces = segy.tracecount
    n_samples = segy.samples.size

    print("地震道数量:{}".format(n_traces))
    print("采样点数量：{}".format(n_samples))

    # 将一维数组重新形状为二维数组，形状为 (n_traces, n_samples)
    seismic_data = seismic_data.reshape((n_traces, n_samples))

# 随机划分为训练集和测试集
train_data_s, test_data_s = train_test_split(seismic_data, test_size=0.2, random_state=42)
#  分割成 训练集和测试集
# 将数据转换为PyTorch张量  
train_data_s = torch.from_numpy(train_data_s).float()
test_data_s = torch.from_numpy(test_data_s).float()

print(f"训练集大小: {train_data_s.shape}")
print(f"测试集大小: {test_data_s.shape}")

# reshape 42 x 15000 x 1
train_data_s = train_data_s.reshape(33,1,15000)
test_data_s = test_data_s.reshape(9,1,15000)
print(train_data_s.shape)

#  数据预处理  归一化 0，1 之间

# 上面的数据是 42 x 15000 然后reshape 42 x 1 x 15000

#  dataloader加载数据集
train_dataloader_s = DataLoader(train_data_s,batch_size = 3)
test_dataloader_s = DataLoader(test_data_s,batch_size = 3)

地震道数量:42
采样点数量：15000
训练集大小: torch.Size([33, 15000])
测试集大小: torch.Size([9, 15000])
torch.Size([33, 1, 15000])


In [66]:
import torch.nn as nn
# 定义自编码器网络
class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder,self).__init__()

        #  编码器 最后输出 1000 x 1 的结果
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=3, padding=1) ,# 1D convolutional layer with 16 filters

            #  15000 x 16 -> 5000 x 16
            nn.MaxPool1d(3) ,# Max pooling layer with compression_ratio stride
       
            # 5000 x 16  -> 5000 x 32
            nn.Conv1d(16, 32, kernel_size=3, padding=1) ,# 1D convolutional layer with 64 filters

            #  5000 x 32 -> 5000 x 64
            nn.Conv1d(32, 64, kernel_size=3, padding=1) ,# 1D convolutional layer with 64 filters

            # 5000 x 64-> 1000 x 64
            nn.MaxPool1d(5),

            # 1000 x 64 -> 1000 x 128
            nn.Conv1d(64,128,kernel_size=3,padding=1),

            # 最后一层卷积 1000 x 128 -> 1000 x 1
            nn.Conv1d(128,1,kernel_size=3,padding=1)
        )

        # 解码器网络
        self.decoder = nn.Sequential(
            # 100 x 1 -> 100 x 128
            nn.ConvTranspose1d(1,128,kernel_size=3,padding=1),

            # 100 x 128 -> 500 x 128
            nn.Upsample(scale_factor=5),

            #  500 x 128 -> 500 x 64
            nn.ConvTranspose1d(128,64,kernel_size=3,padding=1),

            # 500 x 64 -> 500 x 32
            nn.ConvTranspose1d(64,32,kernel_size=3,padding=1),
            
            # 500 x 32 -> 1500 x 32
            nn.Upsample(scale_factor=3),

            #  1500 x 32 -> 1500 x 16
            nn.ConvTranspose1d(32,16,kernel_size=3,padding=1),

            #  1500 x 16 -> 1500 x 1
            nn.ConvTranspose1d(16,1,kernel_size=3,padding=1)
        )
    
    def forward(self,x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    

In [68]:
autoCoder = AutoEncoder()
writer = SummaryWriter("../encoder_train")
epoch = 200
for i in range(epoch):
    print("-------第{}轮训练开始".format(i + 1))

    #  训练步骤开始
    autoCoder.train()

    for data in train_dataloader_s:
        inputs = data
        #  将梯度清零
       
        # 前向传播
        outputs = autoCoder(inputs)
        # 计算损失  对比原始输入和 自编码器输出的结果 看看压缩效果
        loss = loss_fn(outputs,inputs)
        # 反向传播 计算梯度
        optimizer.zero_grad()
        loss.backward()
        # 优化
        optimizer.step()

        # 统计训练次数
        total_train_step = total_train_step + 1

        if total_train_step % 10 == 0:
            # 绘制训练损失
            writer.add_scalar("train_A_loss",loss.item(),total_train_step)
            print("训练次数:{},Loss{}".format(total_train_step,loss.item()))


    # #  测试步骤开始
    # autoCoder.eval()
    # total_test_loss = 0
    # total_accuracy = 0
    # with torch.no_grad():

    #     #  取出测试数据集的数据
    #     for data in test_dataloader_s:

    #         inputs = data

    #         outputs = autoCoder(inputs)

    #         # #  取出数据
    #         # imgs = data
    #         # # imgs = imgs.to(device)
    #         # # targets = targets.to(device)

    #         # outputs = tudui(imgs)

    #         loss = loss_fn(outputs,inputs) # 计算损失
    #         optimizer.zero_grad()

    #         loss.backward()

    #         optimizer.step()

    #         #  统计测试集上面的总损失
    #         total_test_loss = total_test_loss + loss.item()
    #         accuracy = (outputs.argmax(1) == inputs).sum()
    #         total_accuracy = total_accuracy + accuracy


    # print("整体测试集上面的Loss:{}".format(total_test_loss))
    # print("整体测试及上面的正确率:{}".format(total_accuracy / test_data_size))
    # # writer.add_scalar("test_loss",loss.item(),total_test_step)
    # # writer.add_scalar("test_accuracy",total_accuracy / test_data_size,total_test_step)
    # total_test_step = total_test_step + 1


-------第1轮训练开始
训练次数:2210,Loss0.2720562815666199
-------第2轮训练开始
训练次数:2220,Loss0.2605443000793457
-------第3轮训练开始
训练次数:2230,Loss0.26640787720680237
-------第4轮训练开始
训练次数:2240,Loss0.2464098483324051
-------第5轮训练开始
训练次数:2250,Loss0.26119691133499146
-------第6轮训练开始
训练次数:2260,Loss0.27709704637527466
-------第7轮训练开始
训练次数:2270,Loss0.27440446615219116
-------第8轮训练开始
训练次数:2280,Loss0.2797960042953491
-------第9轮训练开始
训练次数:2290,Loss0.27016228437423706
-------第10轮训练开始
训练次数:2300,Loss0.2701079547405243
训练次数:2310,Loss0.265067458152771
-------第11轮训练开始
训练次数:2320,Loss0.2720562815666199
-------第12轮训练开始
训练次数:2330,Loss0.2605443000793457
-------第13轮训练开始
训练次数:2340,Loss0.26640787720680237
-------第14轮训练开始
训练次数:2350,Loss0.2464098483324051
-------第15轮训练开始
训练次数:2360,Loss0.26119691133499146
-------第16轮训练开始
训练次数:2370,Loss0.27709704637527466
-------第17轮训练开始
训练次数:2380,Loss0.27440446615219116
-------第18轮训练开始
训练次数:2390,Loss0.2797960042953491
-------第19轮训练开始
训练次数:2400,Loss0.27016228437423706
-------第20轮训练开始
训练次数:2410,Loss0.2701