In [1]:
import numpy as np
from scipy.fftpack import fft
import matplotlib.pyplot as plt
from matplotlib.pylab import mpl
import os
import tqdm
import math
import time


# mpl.rcParams['font.sans-serif'] = ['SimHei']  #显示中文
mpl.rcParams['axes.unicode_minus'] = False  #显示负号
%matplotlib qt5
plt.rcParams['xtick.direction'] = 'in'
plt.rcParams['ytick.direction'] = 'in'

In [2]:
path = r'E:\data\CM-4M-o18-2020.10.17-1-60'
path = path.replace('\\','/')
os.chdir(path)
file_list = os.listdir(path)
os.getcwd()

data_path = 'CM-4M-o18-2020.10.17-1-60.txt'
amplitude = []
energy = []
duration = []
rise_time = []
rms = []
with open(data_path, "r") as f:
    f.readline()
    for _ in range(84641):
        data = f.readline().split(',')
        amplitude.append(float(data[5].strip()))
        rise_time.append(float(data[7].strip()))
        duration.append(float(data[8].strip()))
        energy.append(float(data[9].strip()))

In [None]:
# sorted(amplitude,reverse=True)
np.argsort(amplitude)

In [None]:
print('amplitude: ', min(amplitude), np.argmin(amplitude)
      , max(amplitude), np.argmax(amplitude))
print('duration: ', min(duration), np.argmin(duration)
      , max(duration), np.argmax(duration))
print('rise_time: ', min(rise_time), np.argmin(rise_time)
      , max(rise_time), np.argmax(rise_time))
print('energy: ', min(energy), np.argmin(energy)
      , max(energy), np.argmax(energy))

In [8]:
fig, ax = plt.subplots(2, 1)
# Amplitude - Energy
ax[0].scatter(amplitude, energy, marker='o',c='',edgecolors='g', label = 'Amplitude-Energy')
ax[0].set_xlabel('Amplitude /μV')
ax[0].set_ylabel('Energy /aJ')
ax[0].set_title("Amplitude - Energy")
ax[0].set_xlim(10.0, 41.0)
ax[0].set_xticks(np.linspace(10.0, 41.0, 7))
ax[0].set_ylim(0.70, 4.5)
ax[0].set_yticks(np.linspace(0.70, 4.5, 6))
ax[0].legend(loc='upper left')
ax[0].grid(True,linestyle = "--",color = 'gray' ,linewidth = '0.5',axis='both')

# Duration - Amplitude
ax[1].scatter(duration, amplitude, marker='o',c='',edgecolors='purple', label = 'Duration-Amplitude')
ax[1].set_xlabel('Duration /s')
ax[1].set_ylabel('Amplitude /μV')
ax[1].set_title("Duration - Amplitude")
ax[1].set_xlim(0.00079, 0.00103)
ax[1].set_xticks(np.linspace(0.00079, 0.00103, 6))
ax[1].set_ylim(10.0, 41.0)
ax[1].set_yticks(np.linspace(10.0, 41.0, 7))
ax[1].legend(loc='upper left')
ax[1].grid(True,linestyle = "--",color = 'gray' ,linewidth = '0.5',axis='both')

plt.tight_layout()

  ax[0].scatter(amplitude, energy, marker='o',c='',edgecolors='g', label = 'Amplitude-Energy')
  ax[1].scatter(duration, amplitude, marker='o',c='',edgecolors='purple', label = 'Duration-Amplitude')


In [5]:
file = 'CM-4M-o18-2020.10.17-1-60_1_37198_663376892.txt'
# CM-4M-o18-2020.10.17-1-60_1_45583_819030862
process = Preprocessing(25, 5, 60, file)
valid_time, valid_data = [], []
valid_time, valid_data, features, valid_wave_idx = process.main(file)
valid_time, valid_data, features, valid_wave_idx

(array([663.3768922, 663.3768924, 663.3768926, ..., 663.3779156,
        663.3779158, 663.377916 ]),
 array([-6.408690e-06, -5.340580e-06, -4.272460e-06, ...,  1.419067e-05,
         2.380371e-05,  2.517700e-05]),
 ['37198, 663.3768922, 1, 3.55655882, 25.0, 187.53052000, 59.4, 0.0008650, 0.0010238, 126.79085217, 35.19138811, 205'],
 array([   0,    1,    2, ..., 5117, 5118, 5119], dtype=int64))

In [7]:
Ts = 0.0000002
Fs = 1 / Ts
N = 5120
end = Ts * N
time = np.arange(0, end, Ts)
frq = (np.arange(N) / N) * Fs
fft_y = fft(valid_data)

abs_y = np.abs(fft_y)
normalization = abs_y / N
half_frq = frq[range(int(N / 2))]
normalization_half = normalization[range(int(N / 2))]

titles = ['Original Waveform', 'Bilateral amplitude spectrum (normalized)', 'Unilateral amplitude spectrum (normalized)']
colors = ['purple', 'green', 'blue']
x_label = ['Time (s)', 'Freq (Hz)', 'Freq (Hz)']
y_label = ['Amplitude (μV)', '|Y(freq)|', '|Y(freq)|']
xs = [time, frq, half_frq]
ys = [valid_data, normalization, normalization_half]

for i, [x, y, title, color, xlabel, ylabel] in enumerate(zip(xs, ys, titles, colors, x_label, y_label)):
    plt.subplot(311 + i)
    plt.plot(x, y, color)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title, color=color)
plt.tight_layout()

In [4]:
class Preprocessing:
    def __init__(self, thr_dB, thr_noise_ratio, magnification_dB, data_path):
        self.thr_dB = thr_dB
        self.thr_noise_ratio = thr_noise_ratio
        self.magnification_dB = magnification_dB
        self.thr_V = pow(10, self.thr_dB / 20) / (self.thr_noise_ratio * pow(10, 6))
        self.counts = 0
        self.duration = 0
        self.amplitude = 0
        self.rise_time = 0
        self.energy = 0
        self.RMS = 0
        self.hit_num = 0
        self.time = 0
        self.channel_num = 0
        self.sample_interval = 0
        self.magnification = pow(10, self.magnification_dB / 20)
        self.data_path = data_path

    def skip_n_column(self, file, n=3):
        for _ in range(n):
            file.readline()

    def cal_features(self, dataset, time_label, valid_wave_idx):
        start = time_label[valid_wave_idx[0]]
        end = time_label[valid_wave_idx[-1]]
        self.duration = end - start
        max_idx = np.argmax(abs(dataset))
        self.amplitude = abs(dataset[max_idx])
        self.rise_time = time_label[max_idx] - start
        valid_data = dataset[valid_wave_idx[0]:(valid_wave_idx[-1] + 1)]
        self.energy = np.sum(np.multiply(pow(valid_data, 2), self.sample_interval))
        self.RMS = math.sqrt(self.energy / self.duration)

        return valid_data

    def cal_counts(self, valid_data):
        self.counts = 0
        N = len(valid_data)
        for idx in range(1, N):
            if valid_data[idx - 1] <= self.thr_V <= valid_data[idx]:
                self.counts += 1

    def main(self, file_name, data=[]):
        with open(file_name, "r") as f:
            self.skip_n_column(f)
            self.sample_interval = float(f.readline()[29:])
            self.skip_n_column(f)
            points_num = int(f.readline()[36:])
            self.channel_num = int(f.readline().strip()[16:])
            self.hit_num = int(f.readline()[12:])
            self.time = float(f.readline()[14:])
            dataset = np.array([float(i.strip("\n")) for i in f.readlines()[1:]]) / self.magnification
            time_label = np.linspace(self.time, self.time + self.sample_interval * (points_num - 1), points_num)

            # calculate the duration, amplitude, rise_time, energy and counts
            valid_wave_idx = np.where(abs(dataset) >= self.thr_V)[0]
            valid_time = time_label[valid_wave_idx[0]:(valid_wave_idx[-1] + 1)]
            # print(dataset[0], dataset[-1], len(dataset))
            # print(valid_wave_idx)

            if valid_wave_idx.shape[0]:
                valid_data = self.cal_features(dataset, time_label, valid_wave_idx)
                self.cal_counts(valid_data)
                if self.counts >= 2:
                    data = [
                        '{}, {:.7f}, {}, {:.8f}, {:.1f}, {:.8f}, {:.1f}, {:.7f}, {:.7f}, {:.8f}, {:.8f}, {}'.format(
                            self.hit_num, self.time, self.channel_num, self.thr_V * pow(10, 6), self.thr_dB,
                                                                       self.amplitude * pow(10, 6),
                                                                       20 * np.log10(
                                                                           self.thr_noise_ratio * self.amplitude * pow(
                                                                               10, 6)), self.rise_time,
                            self.duration, self.energy * pow(10, 14), self.RMS * pow(10, 6), self.counts)]
        # ID, Time(s), Chan, Thr(μV)P, Thr(dB), Amp(μV), Amp(dB), RiseT(s), Dur(s), Eny(aJ), RMS(μV), Counts

        return valid_time, valid_data, data, valid_wave_idx