In [2]:
# -*- coding: UTF-8 -*-
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import time
from tqdm import tqdm
from scipy.fftpack import fft
from matplotlib.pylab import mpl
import csv
import array
import sqlite3
import pprint
from matplotlib.ticker import FuncFormatter
from matplotlib import ticker, cm
import matplotlib
from collections import Counter
from mpl_toolkits.mplot3d import Axes3D
from sklearn.base import BaseEstimator, ClusterMixin
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.utils import check_random_state
from sklearn.decomposition import PCA, FastICA
from sklearn.cluster import KMeans
from scipy.optimize import curve_fit
from scipy.interpolate import make_interp_spline
from scipy.signal import savgol_filter
import warnings
import pywt


%matplotlib qt
warnings.filterwarnings("ignore")
plt.rcParams['xtick.direction'] = 'in'
plt.rcParams['ytick.direction'] = 'in'
mpl.rcParams['axes.unicode_minus'] = False  #显示负号

In [288]:
path = r'E:\Data\vallen'
# E:\data\vallen
# /home/Yuanbincheng/data
fold = '6016_CR_1'
os.chdir('/'.join([path, fold]))
path_pri = fold + '.pridb'
path_tra = fold + '.tradb'
features_path = fold + '.txt'
# 2020.11.10-PM-self
# 6016_CR_1
# 316L-1.5-z3-AE-3 sensor-20200530
# Ni-tension test-electrolysis-1-0.01-AE-20201031
# Ni-tension test-pure-1-0.01-AE-20201030
# 2020.11.10-PM-self

### Convert

In [289]:
def sqlite_read(path):
    """
    python读取sqlite数据库文件
    """
    mydb = sqlite3.connect(path)  # 链接数据库
    mydb.text_factory = lambda x: str(x, 'gbk', 'ignore')
    cur = mydb.cursor()  # 创建游标cur来执行SQL语句

    # 获取表名
    cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
    Tables = cur.fetchall()  # Tables 为元组列表

    # 获取表结构的所有信息
    if path[-5:] == 'pridb':
        cur.execute("SELECT * FROM {}".format(Tables[3][0]))
        res = cur.fetchall()[-2][1]
    elif path[-5:] == 'tradb':
        cur.execute("SELECT * FROM {}".format(Tables[1][0]))
        res = cur.fetchall()[-3][1]
    return int(res)

def read_vallen_data(path_pri, path_tra, min_cnts=2):
    conn_tra = sqlite3.connect(path_tra)
    conn_pri = sqlite3.connect(path_pri)
    result_tra = conn_tra.execute("Select Time, Chan, Thr, SampleRate, Samples, TR_mV, Data, TRAI FROM view_tr_data")
    result_pri = conn_pri.execute("Select SetID, Time, Chan, Thr, Amp, RiseT, Dur, Eny, RMS, Counts, TRAI FROM view_ae_data")
    data_tra, data_pri, chan_1, chan_2, chan_3, chan_4 = [], [], [], [], [], []
    N_pri = sqlite_read(path_pri)
    N_tra = sqlite_read(path_tra)
    for _ in tqdm(range(N_tra), ncols=80):
        i = result_tra.fetchone()
        data_tra.append(i)
    for _ in tqdm(range(N_pri), ncols=80):
        i = result_pri.fetchone()
        if i[-2] is not None and i[-2] > min_cnts and i[-1] > 0:
            data_pri.append(i)
            if i[2] == 1:
                chan_1.append(i)
            elif i[2] == 2:
                chan_2.append(i)
            elif i[2] == 3:
                chan_3.append(i)
            elif i[2] == 4:
                chan_4.append(i)
    data_tra = sorted(data_tra, key=lambda x: x[-1])
    data_pri = np.array(data_pri)
    chan_1 = np.array(chan_1)
    chan_2 = np.array(chan_2)
    chan_3 = np.array(chan_3)
    chan_4 = np.array(chan_4)
    return data_tra, data_pri, chan_1, chan_2, chan_3, chan_4

In [293]:
data_tra, data_pri, chan_1, chan_2, chan_3, chan_4 = read_vallen_data(path_pri, path_tra)

100%|███████████████████████████████| 141033/141033 [00:01<00:00, 111005.76it/s]
100%|███████████████████████████████| 357771/357771 [00:01<00:00, 215150.20it/s]


### Decomposition-FastICA

In [290]:
def ICA(dim, method, *args):
    n = len(args)
    x = np.zeros([args[0].shape[0], n])
    for i in range(n):
        x[:, i] = args[i]

    x_mean = np.mean(x, axis=0)
    x_std = np.std(x, axis=0)
    x_nor = (x - x_mean) / x_std
    if method == 'pca':
        pca = PCA(n_components=dim)
        S_ = pca.fit_transform(x_nor)
        A_ = pca.components_  
    elif  method == 'ica':
        ica = FastICA(n_components=dim)
        S_ = ica.fit_transform(x_nor)  # 重构信号
        A_ = ica.mixing_  # 获得估计混合后的矩阵
    return S_, A_

### Clustering

#### Cluster-Kmeans

In [None]:
kmeans = KMeans(n_clusters=2, random_state=69)
kmeans.fit(S_)
y_pre = kmeans.labels_
cls_KKM = []
for i in range(2):
    cls_KKM .append(pred == i)

#### Cluster-Kernel Kmeans

In [291]:
class KernelKMeans(BaseEstimator, ClusterMixin):
    """
    Kernel K-means
    
    Reference
    ---------
    Kernel k-means, Spectral Clustering and Normalized Cuts.
    Inderjit S. Dhillon, Yuqiang Guan, Brian Kulis.
    KDD 2004.
    """

    def __init__(self, n_clusters=3, max_iter=50, tol=1e-3, random_state=None,
                 kernel="rbf", gamma=None, degree=3, coef0=1,
                 kernel_params=None, verbose=0):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.tol = tol
        self.random_state = random_state
        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.kernel_params = kernel_params
        self.verbose = verbose
        
    @property
    def _pairwise(self):
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma,
                      "degree": self.degree,
                      "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self.kernel,
                                filter_params=True, **params)

    def fit(self, X, y=None, sample_weight=None):
        n_samples = X.shape[0]

        K = self._get_kernel(X)

        sw = sample_weight if sample_weight else np.ones(n_samples)
        self.sample_weight_ = sw

        rs = check_random_state(self.random_state)
        self.labels_ = rs.randint(self.n_clusters, size=n_samples)

        dist = np.zeros((n_samples, self.n_clusters))
        self.within_distances_ = np.zeros(self.n_clusters)

        for it in range(self.max_iter):
            dist.fill(0)
            self._compute_dist(K, dist, self.within_distances_,
                               update_within=True)
            labels_old = self.labels_
            self.labels_ = dist.argmin(axis=1)

            # Compute the number of samples whose cluster did not change 
            # since last iteration.
            n_same = np.sum((self.labels_ - labels_old) == 0)
            if 1 - float(n_same) / n_samples < self.tol:
                if self.verbose:
                    print("Converged at iteration", it + 1)
                break

        self.X_fit_ = X

        return self

    def _compute_dist(self, K, dist, within_distances, update_within):
        """Compute a n_samples x n_clusters distance matrix using the 
        kernel trick."""
        sw = self.sample_weight_

        for j in range(self.n_clusters):
            mask = self.labels_ == j

            if np.sum(mask) == 0:
                raise ValueError("Empty cluster found, try smaller n_cluster.")

            denom = sw[mask].sum()
            denomsq = denom * denom

            if update_within:
                KK = K[mask][:, mask]  # K[mask, mask] does not work.
                dist_j = np.sum(np.outer(sw[mask], sw[mask]) * KK / denomsq)
                within_distances[j] = dist_j
                dist[:, j] += dist_j
            else:
                dist[:, j] += within_distances[j]

            dist[:, j] -= 2 * np.sum(sw[mask] * K[:, mask], axis=1) / denom

    def predict(self, X):
        K = self._get_kernel(X, self.X_fit_)
        n_samples = X.shape[0]
        dist = np.zeros((n_samples, self.n_clusters))
        self._compute_dist(K, dist, self.within_distances_,
                           update_within=False)
        return dist.argmin(axis=1)

In [None]:
km = KernelKMeans(n_clusters=8, max_iter=100, random_state=100, verbose=1, kernel="rbf")
# print(km.fit_predict(x)[:10])
# print(km.predict(x[:10]))

### Plot format

In [292]:
color_1 = [255/255, 0/255, 102/255] # red
color_2 = [0/255, 136/255, 204/255] # blue

def plot_norm(ax, xlabel=None, ylabel=None, zlabel=None, title=None, x_lim=[], y_lim=[], z_lim=[], legend=True, grid=False, 
              legend_loc='upper left', font_color='black', legendsize=11, labelsize=14, titlesize=15, ticksize=13, linewidth=2):
    ax.spines['bottom'].set_linewidth(linewidth)
    ax.spines['left'].set_linewidth(linewidth)
    ax.spines['right'].set_linewidth(linewidth)
    ax.spines['top'].set_linewidth(linewidth)

    # 设置坐标刻度值的大小以及刻度值的字体 Arial
    ax.tick_params(which='both', width=linewidth, labelsize=ticksize, colors=font_color)
    labels = ax.get_xticklabels() + ax.get_yticklabels()
    [label.set_fontname('Arial') for label in labels]

    font_legend = {'family': 'Arial', 'weight': 'normal', 'size': legendsize}
    font_label = {'family': 'Arial', 'weight': 'bold', 'size': labelsize, 'color':font_color}
    font_title = {'family': 'Arial', 'weight': 'bold', 'size': titlesize, 'color':font_color}

    if x_lim:
        ax.set_xlim(x_lim[0], x_lim[1])
    if y_lim:
        ax.set_ylim(y_lim[0], y_lim[1])
    if z_lim:
        ax.set_zlim(z_lim[0], z_lim[1])
    if legend:
        plt.legend(loc=legend_loc, prop=font_legend)
    if grid:
        ax.grid(ls='-.')
    if xlabel:
        ax.set_xlabel(xlabel, font_label)
    if ylabel:
        ax.set_ylabel(ylabel, font_label)
    if zlabel:
        ax.set_zlabel(zlabel, font_label)
    if title:
        ax.set_title(title, font_title)
    plt.tight_layout()

### Features extract

In [298]:
data_pri.shape, chan_1.shape, chan_2.shape, chan_3.shape, chan_4.shape

((591, 11), (0,), (0,), (591, 11), (0,))

In [12]:
min(Eny), max(Eny), min(Dur), max(Dur), min(Amp), max(Amp)

(0.204163881458538,
 9907.101731723107,
 5.5,
 6543.700000000001,
 16.203593179194673,
 3460.007263530703)

In [299]:
# SetID, Time, Chan, Thr, Amp, RiseT, Dur, Eny, RMS, Counts, TRAI
abnormal_idx = np.argsort(Eny)[::-1][0]
chan = np.delete(chan_3, abnormal_idx, axis=0)
# chan = chan_3
Time = chan[:, 1]
Amp = chan[:, 4]
RiseT = chan[:, 5]
Dur = chan[:, 6]
Eny = chan[:, 7]
RMS = chan[:, 8]
Counts = chan[:, 9]

### Energy-Time Curve

In [344]:
os.getcwd()

'E:\\Data\\vallen\\Ni-tension test-electrolysis-1-0.01-AE-20201031'

In [362]:
df_1 = pd.DataFrame({'time_pop1':Time[cls_KKM[0]], 'energy_pop1':Eny[cls_KKM[0]]})
df_2 = pd.DataFrame({'time_pop2':Time[cls_KKM[1]], 'energy_pop2':Eny[cls_KKM[1]]})
df_3 = pd.DataFrame({'time':time, 'displace':displace, 'load':smooth_load, 'strain':strain, 'stress':smooth_stress})
df_1.to_csv('E-T_electrolysis_pop1.csv')
df_2.to_csv('E-T_electrolysis_pop2.csv')
df_3.to_csv('E-T_electrolysis_RawData.csv')

#### Convert to GIF

In [436]:
# Convert E-A-D gif
azim = -45
elev = 15
plt.ion()
for t in tqdm(range(50, 30000, 50)):
    plt.clf()
    fig = plt.gcf()
    ax = fig.gca(projection='3d')
    ax.view_init(elev, azim)
    
    Time_1_idx = np.where(Time[cls_KKM[0]] <= t)[0]
    Time_2_idx = np.where(Time[cls_KKM[1]] <= t)[0]

    if list(Time_1_idx):
        ax.scatter3D(np.log10(Amp)[cls_KKM[0]][:Time_1_idx[-1]+1], np.log10(Eny)[cls_KKM[0]][:Time_1_idx[-1]+1], np.log10(Dur)[cls_KKM[0]][:Time_1_idx[-1]+1], s=15, color=color_1)
    if list(Time_2_idx):
        ax.scatter3D(np.log10(Amp)[cls_KKM[1]][:Time_2_idx[-1]+1], np.log10(Eny)[cls_KKM[1]][:Time_2_idx[-1]+1], np.log10(Dur)[cls_KKM[1]][:Time_2_idx[-1]+1], s=15, color=color_2)
    plot_norm(ax, xlabelz[0], xlabelz[2], xlabelz[1], x_lim=[1, 4], y_lim=[-1, 4.5], z_lim=[0.5, 4], legend=False)
    
#     plt.pause(0.001)
    try:
        plt.savefig('./gif/E-A-D_3dim/' + str(t))
    except FileNotFoundError:
        os.mkdir('./gif/E-A-D_3dim/')
        plt.savefig('./gif/E-A-D_3dim/' + str(t))
    plt.ioff()

100%|████████████████████████████████████████████████████████████████████████████████| 599/599 [02:09<00:00,  4.63it/s]


In [None]:
# Convert E-T gif
plt.ion()
for t in tqdm(range(100, 30000, 50)):
    plt.clf()
    fig = plt.gcf()
    ax = fig.gca(projection='3d')
    
    Time_1_idx = np.where(Time[cls_KKM[0]] <= t)[0]
    Time_2_idx = np.where(Time[cls_KKM[1]] <= t)[0]
    time_idx = np.where(time <= t)[0]
    
    fig = plt.figure(figsize=[6, 3.9])
    ax = plt.subplot()
    if list(Time_1_idx):
        ax.bar(Time[cls_KKM[0]][:Time_1_idx[-1]+1], Eny[cls_KKM[0]][:Time_1_idx[-1]+1], color=color_1, width=55, log=True, label='Population 1')
    if list(Time_2_idx):
        ax.bar(Time[cls_KKM[1]][:Time_2_idx[-1]+1], Eny[cls_KKM[1]][:Time_2_idx[-1]+1], color=color_2, width=55, log=True, label='Population 2')
    plot_norm(ax, xlabelz[0], xlabelz[2], x_lim=[0, 29000], y_lim=[0, 15000], legend_loc='upper right')

    ax2 = ax.twinx()
    ax2.plot(time[:time_idx[-1]], res[:time_idx[-1]], 'r', lw=3)
    plot_norm(ax2, 'Time (s)', 'Stress (MPa)', x_lim=[0, 29000], y_lim=[0, 700], legend=False, font_color='r')

    ax3 = ax.twiny()
    for key in ['right', 'top']:
        ax3.spines[key].set_color('r')
    plot_norm(ax3, 'Strain (%)', x_lim=[0, strain_max], y_lim=[0, 15000], legend=False, font_color='r')
    
#     plt.pause(0.001)
    try:
        plt.savefig('./gif/E-T/' + str(t))
    except FileNotFoundError:
        os.mkdir('./gif/E-T/')
        plt.savefig('./gif/E-T/' + str(t))
    plt.ioff()

#### E-T curve

In [338]:
def load_stress(path_curve):
    data = pd.read_csv(path_curve, encoding='gbk').drop(index=[0]).astype('float32')
    data_drop = data.drop_duplicates(['拉伸应变 (应变 1)'])
    time = np.array(data_drop.iloc[:, 0])
    displace = np.array(data_drop.iloc[:, 1])
    load = np.array(data_drop.iloc[:, 2])
    strain = np.array(data_drop.iloc[:, 3])
    stress = np.array(data_drop.iloc[:, 4])
    sort_idx = np.argsort(strain)
    strain = strain[sort_idx]
    stress = stress[sort_idx]
    return time, displace, load, strain, stress

def smooth_curve(time, stress, window_length=99, polyorder=1, epoch=200, curoff=[2500, 25000]):
    y_smooth = savgol_filter(stress, window_length, polyorder, mode= 'nearest')
    for i in range(epoch):
        if i == 5:
            front = y_smooth
        y_smooth = savgol_filter(y_smooth, window_length, polyorder, mode= 'nearest')

    front_idx = np.where(time < curoff[0])[0][-1]
    rest_idx = np.where(time > curoff[1])[0][0]
    res = np.concatenate((stress[:40], front[40:front_idx], y_smooth[front_idx:rest_idx], stress[rest_idx:]))
    return res

In [352]:
path_curve = r'E:\data\vallen\Ni-tension test-electrolysis-1-0.01-AE-20201031\Ni-tension test-electrolysis-1-0.01-20201031.is_tens_RawData\Specimen_RawData_1.csv'
time, displace, load, strain, stress = load_stress(path_curve)
smooth_stress = smooth_curve(time, stress)
smooth_load = smooth_curve(time, load)
strain_max = strain[-1] * 29000 / Time[-1]

# fig = plt.figure(figsize=[6, 3.9])
# ax = plt.subplot()
# # ax.plot(strain, stress, 'black')
# # ax.plot(strain, smooth_stress, 'y')
# ax.plot(displace, smooth_load, 'g')

[<matplotlib.lines.Line2D at 0x1f884ef1d00>]

In [343]:
t = 29900
Time_1_idx = np.where(Time[cls_KKM[0]] <= t)[0]
Time_2_idx = np.where(Time[cls_KKM[1]] <= t)[0]
time_idx = np.where(time <= t)[0]

fig = plt.figure(figsize=[6, 3.9])
ax = plt.subplot()
# ax.bar(Time[cls_KKM[0]][:Time_1_idx[-1]+1], Eny[cls_KKM[0]][:Time_1_idx[-1]+1], color=color_1, width=55, log=True, label='Population 1')
# ax.bar(Time[cls_KKM[1]][:Time_2_idx[-1]+1], Eny[cls_KKM[1]][:Time_2_idx[-1]+1], color=color_2, width=55, log=True, label='Population 2')
ax.semilogy(Time[cls_KKM[0]][:Time_1_idx[-1]+1], Eny[cls_KKM[0]][:Time_1_idx[-1]+1], '.', Marker='.', color=color_1, label='Population 1')
ax.semilogy(Time[cls_KKM[1]][:Time_2_idx[-1]+1], Eny[cls_KKM[1]][:Time_2_idx[-1]+1], '.', Marker='.', color=color_2, label='Population 2')
plot_norm(ax, 'Time (s)', 'Energy (aJ)', x_lim=[0, 29000], y_lim=[0, 15000], legend_loc='upper right')

ax2 = ax.twinx()
ax2.plot(time[:time_idx[-1]], smooth_stress[:time_idx[-1]], 'r', lw=3)
plot_norm(ax2, 'Time (s)', 'Stress (MPa)', x_lim=[0, 29000], y_lim=[0, 700], legend=False, font_color='r')

ax3 = ax.twiny()
for key in ['right', 'top']:
    ax3.spines[key].set_color('r')
plot_norm(ax3, 'Strain (%)', x_lim=[0, strain_max], y_lim=[0, 15000], legend=False, font_color='r')

### Feature selection

#### Convert to GIF

In [None]:
azim = -135
elev = 20
plt.ion()
for i in range(360):
    plt.clf()
    fig = plt.gcf()
    ax = fig.gca(projection='3d')
    ax.view_init(elev, azim)
#     ax.scatter3D(S_[:, 0], S_[:, 1], S_[:, 2], cmap='Blues')
#     ax.scatter3D(S_[cls_1_KKM, 0], S_[cls_1_KKM, 1], S_[cls_1_KKM, 2], s=15, c='blue', label='Class 2')
#     ax.scatter3D(S_[cls_2_KKM, 0], S_[cls_2_KKM, 1], S_[cls_2_KKM, 2], s=15, c='red', label='Class 1')
    ax.scatter3D(np.log10(Amp)[cls_2_KKM], np.log10(Eny)[cls_2_KKM], np.log10(Dur)[cls_2_KKM], s=15, c='blue', label='Class 2')
    ax.scatter3D(np.log10(Amp)[cls_1_KKM], np.log10(Eny)[cls_1_KKM], np.log10(Dur)[cls_1_KKM], s=15, c='red', label='Class 1')
    plot_norm(ax, 'Amplitude(μV)', 'Energy(aJ)', 'Duration(μs)', 'Chan 2', legend=False)
    # 'Amplitude(μV)', 'Energy(aJ)', 'Duration(μs)'
    # 'Component 1', 'Component 2', 'Component 3'
#     plt.pause(0.001)
    elev, azim = ax.elev, ax.azim - 1
    try:
        plt.savefig('./gif/res_E-A-D-C_2dim/' + str(i))
    except FileNotFoundError:
        os.mkdir('./gif/res_E-A-D-C_2dim/')
        plt.savefig('./gif/res_E-A-D-C_2dim/' + str(i))
    plt.ioff()

#### Classify with features

In [247]:
'''
Ni-electrolysis-2-100
Ni-pure-2-100
Al-3-55
'''
S_, A_ = ICA(3, 'ica', np.log10(Amp), np.log10(Eny), np.log10(Dur))
km = KernelKMeans(n_clusters=2, max_iter=100, random_state=55, verbose=1, kernel="rbf")
pred = km.fit_predict(S_)
cls_KKM = []
for i in range(2):
    cls_KKM .append(pred == i)
# cls_KKM[0], cls_KKM[1] = pred == 1, pred == 0

Converged at iteration 15


In [15]:
# 3D
fig = plt.figure(figsize=[6, 4.5])
ax = plt.subplot(projection='3d')

# ax.scatter3D(S_[:, 0], S_[:, 1], S_[:, 2], cmap='Blues')
# ax.scatter3D(np.log10(Amp), np.log10(Eny), np.log10(Dur), cmap='Blues')
for i, color in enumerate([color_1, color_2]):
    ax.scatter3D(np.log10(Amp)[cls_KKM[i]], np.log10(Eny)[cls_KKM[i]], np.log10(Dur)[cls_KKM[i]], s=15, color=color)
    
# ax.scatter3D(np.log10(Amp)[cls_KKM[0]][points_2_3], np.log10(Eny)[cls_KKM[0]][points_2_3], np.log10(Dur)[cls_KKM[0]][points_2_3], s=75, c='green', label='Class 2')
# ax.scatter3D(np.log10(Amp)[cls_KKM[1]][points_1_3], np.log10(Eny)[cls_KKM[1]][points_1_3], np.log10(Dur)[cls_KKM[1]][points_1_3], s=75, c='black', label='Class 1')

plot_norm(ax, 'Component 1', 'Component 2', 'Component 3', 'Chan 2', legend=False)

In [36]:
# 2D
fig = plt.figure(figsize=[6, 3.9])
ax2 = plt.subplot()

# ax2.scatter(S_[:, 0], S_[:, 1], s=15, c=color_2)
# ax2.loglog(Amp, Eny, '.', Marker='.', color=color_2)
for i, color in enumerate([color_1, color_2]):
    ax2.scatter(S_[cls_KKM[i], 0], S_[cls_KKM[i], 1], s=15, color=color)
#     ax2.loglog(Dur[cls_KKM[i]], Eny[cls_KKM[i]], '.', markersize=8, marker='.', color=color, label='Population %d'%(i+1))

# ax2.loglog(Amp[cls_KKM[0]][idx_same_amp_1], Eny[cls_KKM[0]][idx_same_amp_1], '.', markersize=8, marker='.', color='black')
# ax2.loglog(Amp[cls_KKM[1]][idx_same_amp_2], Eny[cls_KKM[1]][idx_same_amp_2], '.', markersize=8, marker='.', color='black')
# ax2.loglog(Amp[cls_KKM[1]], Eny[cls_KKM[1]], '.', markersize=8, Marker='.', color='black')

plot_norm(ax2, 'Duration (μs)', 'Energy (aJ)')
# 'Duration (μs)', 'Amplitude (μV)', 'Energy (aJ)', 'Component 1', 'Component 2'

No handles with labels found to put in legend.


#### Classify with frequency

In [92]:
def plot_signal_decomp(data, w, n, title):
    """Decompose and plot a signal S.
    S = An + Dn + Dn-1 + ... + D1
    """
    w = pywt.Wavelet(w) #选取小波函数
    a = data
    ca = [] #近似分量
    cd = [] #细节分量
    for i in range(n):
        (a, d) = pywt.dwt(a, w, pywt.Modes.smooth) #进行5阶离散小波变换
        ca.append(a)
        cd.append(d)

    rec_a = []
    rec_d = []

    for i, coeff in enumerate(ca):
        """
        waverec()返回list，[cAn, cDn, cDn-1, …, cD2, cD1]，
        n为分解阶次，cAn是逼近系数数组，后面的依次是细节系数数组
        [coeff, None] + [None] * i 是重构低频
        """
        coeff_list = [coeff, None] + [None] * i
        rec_a.append(pywt.waverec(coeff_list, w)) #重构

    for i, coeff in enumerate(cd):
        """
        [coeff, None] + [None] * i 是重构对角线高频
        """
        coeff_list = [None, coeff] + [None] * i
        rec_d.append(pywt.waverec(coeff_list, w))

    maxlev = pywt.dwt_max_level(data.shape[0], w.dec_len)
    threshold = 0.04
    coeffs = pywt.wavedec(data, 'db8', level=maxlev)
    for i in range(1, len(coeffs)):
        coeffs[i] = pywt.threshold(coeffs[i], threshold * max(coeffs[i]))
    datarec = pywt.waverec(coeffs, 'db8')
        
    fig = plt.figure()
    title = ['Raw signal', 'De-noised signal using wavelet techniques']
    y = [data, datarec]
    for idx, [i, j] in enumerate(zip(title, y)):
        ax_main = fig.add_subplot(len(rec_a) + 2, 1, idx + 1)
        ax_main.set_title(i)
        ax_main.plot(j)
        ax_main.set_xlim(0, len(data) - 1)

    for i, y in enumerate(rec_a):
        ax = fig.add_subplot(len(rec_a) + 2, 2, 5 + i * 2)
        ax.plot(y, 'r')
        ax.set_xlim(0, len(y) - 1)
        ax.set_ylabel("A%d" % (i + 1))

    for i, y in enumerate(rec_d):
        ax = fig.add_subplot(len(rec_d) + 2, 2, 6 + i * 2)
        ax.plot(y, 'g')
        ax.set_xlim(0, len(y) - 1)
        ax.set_ylabel("D%d" % (i + 1))
    plt.tight_layout()

In [None]:
waveform = Waveform(color_1, color_2, data_tra, path, path_pri, 'Ni-electrolysis', 'vallen')
frequency = Frequency(color_1, color_2, data_tra, path, path_pri, 'Ni-electrolysis', 'vallen')

In [None]:
time, sig = waveform.cal_wave(data_tra[323 - 1])
# plot_signal_decomp(sig, 'db8', 5, "DWT: Sample - db8")
frequency.cla_wtpacket(sig, 'db8', 3, False)

In [95]:
# Wavelet Packet
wpd = []
for trai in tqdm(chan[:, -1].astype(int)):
    _, sig = waveform.cal_wave(data_tra[trai - 1])
    energy = wpd_plt(sig, 'db8', 3, False)
    wpd.append(energy)
wpd = np.array(wpd)

100%|██████████████████████████████████████████████████████████████████████████████| 515/515 [00:00<00:00, 1001.95it/s]


##### Show all waveforms

In [330]:
freq, stage_idx = frequency.cal_freq_max(chan[:, -1].astype(int), status='peak')

100%|███████████████████████████████████████████████████████████████████████████████| 515/515 [00:01<00:00, 496.22it/s]


In [162]:
all_trai = chan[:, -1].astype('int')
cls_1_idx = np.where(np.array(stage_idx) == 1)[0]
cls_2_idx = np.where(np.array(stage_idx) == 2)[0]
cls_3_idx = np.where(np.array(stage_idx) == 3)[0]
for idx, i in enumerate([cls_1_idx, cls_2_idx, cls_3_idx]):
    Res = frequency.cal_ave_freq(all_trai[i])
    frequency.plot_ave_freq(Res, i.shape[0], str(idx+1))

In [317]:
for i in cls_1_idx[:1]:
    waveform.plot_wave_TRAI(all_trai[i])

In [336]:
frequency.plot_wave_frequency(57961, True, 3, True, True)

In [326]:
fig = plt.figure(figsize=[6, 4.5])
ax = plt.subplot()
ax.scatter(freq, [1 for _ in range(freq.shape[0])])

<matplotlib.collections.PathCollection at 0x1f884feb4f0>

In [None]:
np.random.seed(10)
b = np.random.permutation(chan[:, -1].astype(int))

##### Main

In [277]:
S_, A_ = ICA(3, 'ica', np.log10(Amp), np.log10(Eny), np.log10(Dur), freq)
# S_, A_ = ICA(3, 'ica', np.log10(Amp), np.log10(Eny), np.log10(Dur), freq, wpd[:, 0], wpd[:, 1], wpd[:, 2], wpd[:, 3], wpd[:, 4], wpd[:, 5], wpd[:, 6], wpd[:, 7])
km = KernelKMeans(n_clusters=8, max_iter=100, random_state=100, verbose=1, kernel="rbf")
pred = km.fit_predict(S_)
cls_KKM = []
for i in range(8):
    cls_KKM.append(pred == i)

Converged at iteration 13


In [281]:
# 3D
fig = plt.figure(figsize=[6, 4.5])
ax = plt.subplot(projection='3d')

ax.scatter3D(S_[:, 0], S_[:, 1], S_[:, 2], cmap='Blues')

plot_norm(ax, 'First peak', 'Second peak', 'Third peak', 'Chan 2', legend=False)

In [307]:
fig = plt.figure(figsize=[6, 3.9])
ax = plt.subplot(projection='3d')

# ax.scatter3D(S_[:, 0], S_[:, 1], S_[:, 2], cmap='Blues')
color_all = [[1, 0, 1], [0, 0, 1], [0, 1, 0], [1, 0, 0], [0.5, 0.5, 0.5], [1, 0.3, 0], [0, 0, 0], [0, 1, 1]]
for i, color in enumerate(color_all[:4]):
#     ax.scatter3D(np.log10(Amp)[cls_KKM[i]], np.log10(Eny)[cls_KKM[i]], np.log10(Dur)[cls_KKM[i]], s=15, color=color)
    ax.scatter3D(S_[cls_KKM[i], 0], S_[cls_KKM[i], 1], S_[cls_KKM[i], 2], s=15, color=color)
plot_norm(ax, 'Component 1', 'Component 2', 'Component 3', 'Chan 2', legend=False)

# fig = plt.figure(figsize=[6, 3.9], num='The first three frequency')
# ax2 = plt.subplot(projection='3d')

# for i, color in enumerate([[1, 0, 1], [0, 0, 1], [0, 1, 0], [1, 0, 0], [0.5, 0.5, 0.5], [1, 0.3, 0], [0, 0, 0], [0, 1, 1]]):
#     ax2.scatter3D(freq[cls_KKM[i], 0], freq[cls_KKM[i], 1], freq[cls_KKM[i], 2], s=15, color=color)
# plot_norm(ax2, 'First peak', 'Second peak', 'Third peak', 'Chan 2', legend=False)

In [None]:
TRAI = []
for i in range(len(cls_KKM)):
    TRAI.append(chan[cls_KKM[i]][:, -1].astype(int))

In [None]:
frequency = Frequency(color_1, color_2, data_tra, path, path_pri)
for i, title in enumerate(['POP ' + str(i+1) for i in range(8)]):
    Res = frequency.cal_ave_freq(TRAI[i])
    frequency.plot_ave_freq(Res, TRAI[i].shape[0], title)

### Find Wave

In [None]:
min(np.log10(Amp)[cls_1_KKM]), max(np.log10(Amp)[cls_1_KKM])

In [None]:
# A-D electrolysis
a = [16, 91, 94, 129, 104, 128, 176]
b = [11, 103, 12, 113, 276, 146, 140]

In [None]:
# E-A pure
a = [82, 56, 127, 45]
b = [32, 47, 57, 46]

In [None]:
tmp = 0
for i, j in zip(a, b):
    tmp += Eny[cls_KKM[1]][j] / Eny[cls_KKM[0]][i]
tmp = tmp / len(a)
tmp

In [234]:
for i in np.where((Amp[cls_KKM[0]] > 280) & (Amp[cls_KKM[0]] < 300) & (Eny[cls_KKM[0]] > 100))[0]:
    # Idx, Dur, Eny, TRAI
    print(i, Amp[cls_KKM[0]][i], Eny[cls_KKM[0]][i], '{:.0f}'.format(chan[cls_KKM[0]][i][-1]))
print('-'*50)
for i in np.where((Amp[cls_KKM[1]] > 280) & (Amp[cls_KKM[1]] < 300) & (Eny[cls_KKM[1]] < 100))[0]:
    # Idx, Dur, Eny, TRAI
#     if i == 12:
#         print(np.log10(Dur)[cls_KKM[1]][i], np.log10(Amp)[cls_KKM[1]][i])
    print(i, Amp[cls_KKM[1]][i], Eny[cls_KKM[1]][i], '{:.0f}'.format(chan[cls_KKM[1]][i][-1]))

22 299.22635404246165 236.85586802975618 6300
128 282.48264109062717 238.8925824525963 22112
--------------------------------------------------
29 299.22635404246165 80.87650632504395 2345
146 286.8035992717457 50.799943560134835 11266
184 288.4239585896652 65.76574186361822 18224


### PDF & CCDF & ML Curve

In [281]:
class Features:
    def __init__(self, color_1, color_2, time, feature_idx, status):
        self.color_1 = color_1
        self.color_2 = color_2
        self.time = time
        self.feature_idx = feature_idx
        self.convert = lambda x, a, b: pow(x, a) * pow(10, b)
        self.status = status

    def cal_interval(self, tmp, interval):
        tmp_max = int(max(tmp))
        tmp_min = int(min(tmp))
        if tmp_min <= 0:
            inter = [0] + [pow(10, i) for i in range(len(str(tmp_max)))]
            mid = [interval * pow(10, i) for i in range(len(str(tmp_max)) + 1)]
        else:
            inter = [pow(10, i) for i in range(len(str(tmp_min)) - 1,
                                               len(str(tmp_max)))]
            mid = [interval * pow(10, i) for i in range(len(str(tmp_min)), 
                                                        len(str(tmp_max)) + 1)]
        return inter, mid

    def cal_negtive_interval(self, res, interval):
        tmp = sorted(np.array(res))
        tmp_min, tmp_max = math.floor(np.log10(min(tmp))), math.ceil(np.log10(max(tmp)))
        inter = [pow(10, i) for i in range(tmp_min, tmp_max+1)]
        mid = [interval * pow(10, i) for i in range(tmp_min+1, tmp_max+2)]
        return inter, mid

    def cal_linear(self, tmp, inter, mid, interval_num, idx=0):
        # 初始化横坐标
        x = np.array([])
        for i in inter:
            if i != 0:
                x = np.append(x, np.linspace(i, i * 10, interval_num, endpoint=False))
            else:
                x = np.append(x, np.linspace(i, 1, interval_num, endpoint=False))

        # 初始化纵坐标
        y = np.zeros(x.shape[0])
        for i, n in Counter(tmp).items():
            while True:
                try:
                    if x[idx] <= i < x[idx + 1]:
                        y[idx] += n
                        break
                except IndexError:
                    if x[idx] <= i:
                        y[idx] += n
                        break
                idx += 1
        
        # 对横坐标作进一步筛选，计算概率分布值
        x, y = x[y != 0], y[y != 0]
        xx = np.zeros(x.shape[0])
        yy = y / sum(y)

        # 取区间终点作为该段的横坐标
        for idx in range(len(x) - 1):
            xx[idx] = (x[idx] + x[idx + 1]) / 2
        xx[-1] = x[-1] + pow(10, len(str(int(x[-1]))))*(0.9/interval_num) / 2

        # 计算分段区间长度，从而求得概率密度值
        interval = []
        for i, j in enumerate(mid):
            try:
                num = len(np.intersect1d(np.where(inter[i] <= xx)[0],
                                         np.where(xx < inter[i + 1])[0]))
                interval.extend([j] * num)
            except IndexError:
                num = len(np.where(inter[i] <= xx)[0])
                interval.extend([j] * num)
        yy = yy / np.array(interval)
        #     # 取对数变换为线性关系
        #     log_xx = np.log10(xx)
        #     log_yy = np.log10(yy)
        #     fit = np.polyfit(log_xx, log_yy, 1)
        #     alpha = abs(fit[0])
        #     fit_x = np.linspace(min(log_xx), max(log_xx), 100)
        #     fit_y = np.polyval(fit, fit_x)
        return xx, yy

    def cal_N_Naft(self, tmp, eny_lim):
        N_ms, N_as = 0, 0
        main_peak = np.where(eny_lim[0] < tmp)[0]    
        if len(main_peak):
            for i in range(main_peak.shape[0] - 1):
                if main_peak[i] >= eny_lim[1]:
                    continue
                elif main_peak[i+1] - main_peak[i] == 1:
                    N_ms += tmp[main_peak[i]]
                    continue
                N_ms += tmp[main_peak[i]]
                N_as += np.max(tmp[main_peak[i]+1:main_peak[i+1]])
            if main_peak[-1] < tmp.shape[0] - 1:
                N_as += np.max(tmp[main_peak[-1]+1:])
            N_ms += tmp[main_peak[-1]]
        return N_ms + N_as, N_as

    def cal_OmiroLaw_helper(self, tmp, eny_lim):
        res = [[] for _ in range(len(eny_lim))]
        for idx in tqdm(range(len(eny_lim))):
            main_peak = np.where((eny_lim[idx][0] < tmp) & (tmp < eny_lim[idx][1]))[0]
            if len(main_peak):
                for i in range(main_peak.shape[0] - 1):
                    for j in range(main_peak[i]+1, main_peak[i+1]+1):
                        if tmp[j] < eny_lim[idx][1]:
                            k = self.time[j] - self.time[main_peak[i]]
                            res[idx].append(k)
                        else:
                            break
                if main_peak[-1] < tmp.shape[0] - 1:
                    for j in range(main_peak[-1] + 1, tmp.shape[0]):
                        k = self.time[j] - self.time[main_peak[-1]]
                        res[idx].append(k)
        return res

    def cal_PDF(self, tmp_origin, tmp_1, tmp_2, xlabel, ylabel, features_path, LIM=[[0, None]]*3, INTERVAL_NUM=[6]*3, select=[0, 3], FIT=False):
        fig = plt.figure(figsize=[6, 3.9], num='PDF--%s'%xlabel)
#         fig = plt.figure(figsize=[6, 3.9])
        fig.text(0.15, 0.2, self.status, fontdict={'family':'Arial', 'fontweight':'bold', 'fontsize':12})
        ax = plt.subplot()
        TMP, COLOR, LABEL = [tmp_origin, tmp_1, tmp_2], ['black', self.color_1, self.color_2], ['Whole', 'Population 1', 'Population 2']
        if LIM[0][1] == None:
            method = 'index'
        elif LIM[0][1] == float('inf'):
            method = 'value'
        for tmp, color, label, num, lim in zip(TMP[select[0]:select[1]], COLOR[select[0]:select[1]], LABEL[select[0]:select[1]],
                                               INTERVAL_NUM[select[0]:select[1]], LIM[select[0]:select[1]]):
            inter, mid = self.cal_interval(tmp, num)
            xx, yy = self.cal_linear(tmp, inter, mid, num)
            if FIT:
                if method == 'value':
                    lim = np.where((xx > lim[0]) & (xx < lim[1]))[0]
                    fit = np.polyfit(np.log10(xx[lim[0]:lim[-1]]), np.log10(yy[lim[0]:lim[-1]]), 1)
                elif method == 'index':
                    fit = np.polyfit(np.log10(xx[lim[0]:lim[1]]), np.log10(yy[lim[0]:lim[1]]), 1)
                alpha, b = fit[0], fit[1]
                fit_x = np.linspace(xx[lim[0]], xx[-1], 100)
                fit_y = self.convert(fit_x, alpha, b)
                ax.plot(fit_x, fit_y, '-.', lw=1, color=color)
                ax.loglog(xx, yy, '.', marker='.', markersize=8, color=color, label='{}--{:.2f}'.format(label, abs(alpha)))
            else:
                ax.loglog(xx, yy, '.', marker='.', markersize=8, color=color, label=label)
            with open(features_path[:-4] + '_{}_'.format(label) + ylabel + '.txt', 'w') as f:
                f.write('{}, {}\n'.format(xlabel, ylabel))
                for j in range(len(xx)):
                    f.write('{}, {}\n'.format(xx[j], yy[j]))
        plot_norm(ax, xlabel, ylabel, legend_loc='upper right')            

    def cal_CCDF(self, tmp_origin, tmp_1, tmp_2, xlabel, ylabel, features_path, LIM=[[0, float('inf')]]*3, select=[0, 3], FIT=False):
        N_origin, N1, N2 = len(tmp_origin), len(tmp_1), len(tmp_2)
        fig = plt.figure(figsize=[6, 3.9], num='CCDF--%s' % xlabel)
        fig.text(0.15, 0.2, self.status, fontdict={'family':'Arial', 'fontweight':'bold', 'fontsize':12})
        ax = plt.subplot()
        TMP, N, COLOR, LABEL = [tmp_origin, tmp_1, tmp_2], [N_origin, N1, N2], ['black', self.color_1, self.color_2], ['Whole', 'Population 1', 'Population 2']
        for tmp, N, color, label, lim in zip(TMP[select[0]:select[1]], N[select[0]:select[1]], COLOR[select[0]:select[1]], 
                                             LABEL[select[0]:select[1]], LIM[select[0]:select[1]]):
            xx, yy = [], []
            for i in range(N - 1):
                xx.append(np.mean([tmp[i], tmp[i + 1]]))
                yy.append((N - i + 1) / N)
            if FIT:
                xx, yy = np.array(xx), np.array(yy)
                fit_lim = np.where((xx > lim[0]) & (xx < lim[1]))[0]
                fit = np.polyfit(np.log10(xx[fit_lim[0]:fit_lim[-1]]), np.log10(yy[fit_lim[0]:fit_lim[-1]]), 1)
                alpha, b = fit[0], fit[1]
                fit_x = np.linspace(xx[fit_lim[0]], xx[fit_lim[-1]], 100)
                fit_y = self.convert(fit_x, alpha, b)
                ax.plot(fit_x, fit_y, '-.', lw=1, color=color)
                ax.loglog(xx, yy, color=color, label='{}--{:.2f}'.format(label, abs(alpha)))
            else:
                ax.loglog(xx, yy, color=color, label=label)
            with open(features_path[:-4] + '_{}_'.format(label) + 'CCDF(%s).txt' % xlabel[0], 'w') as f:
                f.write('{}, {}\n'.format(xlabel, ylabel))
                for j in range(len(xx)):
                    f.write('{}, {}\n'.format(xx[j], yy[j]))
        plot_norm(ax, xlabel, ylabel, legend_loc='upper right')      

    def cal_ML(self, tmp_origin, tmp_1, tmp_2, xlabel, ylabel, features_path, select=[0, 3]):
        N_origin, N1, N2 = len(tmp_origin), len(tmp_1), len(tmp_2)
        fig = plt.figure(figsize=[6, 3.9], num='ML--%s' % xlabel)
        fig.text(0.96, 0.2, self.status, fontdict={'family':'Arial', 'fontweight':'bold', 'fontsize':12}, horizontalalignment="right")
        ax = plt.subplot()
        ax.set_xscale("log", nonposx='clip')
        TMP, N, LAYER, COLOR, LABEL = [tmp_origin, tmp_1, tmp_2], [N_origin, N1, N2], [1, 2, 3], ['black', self.color_1, self.color_2], ['Whole', 'Population 1', 'Population 2']
        for tmp, N, layer, color, label in zip(TMP[select[0]:select[1]], N[select[0]:select[1]], LAYER[select[0]:select[1]], COLOR[select[0]:select[1]], LABEL[select[0]:select[1]]):
            ML_y, Error_bar = [], []
            for j in tqdm(range(N)):
                valid_x = sorted(tmp)[j:]
                E0 = valid_x[0]
                Sum = np.sum(np.log(valid_x / E0))
                N_prime = N - j
                alpha = 1 + N_prime / Sum
                error_bar = (alpha - 1) / pow(N_prime, 0.5)
                ML_y.append(alpha)
                Error_bar.append(error_bar)
            ax.errorbar(sorted(tmp), ML_y, yerr=Error_bar, fmt='o', ecolor=color, color=color, elinewidth=1, capsize=2, ms=3, label=label, zorder=layer)
            with open(features_path[:-4] + '_{}_'.format(label) + 'ML(%s).txt' % xlabel[0], 'w') as f:
                f.write('{}, {}, Error bar\n'.format(xlabel, ylabel))
                for j in range(len(ML_y)):
                    f.write('{}, {}, {}\n'.format(sorted(tmp)[j], ML_y[j], Error_bar[j]))
        plot_norm(ax, xlabel, ylabel, y_lim=[1.25, 3])

    def cal_contour(self, tmp_1, tmp_2, xlabel, ylabel, title, x_lim, y_lim, size_x=40, size_y=40, 
                    method='linear_bin', padding=False, clabel=False):
        tmp_1, tmp_2 = 20 * np.log10(tmp_1), 20 * np.log10(tmp_2)
        if method == 'log_bin':
            sum_x, sum_y = x_lim[1] - x_lim[0], y_lim[1] - y_lim[0]
            arry_x = np.logspace(np.log10(sum_x + 10), 1, size_x) / (sum(np.logspace(np.log10(sum_x + 10), 1, size_x)) / sum_x)
            arry_y = np.logspace(np.log10(sum_y + 10), 1, size_y) / (sum(np.logspace(np.log10(sum_y + 10), 1, size_y)) / sum_y)
            x, y = [], []
            for tmp, res, arry in zip([x_lim[0], y_lim[0]], [x, y], [arry_x, arry_y]):
                for i in arry:
                    res.append(tmp)
                    tmp += i
            x, y = np.array(x), np.array(y)
        elif method == 'linear_bin':
            x, y = np.linspace(x_lim[0], x_lim[1], size_x), np.linspace(y_lim[0], y_lim[1], size_y)
        X, Y = np.meshgrid(x, y)
        height = np.zeros([X.shape[0], Y.shape[1]])
        linestyles = ['solid'] * 8 + ['--'] * 4
        levels = [1, 2, 3, 6, 12, 24, 48, 96, 192, 384, 768, 1536]
        colors = [[1, 0, 1], [0, 0, 1], [0, 1, 0], [1, 0, 0], [0.5, 0.5, 0.5],
                  [1, 0.3, 0], [0, 0, 0], [0, 1, 1], [1, 0, 1], [0, 0, 1], [0, 1, 0], [1, 0, 0]]

        for i in range(X.shape[1] - 1):
            valid_x = np.where((tmp_1 < X[0, i + 1]) & (tmp_1 >= X[0, i]))[0]
            for j in range(Y.shape[0] - 1):
                valid_y = np.where((tmp_2 < Y[j + 1, 0]) & (tmp_2 >= Y[j, 0]))[0]
                height[j, i] = np.intersect1d(valid_x, valid_y).shape[0]

        fig = plt.figure(figsize=[6, 3.9], num='Contour--%s & %s' % (ylabel.split(' ')[-1][0], xlabel.split(' ')[-1][0]))
        fig.text(0.96, 0.2, self.status, fontdict={'family':'Arial', 'fontweight':'bold', 'fontsize':12}, horizontalalignment="right")
        ax = plt.subplot()
        if padding:
            ct = ax.contourf(X, Y, height, levels, colors=colors, extend='max')
#             cbar = plt.colorbar(ct)
        else:
            ct = ax.contour(X, Y, height, levels, colors=colors, linewidths=1, linestyles=linestyles)
#             cbar = plt.colorbar(ct)
        if clabel:
            ax.clabel(ct, inline=True, colors='k', fmt='%.1f')
        plot_norm(ax, xlabel, ylabel, title=title, legend=False)

    def plot_correlation(self, tmp_1, tmp_2, xlabel, ylabel, cls_1=None, cls_2=None, idx_1=None, idx_2=None,
                         fit=False, status='A-D', x1_lim=None, x2_lim=None, plot_lim=None, title=''):
        fig = plt.figure(figsize=[6, 3.9], num='Correlation--%s & %s %s' % (ylabel, xlabel, title))
        fig.text(0.96, 0.2, self.status, fontdict={'family':'Arial', 'fontweight':'bold', 'fontsize':12}, horizontalalignment="right")
        ax = plt.subplot()
        if cls_1 is not None and cls_2 is not None:
            ax.loglog(tmp_1[cls_2], tmp_2[cls_2], '.', marker='.', markersize=8, color=self.color_2, label='Population 2')
            ax.loglog(tmp_1[cls_1], tmp_2[cls_1], '.', marker='.', markersize=8, color=self.color_1, label='Population 1')
            if idx_1:
                ax.loglog(tmp_1[cls_1][idx_1], tmp_2[cls_1][idx_1], '.', marker='.', markersize=8, color='black')
            if idx_2:
                ax.loglog(tmp_1[cls_2][idx_2], tmp_2[cls_2][idx_2], '.', marker='.', markersize=8, color='black')
            plot_norm(ax, xlabel, ylabel)
        else:
            ax.loglog(tmp_1, tmp_2, '.', Marker='.', markersize=8, color='g')
            plot_norm(ax, xlabel, ylabel, legend=False)

        if fit:
            cor_x1, cor_x2 = tmp_1[cls_1], tmp_1[cls_2]
            cor_y1, cor_y2 = tmp_2[cls_1], tmp_2[cls_2]
            if status == 'A-D':
                A = np.where((cor_x1 > x1_lim[0]) & (cor_x1 < x1_lim[1]))
                B = np.where((cor_x2 > x2_lim[0]) & (cor_x2 < x2_lim[1]))
            elif status == 'E-A':
                A = fit_with_x1
                B = fit_with_x2
            linear_x1 = cor_x1[A]
            linear_y1 = cor_y1[A]
            linear_x2 = cor_x2[B]
            linear_y2 = cor_y2[B]
            ave = 0
            alpha, b, fit_x, fit_y = [], [], [], []
            mix_cor_x = [min(cor_x1), min(cor_x2)] if status == 'E-A' else plot_lim
            for linear_x, linear_y, min_x, max_x in zip([linear_x1, linear_x2], [linear_y1, linear_y2],
                                                        mix_cor_x, [max(cor_x1), max(cor_x2)]):
                fit = np.polyfit(np.log10(linear_x), np.log10(linear_y), 1)
                alpha.append(fit[0])
                b.append(fit[1])
                fit_x.append(np.linspace(min_x, max_x, 100))
                fit_y.append(self.convert(np.linspace(min_x, max_x, 100), fit[0], fit[1]))
            ax.plot(fit_x[0], fit_y[0], ls='--', lw=2, color='black')
            ax.plot(fit_x[1], fit_y[1], ls='--', lw=2, color='black')
            if status == 'A-D':
                min_y = max(min(fit_y[0]), min(fit_y[1]))
                max_y = min(max(fit_y[0]), max(fit_y[1]))
                cal_y = np.linspace(np.log10(min_y), np.log10(max_y), 100)
                for i in cal_y:
                    tmp1 = (i - b[0]) / alpha[0]
                    tmp2 = (i - b[1]) / alpha[1]
                    ave += max(pow(10, tmp1), pow(10, tmp2)) / min(pow(10, tmp1), pow(10, tmp2))
            elif status == 'E-A':
                min_x = max(min(fit_x[0]), min(fit_x[1]))
                max_x = min(max(fit_x[0]), max(fit_x[1]))
                cal_x = np.linspace(np.log10(min_x), np.log10(max_x), 100)
                for i in cal_x:
                    tmp1 = alpha[0] * i + b[0]
                    tmp2 = alpha[1] * i + b[1]
                    ave += max(pow(10, tmp1), pow(10, tmp2)) / min(pow(10, tmp1), pow(10, tmp2))
            return ave / 100, alpha, b, A, B

    def plot_feature_time(self, tmp, ylabel):
        fig = plt.figure(figsize=[6, 3.9], num='Time domain curve')
        fig.text(0.96, 0.2, self.status, fontdict={'family':'Arial', 'fontweight':'bold', 'fontsize':12}, horizontalalignment="right")
        ax = plt.subplot()
        ax.set_yscale("log", nonposy='clip')
        ax.scatter(self.time, tmp)
        ax.set_xticks(np.linspace(0, 40000, 9))
        ax.set_yticks([-1, 0, 1, 2, 3])
        plot_norm(ax, 'Time(s)', ylabel, legend=False)

    def cal_BathLaw(self, tmp_origin, tmp_1, tmp_2, xlabel, ylabel, interval_num, select=[0, 3]):
#         fig = plt.figure(figsize=[6, 3.9], num='Bath law')
        fig = plt.figure(figsize=[6, 3.9])
        fig.text(0.12, 0.2, self.status, fontdict={'family':'Arial', 'fontweight':'bold', 'fontsize':12})
        ax = plt.subplot()
        TMP, MARKER, COLOR, LABEL = [tmp_origin, tmp_1, tmp_2], ['o', 'p', 'h'], ['black', self.color_1, self.color_2], ['Whole', 'Population 1', 'Population 2']
        for tmp, marker, color, label in zip(TMP[select[0]:select[1]], MARKER[select[0]:select[1]], COLOR[select[0]:select[1]], LABEL[select[0]:select[1]]):
            tmp_max = int(max(tmp))
            inter = [pow(10, i) for i in range(0, len(str(tmp_max)))]
            x = np.array([])
            y = []
            for i in inter:
                x = np.append(x, np.linspace(i, i * 10, interval_num, endpoint=False))
            for k in range(x.shape[0]):
                if k != x.shape[0] - 1:
                    N, Naft = self.cal_N_Naft(tmp, [x[k], x[k+1]])
                else:
                    N, Naft = self.cal_N_Naft(tmp, [x[k], float('inf')])
                if Naft != 0 and N != 0:
                    y.append(np.log10(N / Naft))
                else:
                    y.append(float('inf'))
            y = np.array(y)
            x, y = x[y != float('inf')], y[y != float('inf')]
            x_eny = np.zeros(x.shape[0])
            for idx in range(len(x) - 1):
                x_eny[idx] = (x[idx] + x[idx + 1]) / 2
            x_eny[-1] = x[-1] + pow(10, len(str(int(x[-1]))))*(0.9/interval_num) / 2
            ax.semilogx(x_eny, y, color=color, marker=marker, markersize=8, mec=color, mfc='none', label=label)
        ax.axhline(1.2, ls='-.', linewidth=1, color="black")
        plot_norm(ax, xlabel, ylabel, y_lim=[-1, 4], legend_loc='upper right')

    def cal_WaitingTime(self, time_origin, time_1, time_2, xlabel, ylabel, interval, interval_num, select=[0, 3], FIT=False):
#         fig = plt.figure(figsize=[6, 3.9], num='Distribution of waiting time')
        fig = plt.figure(figsize=[6, 3.9])
        fig.text(0.16, 0.22, self.status, fontdict={'family':'Arial', 'fontweight':'bold', 'fontsize':12})
        ax = plt.subplot()
        TIME, MARKER, COLOR, LABEL = [time_origin, time_1, time_2], ['o', 'p', 'h'], ['black', self.color_1, self.color_2], ['Whole', 'Population 1', 'Population 2']
        for [time, marker, color, label] in zip(TIME[select[0]:select[1]], MARKER[select[0]:select[1]], COLOR[select[0]:select[1]], LABEL[select[0]:select[1]]):
            res = []
            for i in range(time.shape[0] - 1):
                res.append(time[i+1] - time[i])
            inter, mid = self.cal_negtive_interval(res, interval)
            xx, yy = self.cal_linear(sorted(np.array(res)), inter, mid, interval_num)
            if FIT:
                xx, yy = np.array(xx), np.array(yy)
                fit = np.polyfit(np.log10(xx), np.log10(yy), 1)
                alpha, b = fit[0], fit[1]
                fit_x = np.linspace(xx[0], xx[-1], 100)
                fit_y = self.convert(fit_x, alpha, b)
                ax.plot(fit_x, fit_y, '-.', lw=1, color=color)
                ax.loglog(xx, yy, markersize=8, marker=marker, mec=color, mfc='none', color=color, label='{}--{:.2f}'.format(label, abs(alpha)))
            else:
                ax.loglog(xx, yy, markersize=8, marker=marker, mec=color, mfc='none', color=color, label=label)
        plot_norm(ax, xlabel, ylabel, legend_loc='upper right')

    def cal_OmoriLaw(self, tmp_origin, tmp_1, tmp_2, xlabel, ylabel, interval, interval_num, select=[0, 3], FIT=False):
        eny_lim = [[0.01, 0.1], [0.1, 1], [1, 10], [10, 100], [100, 1000]]
#         eny_lim = [[0.001, 0.01], [0.01, 0.1], [0.1, 10], [10, 1000], [1000, 10000]]
        tmp_origin, tmp_1, tmp_2 = self.cal_OmiroLaw_helper(tmp_origin, eny_lim), self.cal_OmiroLaw_helper(tmp_1, eny_lim), self.cal_OmiroLaw_helper(tmp_2, eny_lim)
        TMP, TITLE = [tmp_origin, tmp_1, tmp_2], ['Omori law_Whole', 'Omori law_Population 1', 'Omori law_Population 2']
        for idx, [tmp, title] in enumerate(zip(TMP[select[0]:select[1]], TITLE[select[0]:select[1]])):
#             fig = plt.figure(figsize=[6, 3.9], num=title)
            fig = plt.figure(figsize=[6, 3.9])
            fig.text(0.16, 0.21, self.status, fontdict={'family':'Arial', 'fontweight':'bold', 'fontsize':12})
            ax = plt.subplot()
            for i, [marker, color, label] in enumerate(zip(['>', 'o', 'p', 'h', 'H'], 
                                                           [[1, 0, 1], [0, 0, 1], [0, 1, 0], [1, 0, 0], [0.5, 0.5, 0.5]], 
                                                           ['$10^{-2}aJ<E_{MS}<10^{-1}aJ$', '$10^{-1}aJ<E_{MS}<10^{0}aJ$', 
                                                            '$10^{0}aJ<E_{MS}<10^{1}aJ$', '$10^{1}aJ<E_{MS}<10^{2}aJ$', 
                                                            '$10^{2}aJ<E_{MS}<10^{3}aJ$'])):
                if len(tmp[i]):
                    inter, mid = self.cal_negtive_interval(tmp[i], interval)
                    xx, yy = self.cal_linear(sorted(np.array(tmp[i])), inter, mid, interval_num)
                    if FIT:
                        xx, yy = np.array(xx), np.array(yy)
#                         fit_lim = np.where((xx > lim[0]) & (xx < lim[1]))[0]
                        fit = np.polyfit(np.log10(xx), np.log10(yy), 1)
                        alpha, b = fit[0], fit[1]
                        fit_x = np.linspace(xx[0], xx[-1], 100)
                        fit_y = self.convert(fit_x, alpha, b)
                        ax.plot(fit_x, fit_y, '-.', lw=1, color=color)
                        ax.loglog(xx, yy, markersize=8, marker=marker, mec=color, mfc='none', color=color, label='{}--{:.2f}'.format(label, abs(alpha)))
                    else:
                        ax.loglog(xx, yy, markersize=8, marker=marker, mec=color, mfc='none', color=color, label=label)
            plot_norm(ax, xlabel, ylabel, legend_loc='upper right')

In [214]:
pow(10, len(str(int(498.52)))), np.linspace(100, 1000, 9, endpoint=False)

(1000, array([100., 200., 300., 400., 500., 600., 700., 800., 900.]))

In [282]:
if __name__ == '__main__':
    feature_idx = [Amp, Dur, Eny]
    features = Features(color_1, color_2, Time, feature_idx, 'Al alloy')
    xlabelz = ['Amplitude (μV)', 'Duration (μs)', 'Energy (aJ)']
    
    # Al-alloy
    LIM_PDF = [[[0, None], [1, -4], [2, -6]], [[0, float('inf')], [100, 900], [36, 500]], [[0, None], [4, -3], [2, -4]]]
    LIM_CCDF = [[[0, float('inf')], [15, 100], [20, 300]], [[0, float('inf')], [100, 2500], [30, 250]], [[0, float('inf')], [0.5, 10], [0.2, 10]]]
    INTERVAL_NUM = [[8, 16, 16], [8, 15, 20], [8, 8, 10]]
    
#     # Ni-electrolysis: 8, 10, 7
#     LIM_PDF = [[[0, None], [1, None], [1, -2]], [[0, None], [11, -2], [10, -1]], [[0, None], [2, -2], [1, -2]]]
#     LIM_CCDF = [[[0, float('inf')], [0, 1000], [0, 3000]], [[0, float('inf')], [100, 3600], [80, 600]], [[0, float('inf')], [1, 3000], [0.5, 1500]]]
#     INTERVAL_NUM = [[8, 16, 16], [8, 16, 8], [8, 5, 6]]
    
#     # Ni-pure
#     LIM_PDF = [[[0, None], [2, -1], [1, -1]], [[0, None], [6, None], [9, -1]], [[0, None], [2, -2], [2, -4]]]
#     LIM_CCDF = [[[0, float('inf')], [20, 250], [25, 150]], [[0, float('inf')], [150, 2000], [30, 200]], [[0, float('inf')], [0.6, 400], [0.3, 6]]]
#     INTERVAL_NUM = [[8, 11, 8], [8, 10, 9], [8, 5, 9]]

#     for idx, lim_pdf, lim_ccdf, inerval_num in zip([0, 1, 2], LIM_PDF, LIM_CCDF, INTERVAL_NUM):
#         tmp, tmp_1, tmp_2 = sorted(feature_idx[idx]), sorted(feature_idx[idx][cls_KKM[0]]), sorted(feature_idx[idx][cls_KKM[1]])
#         features.cal_PDF(tmp, tmp_1, tmp_2, xlabelz[idx], 'PDF (%s)' % xlabelz[idx][0], features_path, lim_pdf, inerval_num, select=[1, None], FIT=True)
#         features.cal_ML(tmp, tmp_1, tmp_2, xlabelz[idx], 'ML (%s)' % xlabelz[idx][0], features_path, select=[1, None])
#         features.cal_CCDF(tmp, tmp_1, tmp_2, xlabelz[idx], 'CCD C(s)', features_path, lim_ccdf, select=[1, None], FIT=True)

#     features.cal_contour(Amp, Eny, '$20 \log_{10} A(\mu V)$', '$20 \log_{10} E(aJ)$', 'Contour', [20, 55], [-20, 40], 50, 50, method='log_bin')
#     features.cal_BathLaw(Eny, Eny[cls_KKM[0]], Eny[cls_KKM[1]], 'Mainshock Energy (aJ)', r'$\mathbf{\Delta}$M', 9, select=[1, None])
#     features.cal_WaitingTime(Time, Time[cls_KKM[0]], Time[cls_KKM[1]], r'$\mathbf{\Delta}$t (s)', r'P($\mathbf{\Delta}$t)', 0.9/23, 23, select=[1, None])
#     features.cal_OmoriLaw(Eny, Eny[cls_KKM[0]], Eny[cls_KKM[1]], r'$\mathbf{t-t_{MS}\;(s)}$', r'$\mathbf{r_{AS}(t-t_{MS})\;(s^{-1})}$', 0.9/7, 7, select=[1, None])
#     ave, alpha, b, A, B = features.plot_correlation(Dur, Amp, xlabelz[0], xlabelz[2], cls_1=cls_KKM[0], cls_2=cls_KKM[1], status='A-D', x1_lim=[pow(10, 2.75), float('inf')],
#                                                     x2_lim=[pow(10, 1.7), pow(10, 2.0)], plot_lim=[150, 30], fit=True)
#     features.plot_correlation(Dur, Amp, xlabelz[1], xlabelz[0], cls_KKM[0], cls_KKM[1])
#     features.plot_correlation(Dur, Eny, xlabelz[1], xlabelz[2], cls_KKM[0], cls_KKM[1])
#     features.plot_correlation(Amp, Eny, xlabelz[0], xlabelz[2], cls_KKM[0], cls_KKM[1])

In [None]:
# pop1:5, 2, -2   pop2:6, 1, -2
idx = 2
lim_pdf = [[0, None], [4, -3], [2, -4]]
lim_ccdf = [[0, float('inf')], [0.5, 10], [0.2, 10]]
tmp, tmp_1, tmp_2 = sorted(feature_idx[idx]), sorted(feature_idx[idx][cls_KKM[0]]), sorted(feature_idx[idx][cls_KKM[1]])

# features.cal_PDF(tmp, tmp_1, tmp_2, xlabelz[idx], 'PDF (%s)' % xlabelz[idx][0], features_path, lim_pdf, [8, 8, 10], select=[2, None], FIT=True)
# features.cal_CCDF(tmp, tmp_1, tmp_2, xlabelz[idx], 'CCD C(s)', features_path, lim_ccdf, select=[1, None], FIT=True)

for num in range(25, 30):
#     features.cal_PDF(tmp, tmp_1, tmp_2, xlabelz[idx], 'PDF (%s)' % xlabelz[idx][0], features_path, lim_pdf, [num]*3, select=[2, None], FIT=True)
#     features.cal_BathLaw(Eny, Eny[cls_KKM[0]], Eny[cls_KKM[1]], 'Mainshock Energy (aJ)', r'$\mathbf{\Delta}$M', num, select=[1, None])
#     features.cal_WaitingTime(Time, Time[cls_KKM[0]], Time[cls_KKM[1]], r'$\mathbf{\Delta}$t (s)', r'P($\mathbf{\Delta}$t)', 0.9/num, num, select=[1, None], FIT=True)
    features.cal_OmoriLaw(Eny, Eny[cls_KKM[0]], Eny[cls_KKM[1]], r'$\mathbf{t-t_{MS}\;(s)}$', r'$\mathbf{r_{AS}(t-t_{MS})\;(s^{-1})}$', 1/num, num, select=[2, None], FIT=True)

In [None]:
ave, alpha, b, A, B

In [None]:
feature_idx = [Amp, Dur, Eny]
features = Features(color_1, color_2, Time, feature_idx, 'Ni-electrolysis')
height = features.cal_contour(Amp, Eny, '$20 \log_{10} A(\mu V)$', '$20 \log_{10} E(aJ)$', 
            'Time', [-20, 90], [10, 80], 50, 50, method='linear_bin')

In [None]:
fig = plt.figure(figsize=[16, 6], num='Energy-Amplitude')
for i in range(8):
    ax = plt.subplot(2, 4, i+1)
    cal_contour(np.array(t[i])[:, 4], np.array(t[i])[:, 7], '$20 \log_{10} A(\mu V)$', '$20 \log_{10} E(aJ)$', 
                'Time:{}-{}'.format(time[i], time[i+1]), [20, 55], [-20, 40], 50, 50, method='linear_bin')
fig.savefig('Energy-Amplitude.png',dpi=600,format='png')

fig = plt.figure(figsize=[16, 6], num='Duration-Energy')
for i in range(8):
    ax = plt.subplot(2, 4, i+1)
    cal_contour(np.array(t[i])[:, 7], np.array(t[i])[:, 6], '$20 \log_{10} E(aJ)$', '$20 \log_{10} D(\mu s)$', 
                'Time:{}-{}'.format(time[i], time[i+1]), [-20, 40], [15, 60], 50, 50, method='linear_bin')
fig.savefig('Duration-Energy.png',dpi=600,format='png')

###  Validate features

In [210]:
# Time, Amp, RiseTime, Dur, Eny, Counts, TRAI
def validation(k):
    i = data_tra[k]
    sig = np.multiply(array.array('h', bytes(i[-2])), i[-3] * 1000)
    time = np.linspace(i[0], i[0] + pow(i[-5], -1) * (i[-4] - 1), i[-4])

    thr = i[2]
    valid_wave_idx = np.where(abs(sig) >= thr)[0]
    valid_time = time[valid_wave_idx[0]:(valid_wave_idx[-1] + 1)]
    start = time[valid_wave_idx[0]]
    end = time[valid_wave_idx[-1]]
    duration = (end - start) * pow(10, 6)
    max_idx = np.argmax(abs(sig))
    amplitude = max(abs(sig))
    rise_time = (time[max_idx] - start) * pow(10, 6)
    valid_data = sig[valid_wave_idx[0]:(valid_wave_idx[-1] + 1)]
    energy = np.sum(np.multiply(pow(valid_data, 2), pow(10, 6) / i[3]))
    RMS = math.sqrt(energy / duration)
    count, idx = 0, 1
    N = len(valid_data)
    for idx in range(1, N):
        if valid_data[idx - 1] >= thr > valid_data[idx]:
            count += 1
    # while idx < N:
    #     if min(valid_data[idx - 1], valid_data[idx]) <= thr < max((valid_data[idx - 1], valid_data[idx])):
    #         count += 1
    #         idx += 2
    #         continue
    #     idx += 1
    print(i[0], amplitude, rise_time, duration, energy / pow(10, 4), count, i[-1])

### Waveform & Frequency

In [332]:
class Waveform:
    def __init__(self, color_1, color_2, data_tra, path, path_pri, status, device, thr_dB=25):
        self.data_tra = data_tra
        self.path = path
        self.path_pri = path_pri
        self.color_1 = color_1
        self.color_2 = color_2
        self.status = status
        self.device = device
        self.thr = pow(10, thr_dB / 20)

    def cal_wave(self, i, valid=True):
        if self.device == 'vallen':
            # Time, Chan, Thr, SampleRate, Samples, TR_mV, Data, TRAI
            sig = np.multiply(array.array('h', bytes(i[-2])), i[-3] * 1000)
            time = np.linspace(0, pow(i[-5], -1) * (i[-4] - 1) * pow(10, 6), i[-4])
            thr = i[2]
            if valid:
                valid_wave_idx = np.where(abs(sig) >= thr)[0]
                start = time[valid_wave_idx[0]]
                end = time[valid_wave_idx[-1]]
                duration = end - start
                sig = sig[valid_wave_idx[0]:(valid_wave_idx[-1] + 1)]
                time = np.linspace(0, duration, sig.shape[0])
        elif self.device == 'pac':
            sig = i[-2]
            time = np.linspace(0, i[2] * (i[-3] - 1) * pow(10, 6), i[-3])
            if valid:
                valid_wave_idx = np.where(abs(sig) >= self.thr)[0]
                start = time[valid_wave_idx[0]]
                end = time[valid_wave_idx[-1]]
                duration = end - start
                sig = sig[valid_wave_idx[0]:(valid_wave_idx[-1] + 1)]
                time = np.linspace(0, duration, sig.shape[0])
        return time, sig

    def find_wave(self, Dur, Eny, cls_KKM, chan, dur_lim, eny_lim):
        for i in np.where((np.log10(Dur)[cls_KKM] > dur_lim[0]) & (np.log10(Dur)[cls_KKM] < dur_lim[1]) &
                          (np.log10(Eny)[cls_KKM] > eny_lim[0]) & (np.log10(Eny)[cls_KKM] < eny_lim[1]))[0]:
            # Idx, Dur, Eny, TRAI
            print(i, np.log10(Dur)[cls_KKM][i], np.log10(Eny)[cls_KKM][i], '{:.0f}'.format(chan[cls_KKM][i][-1]))

    def plot_2cls_wave(self, TRAI_select_1, TRAI_select_2, same, value, valid=False):
        fig = plt.figure(figsize=(9.2, 3), num='Waveforms with same %s--%d μV' % (same, value))
        fig.text(0.48, 0.24, self.status, fontdict={'family': 'Arial', 'fontweight': 'bold', 'fontsize': 12},
                 horizontalalignment="right")
        fig.text(0.975, 0.24, self.status, fontdict={'family': 'Arial', 'fontweight': 'bold', 'fontsize': 12},
                 horizontalalignment="right")
        i = self.data_tra[TRAI_select_1 - 1]
        if i[-1] != TRAI_select_1:
            print('Error: TRAI %d in data_tra is inconsistent with %d by input!' % (i[-1], TRAI_select_1))
            return
        valid_time, valid_data = self.cal_wave(i, valid=valid)

        ax = fig.add_subplot(1, 2, 1)
        ax.plot(valid_time, valid_data, lw=0.5, color=self.color_1)
        ax.axhline(abs(i[2]), 0, valid_data.shape[0], linewidth=1, color="black")
        ax.axhline(-abs(i[2]), 0, valid_data.shape[0], linewidth=1, color="black")
        plot_norm(ax, xlabel='Time (μs)', ylabel='Amplitude (μV)', legend=False, grid=True)

        ax2 = fig.add_subplot(1, 2, 2)
        i = self.data_tra[TRAI_select_2 - 1]
        if i[-1] != TRAI_select_2:
            print('Error: TRAI %d in data_tra is inconsistent with %d by input!' % (i[-1], TRAI_select_2))
            return
        valid_time, valid_data = self.cal_wave(i, valid=valid)
        ax2.plot(valid_time, valid_data, lw=0.5, color=self.color_2)
        ax2.axhline(abs(i[2]), 0, valid_data.shape[0], linewidth=1, color="black")
        ax2.axhline(-abs(i[2]), 0, valid_data.shape[0], linewidth=1, color="black")
        plot_norm(ax2, xlabel='Time (μs)', ylabel='Amplitude (μV)', legend=False, grid=True)

    def plot_wave_TRAI(self, k, valid=True):
        # Waveform with specific TRAI
        i = self.data_tra[k - 1]
        if i[-1] != k:
            return str('Error: TRAI %d in data_tra is inconsistent with %d by input!' % (i[-1], k))
        time, sig = self.cal_wave(i, valid=valid)

        fig = plt.figure(figsize=(6, 4.1), num='Waveform--TRAI %d (%s)' % (k, valid))
        fig.text(0.95, 0.17, self.status, fontdict={'family': 'Arial', 'fontweight': 'bold', 'fontsize': 12},
                 horizontalalignment="right")
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(time, sig, lw=1)
        if self.device == 'vallen':
            plt.axhline(abs(i[2]), 0, sig.shape[0], linewidth=1, color="black")
            plt.axhline(-abs(i[2]), 0, sig.shape[0], linewidth=1, color="black")
        elif self.device == 'pac':
            plt.axhline(abs(self.thr), 0, sig.shape[0], linewidth=1, color="black")
            plt.axhline(-abs(self.thr), 0, sig.shape[0], linewidth=1, color="black")
        plot_norm(ax, 'Time (μs)', 'Amplitude (μV)', title='TRAI:%d' % k, legend=False, grid=True)

    def save_wave(self, TRAI, pop):
        # Save waveform
        os.chdir(self.path)
        for idx, j in enumerate(tqdm(TRAI)):
            i = self.data_tra[j - 1]
            valid_time, valid_data = self.cal_wave(i)
            with open(self.path_pri[:-6] + '_pop%s-%d' % (pop, idx + 1) + '.txt', 'w') as f:
                f.write('Time, Signal\n')
                for k in range(valid_data.shape[0]):
                    f.write("{}, {}\n".format(valid_time[k], valid_data[k]))

class Frequency:
    def __init__(self, color_1, color_2, data_tra, path, path_pri, status, device, thr_dB=25, size=500):
        self.data_tra = data_tra
        self.waveform = Waveform(color_1, color_2, data_tra, path, path_pri, status, device, thr_dB)
        self.size = size
        self.grid = np.linspace(0, pow(10, 6), self.size)
        self.status = status
        self.device = device
        self.thr = pow(10, thr_dB / 20)

    def cal_frequency(self, k, valid=True):
        if self.device == 'vallen':
            i = self.data_tra[k]
            sig = np.multiply(array.array('h', bytes(i[-2])), i[-3] * 1000)
            thr, Fs = i[2], i[3]
            # Ts = 1 / Fs
            if valid:
                valid_wave_idx = np.where(abs(sig) >= thr)[0]
                sig = sig[valid_wave_idx[0]:(valid_wave_idx[-1] + 1)]
        elif self.device == 'pac':
            i = self.data_tra[k]
            Fs = 1 / i[2]
            sig = i[-2]
            if valid:
                valid_wave_idx = np.where(abs(sig) >= self.thr)[0]
                sig = sig[valid_wave_idx[0]:(valid_wave_idx[-1] + 1)]
        N = sig.shape[0]
        fft_y = fft(sig)
        abs_y = np.abs(fft_y)
        normalization = abs_y / N
        normalization_half = normalization[range(int(N / 2))]
        frq = (np.arange(N) / N) * Fs
        half_frq = frq[range(int(N / 2))]
        return half_frq, normalization_half

    def cal_ave_freq(self, TRAI):
        Res = np.array([0 for _ in range(self.size)]).astype('float64')

        for j in TRAI:
            half_frq, normalization_half = self.cal_frequency(j - 1, valid=False)
            valid_idx = int((pow(10, 6) / max(half_frq)) * half_frq.shape[0])
            tmp = [0 for _ in range(self.size)]
            i = 1
            for j, k in zip(half_frq[:valid_idx], normalization_half[:valid_idx]):
                while True:
                    if self.grid[i - 1] <= j < self.grid[i]:
                        tmp[i - 1] += k
                        break
                    i += 1
            Res += np.array(tmp)
        return Res

    def cla_wtpacket(self, signal, w, n, plot=False):
        w = pywt.Wavelet(w)
        wp = pywt.WaveletPacket(data=signal, wavelet=w, mode='symmetric',maxlevel=n)

        map = {}
        map[1] = signal
        for row in range(1,n+1):
            lev = []
            for i in [node.path for node in wp.get_level(row, 'freq')]:
                map[i] = wp[i].data

        re = []
        for i in [node.path for node in wp.get_level(n, 'freq')]:
            re.append(wp[i].data)
        energy = []
        for i in re:
            energy.append(pow(np.linalg.norm(i,ord=None),2))

        if plot:
            plt.figure(dpi=100)
            plt.subplot(n+1,1,1)
            plt.plot(map[1])
            for i in range(2,n+2):
                level_num = pow(2,i-1)
                # ['aaa', 'aad', 'add', 'ada', 'dda', 'ddd', 'dad', 'daa']
                re = [node.path for node in wp.get_level(i-1, 'freq')]
                for j in range(1,level_num+1):
                    plt.subplot(n+1,level_num,level_num*(i-1)+j)
                    plt.plot(map[re[j-1]])
            plt.figure(dpi=100)
            values = [i/sum(energy) for i in energy]
            index = np.arange(pow(2,n))
            p2 = plt.bar(index, values, 0.45, label="num", color="#87CEFA")
            plt.xlabel('clusters')
            plt.ylabel('number of reviews')
            plt.title('Cluster Distribution')
            plt.xticks(index, ('7', '8', '9', '10', '11', '12', '13', '14'))
            plt.legend(loc="upper right")
        return map, wp, energy
    
    def plot_wave_frequency(self, TRAI, valid=False, n=3, wtpacket=False, wtpacket_eng=False):
        fig = plt.figure(figsize=(9.2, 3), num='Waveform & Frequency--TRAI %d' % TRAI)
        i = self.data_tra[TRAI - 1]
        valid_time, valid_data = self.waveform.cal_wave(i, valid=valid)
        half_frq, normalization_half = self.cal_frequency(TRAI - 1, valid=valid)

        ax = fig.add_subplot(1, 2, 1)
        ax.plot(valid_time, valid_data)
        ax.axhline(abs(i[2]), 0, valid_data.shape[0], linewidth=1, color="black")
        ax.axhline(-abs(i[2]), 0, valid_data.shape[0], linewidth=1, color="black")
        plot_norm(ax, 'Time (μs)', 'Amplitude (μV)', legend=False, grid=True)

        ax = fig.add_subplot(1, 2, 2)
        ax.plot(half_frq, normalization_half)
        plot_norm(ax, 'Freq (Hz)', '|Y(freq)|', x_lim=[0, pow(10, 6)], legend=False)
        
        if wtpacket:
            fig = plt.figure(figsize=(15, 7), num='WaveletPacket--TRAI %d' % TRAI)
            map, wp, energy = self.cla_wtpacket(valid_data, 'db8', n)
            for i in range(2, n+2):
                level_num = pow(2, i-1)
                # ['aaa', 'aad', 'add', 'ada', 'dda', 'ddd', 'dad', 'daa']
                re = [node.path for node in wp.get_level(i-1, 'freq')]
                for j in range(1,level_num+1):
                    ax = fig.add_subplot(n, level_num, level_num*(i-2)+j)
                    ax.plot(map[re[j-1]])
                    plot_norm(ax, '', '', legend=False)
            if wtpacket_eng:
                fig = plt.figure(figsize=(4.6, 3), num='WaveletPacket Energy--TRAI %d' % TRAI)
                ax = fig.add_subplot()
                values = [i/sum(energy) for i in energy]
                index = np.arange(pow(2,n))
                ax.bar(index, values, 0.45, color="#87CEFA")
                plot_norm(ax, 'Clusters', 'Reviews (%)', legend=False)

    def plot_ave_freq(self, Res, N, title):
        fig = plt.figure(figsize=(6, 4.1), num='Average Frequency--%s' % title)
        ax = fig.add_subplot()
        ax.plot(self.grid, Res / N)
        plot_norm(ax, xlabel='Freq (Hz)', ylabel='|Y(freq)|', title='Average Frequency', legend=False)

    def plot_freq_TRAI(self, k, valid=False):
        # Frequency with specific TRAI
        half_frq, normalization_half = self.cal_frequency(k-1, valid=valid)

        fig = plt.figure(figsize=(6, 4.1), num='Frequency--TRAI:%d (%s)' % (k, valid))
        ax = plt.subplot()
        ax.plot(half_frq, normalization_half)
        plot_norm(ax, 'Freq (Hz)', '|Y(freq)|', x_lim=[0, pow(10, 6)], title='TRAI:%d' % k, legend=False)

    def plot_2cls_freq(self, TRAI_1, TRAI_2, same):
        fig = plt.figure(figsize=(6.5, 10), num='Frequency with same %s' % same)
        for idx, k in enumerate(TRAI_1):
            half_frq, normalization_half = self.cal_frequency(k - 1)
            ax = fig.add_subplot(5, 2, 1 + idx * 2)
            ax.plot(half_frq, normalization_half)
            plot_norm(ax, 'Freq (Hz)', '|Y(freq)|', x_lim=[0, pow(10, 6)], legend=False)

            half_frq, normalization_half = self.cal_frequency(TRAI_2[idx] - 1)
            ax2 = fig.add_subplot(5, 2, 2 + idx * 2)
            ax2.plot(half_frq, normalization_half)
            plot_norm(ax2, 'Freq (Hz)', '|Y(freq)|', x_lim=[0, pow(10, 6)], legend=False)
    
    def cal_freq_max(self, ALL_TRAI, status='peak'):
        freq, stage_idx = [], []
        for trai in tqdm(ALL_TRAI):
            half_frq, normalization_half = self.cal_frequency(trai - 1)
            if status == 'peak':
                freq.append(half_frq[np.argmax(normalization_half)])
            elif status == 'three peaks':
                freq_max = []
                idx_1 = np.where(half_frq < 300000)
                idx_2 = np.where((half_frq >= 300000) & (half_frq < 500000))
                idx_3 = np.where(half_frq >= 500000)
                normalization_max = 0
                if idx_1.shape[0] != 0 and idx_2.shape[0] != 0 and idx_3.shape[0] != 0:
                    for i, idx in enumerate([idx_1, idx_2, idx_3]):
                        if max(normalization_half[idx]) > normalization_max:
                            idx_max = idx
                            tmp = i + 1
                            normalization_max = max(normalization_half[idx])
                            freq_max = half_frq[idx_max][np.argmax(normalization_half[idx_max])]
                freq.append(freq_max)
                stage_idx.append(tmp)
        freq = np.array(freq)
        stage_idx = np.array(stage_idx)
        return freq, stage_idx

#### Validate features

In [None]:
# Time, Amp, RiseTime, Dur, Eny, Counts, TRAI
for i in TRAI_2:
    vallen = data_pri[i-1]
    print('{:.8f} {} {} {} {} {:.0f} {:.0f}'.format(vallen[1], vallen[4], vallen[5], vallen[6], vallen[-4], vallen[-2], vallen[-1]))

In [None]:
for i in TRAI_2:
    validation(i-1)

#### Plot waveform

In [322]:
waveform = Waveform(color_1, color_2, data_tra, path, path_pri, 'Ni-electrolysis', 'vallen')

In [323]:
waveform.plot_wave_TRAI(2345, valid=True)

In [526]:
for idx, value in zip([i for i in range(5)], [32, 56, 105, 185, 285]):
    waveform.plot_2cls_wave(TRAI_same_amp_1[idx], TRAI_same_amp_2[idx], 'amplitude', value)

# waveform.plot_2cls_wave(TRAI_same_eny_1, TRAI_same_eny_2, 'energy')

#### Plot frequency & average frequency

In [333]:
frequency = Frequency(color_1, color_2, data_tra, path, path_pri, 'Ni-electrolysis', 'vallen')

In [24]:
TRAI_1_all = chan[cls_KKM[0]][:, -1].astype(int)
TRAI_2_all = chan[cls_KKM[1]][:, -1].astype(int)
TRAI_all = np.append(TRAI_1_all, TRAI_2_all)

In [None]:
for trai, title in zip([TRAI_all, TRAI_1_all, TRAI_2_all], ['Whole', 'Population 1', 'Population 2']):
    Res = frequency.cal_ave_freq(trai)
    frequency.plot_ave_freq(Res, trai.shape[0], title)
frequency.plot_wave_frequency(TRAI_same_amp_1, '1')
frequency.plot_wave_frequency(TRAI_same_amp_2, '2')

In [None]:
frequency.plot_wave_frequency(trai_2_1, 2)

In [216]:
frequency.plot_2cls_freq(TRAI_same_amp_1, TRAI_same_amp_2, 'amplitude')

### Export data through time selection

In [None]:
class Read_with_time:
    def __init__(self, path_pri, time, fold):
        self.time = time
        self.path_pri = path_pri
        self.fold = fold
        
    def select_time(self):
        conn_pri = sqlite3.connect(self.path_pri)
        result_pri = conn_pri.execute("Select SetID, Time, Chan, Thr, Amp, RiseT, Dur, Eny, RMS, Counts, TRAI FROM view_ae_data")
        chan_1, chan_2, chan_3, chan_4 = [], [], [], []
        t = [[] for _ in range(len(self.time)-1)]
        N_pri = sqlite_read(self.path_pri)
        for _ in tqdm(range(N_pri)):
            i = result_pri.fetchone()
            if i[-2] is not None and i[-2] >= 6 and i[-1] > 0:
                for idx, chan in zip(np.arange(1, 5), [chan_1, chan_2, chan_3, chan_4]):
                   if i[2] == idx:
                        chan.append(i)
                        for j in range(len(t)):
                            if time[j] <= i[1] < time[j+1]:
                                t[j].append(i)
                                break
                        break
        return t, chan_1, chan_2, chan_3, chan_4
    
    def export_feature(self, t):
        for i in range(len(self.time)-1):
            with open(self.fold + '-%d-%d.txt' % (self.time[i], self.time[i+1]), 'w') as f:
                f.write('SetID, TRAI, Time, Chan, Thr, Amp, RiseT, Dur, Eny, RMS, Counts\n')
                # ID, Time(s), Chan, Thr(μV), Thr(dB), Amp(μV), Amp(dB), RiseT(s), Dur(s), Eny(aJ), RMS(μV), Counts, Frequency(Hz)
                for i in t[i]:
                    f.write('{}, {}, {:.8f}, {}, {:.7f}, {:.7f}, {:.2f}, {:.2f}, {:.7f}, {:.7f}, {}\n'.format(
                        i[0], i[-1], i[1], i[2], i[3], i[4], i[5], i[6], i[7], i[8], i[9]))

In [None]:
time = [0, 1600, 3044, 4177, 5997, 8285, 11592, 16240, 18000]
read_with_time = Read_with_time(path_pri, time)
t, chan_1, chan_2, chan_3, chan_4 = read_with_time.select_time()

### Waveform-Find with TRAI

#### 316L

##### Random select 10 waves for each pop

In [None]:
TRAI_1 = [2939, 33400, 3391, 1720, 1882, 12861, 21555, 85898, 65567, 42415]
TRAI_2 = [39553, 22378, 88770, 1808, 102212, 225509, 26146, 55467, 81744, 134964]

#### Ni-electrolysis

In [None]:
'''Ni-electrolysis
x1_lim=[pow(10, 3), float('inf')], x2_lim=[pow(10, 2.25), float('inf')], plot_lim=[300, 35]
(8.714970845731935,
 [1.2903438545965467, 1.253999359428133],
 [-1.9635628529617444, -0.6698164866272941],
(4.186621867300563,
 [2.1170568158769116, 2.0237536458394985],
 [-2.8455989788969775, -3.2314155452663345])
'''
fit_with_x1 = np.array([ 20,  22,  34,  42,  43,  55,  63,  70,  71,  73,  78,  87,  99,
         104, 105, 111, 112, 119, 128, 130, 137, 149, 157, 159, 161, 164,
         166, 168, 176, 180, 187, 190, 206, 209, 210])
fit_with_x2 = np.array([  2,   5,   7,  25,  29,  45,  50,  59,  75,  82,  86, 112, 123,
         135, 137, 139, 140, 144, 146, 148, 154, 180, 184, 187, 203, 234,
         261, 265, 275, 277])

##### Random select 10 waves for each pop

In [None]:
# 1.263, 1.395, 1.538, 1.728, 1.832, 2.233, 2.450, 2.573, 2.702, 3.079
idx_1 = [5, 70, 3, 9, 0, 20, 136, 42, 54, 108]
TRAI_1 = [735, 3218, 593, 1138, 323, 2001, 6585, 2614, 2832, 4619]

# 1.365, 1.495, 1.773, 1.839, 1.95, 2.258, 2.390, 2.507, 2.680, 2.875
idx_2 = [0, 172, 1, 3, 5, 112, 7, 144, 2, 137]
TRAI_2 = [323, 9776, 383, 593, 735, 4694, 909, 7445, 405, 6695]

##### Random select 5 waves for each pop

In [None]:
# 0.115, 0.275, 0.297, 0.601, 1.024
idx_select_2 = [50, 148, 51, 252, 10]
TRAI_select_2 = [3067, 11644, 3079, 28583, 1501]

# 0.303, 0.409, 0.534, 0.759, 1.026
idx_select_1 = [13, 75, 79, 72, 71]
TRAI_select_1 = [2949, 14166, 14815, 14140, 14090]

##### Misclassified 6 waves for pop 1

In [None]:
# Misclassified point
idx_Miscls_1 = [18, 69, 5, 12, 53, 202]
TRAI_Miscls_1 = [4816, 13486, 2226, 3067, 11644, 37244]

##### Select 5 waves for each pop with same energy

In [None]:
# Comparied with same energy
idx_same_eny_2 = [79, 229, 117, 285, 59]
TRAI_same_eny_2 = [4012, 22499, 7445, 34436, 3282]

idx_same_eny_1 = [160, 141, 57, 37, 70]
TRAI_same_eny_1 = [26465, 23930, 11974, 9379, 13667]

##### Select 5 waves for each pop with same amplitude

In [492]:
# Comparied with same amplitude
idx_same_amp_2 = [90, 23, 48, 50, 184]
TRAI_same_amp_2 = [4619, 2229, 2977, 3014, 18224]

idx_same_amp_1 = [16, 26, 87, 34, 128]
TRAI_same_amp_1 = [3932, 7412, 16349, 9001, 22112]

##### Select frequency for each pop

In [91]:
# Types of spectrum in each category
trai_1 = chan[cls_KKM[0]][:, -1].astype(int) # 306
trai_2 = chan[cls_KKM[1]][:, -1].astype(int) # 209

trai_1_1 = [4307, 3079, 42489, 4816, 2949]
trai_1_2 = [37064, 21233, 32494, 11974, 12805]
trai_1_3 = [17364, 13460, 12614, 23930, 14220]

trai_2_1 = [3282, 2756, 2001, 5365, 3738]
trai_2_2 = [34351, 2773, 5110, 2300, 27364]
trai_2_3 = [18450, 18924, 19347, 6343, 14815]
trai_2_4 = [3686, 3527, 2684, 3218, 1869]

#### Ni-pure

In [None]:
'''Ni-pure
x1_lim=[pow(10, 2.75), float('inf')], x2_lim=[pow(10, 1.7), pow(10, 2.0)], plot_lim=[150, 30]
(9.408311638695471,
 [0.9126500470268016, 1.456145928319286],
 [-0.7280740406031267, -0.9006654973796809],
(5.766350749078614,
 [1.875719534565335, 1.2106043595263152],
 [-2.4122140513381365, -1.8502129064842026])
'''
fit_with_x1 = np.array([ 2,  3, 14, 26, 27, 29, 42, 45, 46, 48, 49, 50, 58, 60, 62, 64, 71,
         74, 75, 79, 80, 84, 85, 86, 88, 90, 92])
fit_with_x2 = np.array([ 25,  35,  36,  41,  45,  50,  60,  64,  74,  77,  84,  86,  87,
          90,  91,  99, 101, 102, 107, 108, 111, 113, 115, 118, 119, 139])

##### Random select 10 waves for each pop

In [None]:
# 1.263, 1.365, 1.422, 1.552, 1.601, 1.749, 1.875, 1.965, 2.339, 2.522
idx_1 = [23, 2, 77, 3, 19 ,0 , 24, 22, 59, 101]
TRAI_1 = [13345, 2751, 63678, 2876, 8716, 425, 13697, 12594, 49370, 81608]

# 1.288, 1.365, 1.413, 1.552, 1.676, 1.785, 1.899, 1.975, 2.149, 2.341
idx_2 = [60, 5, 9, 21, 8, 38, 0, 109, 3, 44]
TRAI_2 = [52208, 3851, 4720, 11113, 4701, 22311, 425, 86575, 2876, 26988]

##### Select 5 waves for each pop with same amplitude

In [488]:
# Comparied with same amplitude
idx_same_amp_1 = [56, 41, 39, 62, 45]
TRAI_same_amp_1 = [104771, 84019, 82239, 114468, 88835]
idx_same_amp_2 = [136, 91, 139, 118, 61]
TRAI_same_amp_2 = [4078227, 121368, 4114923, 168997, 74951]

#### Al alloy

In [None]:
'''Al alloy
x1_lim=[pow(10, 2.6), pow(10, 3.6)], x2_lim=[pow(10, 1.6), float('inf')], plot_lim=[350, 20]
(9.799479289991396,
 [1.4643657514207225, 1.038643211760153],
 [-2.622637801482029, -0.29024759045676185],
(5.659182491899722,
 [1.7385373218375684, 1.537111255283656],
 [-2.101862723802673, -2.4298374025456746])
'''
fit_with_x1 = np.array([  0,   1,   3,   4,   9,  20,  27,  31,  32,  37,  41,  43,  44,
              46,  55,  56,  58,  63,  65,  68,  72,  76,  79,  81,  85,  86,
              87,  88,  90,  92,  93,  96,  97,  98, 101, 106, 108, 113, 114,
              115, 119, 128, 130])
fit_with_x2 = np.array([  1,  11,  21,  23,  25,  28,  30,  31,  34,  36,  38,  41,  43,
          44,  45,  46,  47,  48,  49,  51,  53,  54,  55,  56,  57,  59,
          63,  65,  67,  69,  70,  71,  72,  73,  75,  78,  79,  80,  81,
          84,  85,  86,  87,  88,  89,  90,  92,  94,  95,  97,  98,  99,
         101, 102, 103, 105, 106, 109, 110, 111, 112, 113, 114, 115, 116,
         117, 118, 119, 120, 121, 124, 125, 127, 129, 131, 132, 134, 135,
         139, 141, 147, 149, 150, 151, 152, 154, 157, 160, 161, 162, 163,
         165, 166, 167, 168, 169, 170, 171, 172, 176, 177, 181, 187, 188,
         190, 191, 195, 196, 197, 198, 201, 203, 204, 205, 207, 208, 210,
         212, 213, 214, 215, 216, 218, 219, 220, 224, 225, 227, 228, 229,
         231, 233, 236, 237, 238, 240, 241, 242, 244, 248, 250, 252, 255,
         258, 259, 261, 262, 263, 266, 270, 271, 273, 274, 276, 277, 278,
         279, 282, 285, 286, 287, 294, 296, 298, 299, 300, 301, 303, 304,
         305, 307, 308, 312, 315, 318, 319, 321, 322, 327, 330, 332, 335,
         336, 337, 338, 340, 341, 342, 343, 348, 350, 352, 354, 355, 356,
         357, 360, 361, 362, 372, 373, 382, 383, 384, 391, 393, 399, 400,
         401, 402, 405, 408, 410, 411, 412, 415, 418, 419, 422, 424, 429,
         431, 433, 434, 437, 439, 440, 442, 445, 447, 449, 451, 455])

##### Select 5 waves for each pop with same amplitude

In [None]:
# 6016_CR_1
# Comparied with same amplitude
idx_select_2 = [32, 39, 156, 17, 20]
TRAI_select_2 = [8381, 8961, 15592, 7295, 7402]

idx_select_1 = [51, 72, 71, 9, 66]
TRAI_select_1 = [5350, 7987, 7963, 785, 7819]

### Read

In [None]:
def sqlite_read(path):
    """
    python读取sqlite数据库文件
    """
    mydb = sqlite3.connect(path)                # 链接数据库
    mydb.text_factory = lambda x: str(x, 'gbk', 'ignore')
    cur = mydb.cursor()                         # 创建游标cur来执行SQL语句

    # 获取表名
    cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
    Tables = cur.fetchall()                     # Tables 为元组列表
#     print(Tables)

#     i = 0
#     while True:
#         try:
#             tbl_name = Tables[i][0]                     # 获取第一个表名
#             print(tbl_name)
#         except:
#             break
#         # 获取表的列名
#         cur.execute("SELECT * FROM {}".format(tbl_name))
#         col_name_list = [tuple[0] for tuple in cur.description]
#         pprint.pprint(col_name_list)
#         i += 1

    # 获取表结构的所有信息
    cur.execute("SELECT * FROM {}".format(Tables[3][0]))
    res = cur.fetchall()
#     pprint.pprint(cur.fetchall()[-1][1])
    return int(res[-2][1]), int(res[-1][1])

In [None]:
sqlite_read(path_pri)

In [None]:
conn.execute("PRAGMA table_info(view_tr_data)").fetchall()