In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import time
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from tqdm import tqdm
from scipy.fftpack import fft
from matplotlib.pylab import mpl
import csv


# Select the Qt5 GUI backend for matplotlib figures.
%matplotlib qt5
# Draw the tick marks of both axes pointing into the plot area.
plt.rcParams['xtick.direction'] = 'in'
plt.rcParams['ytick.direction'] = 'in'
mpl.rcParams['axes.unicode_minus'] = False  # Display minus signs (ASCII hyphen instead of the Unicode minus glyph)
# Last expression of the cell: echoes the current working directory as output.
os.getcwd()

'C:\\Users\\Yuan\\Desktop\\AE'

## Feature curve

In [2]:
def cal_linear(energy, inter, mid, interval_num, idx = 0):
    """Estimate a probability density on decade-wise linear bins.

    Every entry ``lo`` of ``inter`` opens one group of ``interval_num``
    equally wide bins covering ``[lo, 10 * lo)`` (or ``[0, 1)`` when ``lo``
    is zero).  The samples are counted per bin, empty bins are dropped, and
    the relative frequency of each remaining bin is divided by the bin width
    that ``mid`` supplies for the group the bin belongs to.

    Parameters
    ----------
    energy : iterable of float
        Samples in ascending order; the bin scan only ever moves forward.
    inter : sequence of float
        Lower bounds of the bin groups, in ascending order.
    mid : sequence of float
        Bin width to divide by for each group, aligned with ``inter``.
    interval_num : int
        Number of bins per group.
    idx : int, optional
        Index of the bin at which the forward scan starts.

    Returns
    -------
    xx : numpy.ndarray
        One abscissa per non-empty bin: the mean of the bin's left edge and
        the left edge of the next non-empty bin; the last entry is the left
        edge of the last non-empty bin.
    yy : numpy.ndarray
        Relative frequency of each non-empty bin divided by its width.
        Both arrays are empty when ``energy`` is empty.

    Raises
    ------
    IndexError
        If a sample cannot be placed in the current bin or any later one
        (for example a value below the first edge or unsorted input).
    """
    # Left edges of all bins, one group after the other.
    segments = [np.array([])]
    for lower in inter:
        upper = lower * 10 if lower != 0 else 1
        segments.append(np.linspace(lower, upper, interval_num, endpoint=False))
    edges = np.concatenate(segments)

    # Count the samples per bin with a single forward scan.
    counts = np.zeros(edges.shape[0])
    last = edges.shape[0] - 1
    pos = idx
    for value in energy:
        # Advance until [edges[pos], edges[pos + 1]) holds the value; the
        # last bin has no upper bound.
        while pos < last and not (edges[pos] <= value < edges[pos + 1]):
            pos += 1
        if pos > last or not (edges[pos] <= value):
            raise IndexError(
                'value {} does not fall into any bin at or after the current '
                'scan position; energy must be sorted and lie within the '
                'binned range'.format(value))
        counts[pos] += 1

    # Keep the non-empty bins only and turn counts into relative frequencies.
    occupied = counts != 0
    x, y = edges[occupied], counts[occupied]
    if x.shape[0] == 0:
        # No samples at all: nothing to normalise or to index.
        return np.zeros(0), np.zeros(0)
    yy = y / np.sum(y)

    # Representative abscissa: midpoint between consecutive non-empty left
    # edges, and the left edge itself for the final bin.
    xx = np.empty(x.shape[0])
    xx[:-1] = (x[:-1] + x[1:]) / 2
    xx[-1] = x[-1]

    # Divide by the width of the group each bin was actually counted in.
    # The group is looked up from the bin's own left edge: the midpoint in
    # ``xx`` can already lie in the following group when bins are sparse.
    group = np.searchsorted(np.asarray(inter), x, side='right') - 1
    widths = np.asarray(mid, dtype=float)[group]
    yy = yy / widths
    return xx, yy


def cal_PDF(tmp, inter, mid, interval_num, features_path):
    """Scatter the binned probability density of one feature on log-log axes.

    The samples in ``tmp`` are sorted and handed to ``cal_linear`` together
    with ``inter``, ``mid`` and ``interval_num``; the base-10 logarithms of
    the returned abscissae and densities are drawn in subplot ``331 + i`` of
    the module-level figure ``fig``.  ``i``, ``xlabel`` and ``ylabel`` are
    also read from module scope.  ``features_path`` is accepted but not used.
    """
    ordered = list(tmp)
    ordered.sort()
    centers, density = cal_linear(ordered, inter, mid, interval_num)

    panel = fig.add_subplot(331 + i)
    log_centers = np.log10(centers)
    log_density = np.log10(density)
    panel.scatter(log_centers, log_density, edgecolors='blue')

    panel.set_xlabel(xlabel)
    panel.set_ylabel(ylabel)
    # NOTE(review): no artist above carries a label, so this legend has
    # nothing to show.
    panel.legend(loc='upper right')
    

def cal_CCDF(tmp, features_path, cls_1, cls_2):
    """Plot and export the empirical CCDF of a feature for two classes.

    For each class the selected samples are sorted, the midpoints between
    consecutive samples are used as abscissae, and the fraction of samples
    lying above each midpoint is the ordinate.  Both curves are drawn on
    log10-log10 axes and each one is written to its own text file.

    Parameters
    ----------
    tmp : numpy.ndarray
        Feature values of all samples.
    features_path : str
        Path whose last four characters are dropped to form the prefix of
        the two output file names.
    cls_1, cls_2 : index or boolean mask
        Selectors applied to ``tmp`` to pick the samples of class 1 / 2.

    Notes
    -----
    ``xlabel``, ``color_1`` and ``color_2`` are read from module scope; the
    last four characters of ``xlabel`` are dropped in the file names.
    """
    ax = plt.subplot()
    groups = (('1', cls_1, color_1), ('2', cls_2, color_2))
    for tag, mask, color in groups:
        xx, yy = _ccdf_points(tmp[mask])
        ax.plot(np.log10(xx), np.log10(yy), markersize=25, color=color,
                label='Class ' + tag)

        out_path = features_path[:-4] + '_' + tag + ' ' + xlabel[:-4] + '_CCDF.txt'
        with open(out_path, 'w') as f:
            f.write('{}, CCD C(s)\n'.format(xlabel))
            for x_val, y_val in zip(xx, yy):
                f.write('{}, {}\n'.format(x_val, y_val))


def _ccdf_points(values):
    """Return midpoints of consecutive sorted samples and the CCDF at them."""
    ordered = np.sort(np.asarray(values))
    n = len(ordered)
    xx = [np.mean([ordered[k], ordered[k + 1]]) for k in range(n - 1)]
    # Exactly n - k - 1 samples lie above the midpoint between the k-th and
    # (k + 1)-th sorted sample (0-based), so the curve runs from (n - 1) / n
    # down to 1 / n and never exceeds 1.
    yy = [(n - k - 1) / n for k in range(n - 1)]
    return xx, yy


def cal_ML(tmp, N, features_path):
    """Plot the maximum-likelihood exponent against the lower cutoff.

    The samples are sorted once.  For every ``j`` in ``range(N)`` the ``j``
    smallest samples are discarded, the smallest remaining sample ``E0`` is
    the cutoff, and the exponent is ``1 + N' / sum(log(x / E0))`` over the
    ``N' = N - j`` remaining samples, with error bar
    ``(exponent - 1) / sqrt(N')``.  The exponents are drawn with error bars
    against ``log10`` of the sorted samples in subplot ``334 + i`` of the
    module-level figure ``fig``.

    Parameters
    ----------
    tmp : array-like of float
        Feature values.
    N : int
        Number of cutoffs to evaluate.  The sorted samples are indexed with
        ``range(N)`` and plotted against ``N`` exponents, so it is expected
        to equal ``len(tmp)``.
    features_path : str
        Accepted but not used.

    Notes
    -----
    ``fig``, ``i`` and ``xlabel`` are read from module scope.
    """
    # Sort once instead of once per cutoff.
    ordered = np.sort(np.asarray(tmp))

    ML_y1, Error_bar1 = [], []
    for j in tqdm(range(N)):
        valid_x = ordered[j:]
        E0 = valid_x[0]
        # When all remaining samples equal E0 (always true for the last
        # cutoff, where a single sample remains) the sum is 0 and the
        # exponent is infinite; the y-limits set below keep such points out
        # of view.
        Sum = np.sum(np.log(valid_x / E0))
        N_prime = N - j
        alpha = 1 + N_prime / Sum
        error_bar = (alpha - 1) / pow(N_prime, 0.5)
        ML_y1.append(alpha)
        Error_bar1.append(error_bar)

    ax2 = fig.add_subplot(334 + i)
    ax2.errorbar(np.log10(ordered), ML_y1, yerr=Error_bar1,
                 fmt='o', ecolor='purple', color='purple',
                 elinewidth=1, capsize=2, ms=5)
    ax2.set_ylim(1, 3)
    ax2.set_xlabel(xlabel)
    ax2.set_ylabel(r'$\epsilon$')

    
def cal_correlation(feature):
    """Scatter three pairs of feature columns against each other (log-log).

    The column pairs (0, 3), (2, 0) and (2, 3) of ``feature`` are drawn in
    subplots 337, 338 and 339 of the module-level figure ``fig``; the first
    column of each pair goes on the x axis.  Axis labels are taken from the
    module-level list ``xlabelz``.

    Parameters
    ----------
    feature : numpy.ndarray
        Two-dimensional array; columns 0, 2 and 3 are read.
    """
    cor_idx = [[0, 3], [2, 0], [2, 3]]
    # Position in ``xlabelz`` of the label of each feature column.  The
    # labels are listed in the order of the columns 0, 2, 3, so column 2 maps
    # to slot 1 and column 3 to slot 2 (``max(0, col - 2)`` mapped them to
    # slots 0 and 1 and mislabelled both).
    label_slot = {0: 0, 2: 1, 3: 2}
    for idx, [i, j] in enumerate(cor_idx):
        ax3 = fig.add_subplot(337 + idx)
        cor_x = np.log10(feature[:, i])
        cor_y = np.log10(feature[:, j])
        ax3.scatter(cor_x, cor_y, edgecolors='blue')
        ax3.set_xlabel(xlabelz[label_slot[i]])
        ax3.set_ylabel(xlabelz[label_slot[j]])

In [6]:
if __name__ == "__main__":
    # Directory that holds the feature export; it becomes the working
    # directory.  Locations used before:
    #   E:\data\vallen
    #   E:\data\CM-PM-o18-2020.10.17
    #   E:\data\CM-4M-o18-2020.10.17-1-60
    #   C:\Users\Yuan\Desktop
    path = r'C:\Users\Yuan\Desktop'
    os.chdir(path)

    # Feature table, resolved against the working directory set above.
    # Files used before:
    #   Ni-tension test-electrolysis-1-0.01-AE-20201031
    #   r'D:\data\3D porous TC4-2mA-compression test-z1-0.001-20200929\3D porous TC4-2mA-compression test-z1-0.001-20200929.txt'
    #   r'C:\Users\Yuan\Desktop\pri_database.txt'
    #   r'C:\Users\Yuan\Desktop\CM-4M-o18-2020.10.17-1-60.txt'
    #   r'E:\data\CM-PM-o18-2020.10.17\CM-PM-o18-2020.10.17.txt'
    features_path = r'pri_database.txt'

    label_path = r'C:\Users\Yuan\Desktop\label.txt'

    # Skip the header row and, of every row, drop the first six and the last
    # four comma-separated fields.  Remaining columns:
    # Amp,RiseT,Dur,Eny,RMS,Counts
    with open(features_path, 'r') as f:
        feature = np.array([i.split(',')[6:-4] for i in f.readlines()[1:]])
    feature = feature.astype(np.float32)

    # One label per row after the header.  Label 2 is recoded to 0, then a
    # second column flags the rows whose label is 0, so that column 0 is 1
    # for class 1 and column 1 is 1 for the other class.
    with open(label_path, 'r') as f:
        label = np.array([i.strip() for i in f.readlines()[1:]])
    label = label.astype(np.float32).reshape(-1, 1)
    label[np.where(label == 2)] = 0
    ext = np.zeros([feature.shape[0], 1])
    ext[np.where(label == 0)[0].tolist()] = 1
    label = np.concatenate((label, ext), axis=1)
    cls_1 = label[:, 0] == 1
    cls_2 = label[:, 1] == 1

    # Columns analysed: amplitude (0), duration (2), energy (3).
    feature_idx = [0, 2, 3]
    N = feature.shape[0]
    interval_num = 6
    interval = 1 / interval_num
    interz = []
    midz = []

    # Per feature: decade lower bounds (interz) and per-decade bin widths
    # (midz), derived from the number of digits of the integer part of the
    # smallest and largest value.
    # NOTE(review): cal_linear splits the decade starting at 10**p into
    # interval_num bins of width 9 * 10**p / interval_num, while the width
    # stored here for that decade is 10**(p + 1) / interval_num -- confirm
    # which one is intended.
    for idx in feature_idx:
#         tmp = feature[:, idx] * pow(10, 6) if idx == 3 else feature[:, idx]
        tmp = feature[:, idx]
        tmp_max = int(max(tmp))
        tmp_min = int(min(tmp))
        if tmp_min <= 0:
            interz.append([0] + [pow(10, i) for i in range(len(str(tmp_max)))])
            midz.append([interval * pow(10, i)
                         for i in range(len(str(tmp_max)) + 1)])
        else:
            interz.append([pow(10, i) for i in range(len(str(tmp_min)) - 1,
                                                     len(str(tmp_max)))])
            midz.append([interval * pow(10, i)
                         for i in range(len(str(tmp_min)),
                                        len(str(tmp_max)) + 1)])

    xlabelz = ['Amplitude(μV)', 'Duration(μs)', 'Energy(aJ)']
    ylabelz = ['PDF(A)', 'PDF(D)', 'PDF(E)']
    color_1 = [255/255, 0/255, 102/255] # red
    color_2 = [0/255, 136/255, 204/255] # blue

    fig = plt.figure()
    # Alternative analysis (disabled): PDF / ML exponent per feature plus the
    # pairwise correlation panels.
#     for i, [idx, inter, mid, xlabel, ylabel
#             ] in enumerate(zip(feature_idx, interz, midz, xlabelz, ylabelz)):
# #         tmp = feature[:, idx] * pow(10, 6) if idx == 3 else feature[:, idx]
#         tmp = feature[:, idx]
# #         tmp /= min(tmp)
#         cal_PDF(tmp, inter, mid, interval_num, features_path)
# #         cal_ML(tmp, N, features_path)
#     cal_correlation(feature)
    xlabel, ylabel = 'Energy(aJ)', 'PDF(E)'
    # cal_CCDF strips a four-character extension from its path argument, so
    # it must receive the feature file name, not the directory `path`.
    cal_CCDF(feature[:, 3], features_path, cls_1, cls_2)
    plt.tight_layout()

  ax = plt.subplot()


## Frequency domain curve

In [None]:
# Import data
path = r'D:\data\3D porous TC4-2mA-compression test-z1-0.001-20200929'
os.chdir(path)
# Other data sets used before:
#   E:\data\CM-PM-o18-2020.10.17
#   E:\data\CM-4M-o18-2020.10.17-1-60

# The conversion table is named after the data directory.
convert_path = path.split('\\')[-1] + '.txt'

# Directory entries without the first one returned by os.listdir.
# NOTE(review): os.listdir returns entries in arbitrary order, so which entry
# is dropped here is not guaranteed -- confirm.
ls = os.listdir(path)[1:]
# Lookup key of every entry: its 2nd and 3rd underscore-separated parts.
lss = np.array(['_'.join(i.split('_')[1:3]) for i in ls])

# Read the table once (header skipped) and derive both columns from it.
with open(convert_path, "r") as f:
    f.readline()
    rows = [line.split(',') for line in f.readlines()]
# Key of every table row: 3rd field, underscore, 1st field.
valid_idx = np.array(["_".join([row[2].strip(), row[0].strip()])
                      for row in rows])
# Energy of every table row: third field from the end.
energy = np.array([float(row[-3].strip()) for row in rows])

# Map every key to the first directory entry carrying it, so that each table
# row is resolved with one dictionary lookup instead of a scan over `lss`.
first_match = {}
for name, key in zip(ls, lss):
    first_match.setdefault(str(key), name)

file = []
for i in tqdm(valid_idx):
    file.append(path + '\\' + first_match[str(i)])
# file = np.array([path + '\\' + ls[np.where(lss == i)[0][0]] for i in valid_idx])

In [None]:
# Calculate frequency
# Peak frequency of every waveform file.  Entries stay NaN for waveforms that
# have fewer than two samples between the first and last threshold crossing.
freq_max = [np.nan] * len(file)
# Amplitude threshold: 10**(25 / 20) / 1000.
# NOTE(review): presumably a 25 dB detection threshold converted to the unit
# of the stored samples -- confirm.
thr_V = pow(10, 25 / 20) / pow(10, 3)

# Sampling interval and the resulting sampling frequency (5 MHz).
Ts = 0.0000002
Fs = 1 / Ts

for idx, i in enumerate(tqdm(file)):
    with open(i, "r") as f:
        # Skip ten header lines; the next line carries the start time after
        # its first 14 characters, and one more line precedes the samples.
        for _ in range(10):
            f.readline()
        t0 = float(f.readline()[14:])
        amp = np.array([float(line.strip("\n")) for line in f.readlines()[1:]])
#     time[idx] = np.linspace(t0, t0 + 0.0000002 * (amp.shape[0] - 1), amp.shape[0])
#     Amp[idx] = amp[:]

    # Keep the span from the first to the last sample reaching the threshold.
    valid_wave_idx = np.where(abs(amp) >= thr_V)[0]
    if valid_wave_idx.size == 0:
        # Nothing reaches the threshold: no spectrum to evaluate.
        continue
    valid_data = amp[valid_wave_idx[0]:(valid_wave_idx[-1] + 1)]

    N = valid_wave_idx[-1] - valid_wave_idx[0] + 1
    if N < 2:
        # A single sample leaves an empty half spectrum.
        continue
    end = Ts * N
    time_label = np.arange(0, end, Ts)
    frq = (np.arange(N) / N) * Fs
    fft_y = fft(valid_data)

    # Amplitude spectrum normalised by the window length; only the first
    # half (up to Fs / 2) is searched for the peak.
    abs_y = np.abs(fft_y)
    normalization = abs_y / N
    half = int(N / 2)
    half_frq = frq[:half]
    normalization_half = normalization[:half]
    freq_max[idx] = half_frq[np.argmax(normalization_half)]

#     # Plot
#     titles = ['Original Waveform', 'Bilateral amplitude spectrum (normalized)', 'Unilateral amplitude spectrum (normalized)']
#     colors = ['purple', 'green', 'blue']
#     x_label = ['Time (s)', 'Freq (Hz)', 'Freq (Hz)']
#     y_label = ['Amplitude (μV)', '|Y(freq)|', '|Y(freq)|']
#     xs = [time_label, frq, half_frq]
#     ys = [valid_data, normalization, normalization_half]

#     for i, [x, y, title, color, xlabel, ylabel] in enumerate(zip(xs, ys, titles, colors, x_label, y_label)):
#         plt.subplot(311 + i)
#         plt.plot(x, y, color)
#         plt.xlabel(xlabel)
#         plt.ylabel(ylabel)
#         plt.title(title, color=color)
#     plt.tight_layout()

In [None]:
# Entries of the data directory `path`, without the first two returned by
# os.listdir.
# NOTE(review): os.listdir returns entries in arbitrary order, so which two
# entries are skipped is not guaranteed -- confirm.
All_file = os.listdir(path)[2:]
# time, Amp = [[]] * len(All_file), [[]] * len(All_file)
# for idx, i in enumerate(tqdm(All_file)):
#     with open(i, "r") as f:
#         for _ in range(10):
#             f.readline()
#         t0 = float(f.readline()[14:])
#         amp = max(np.array([float(i.strip("\n")) for i in f.readlines()[1:]]))
#     time = np.append(time, t0)
#     Amp = np.append(Amp, amp)

In [None]:
# # Save the calculated frequency value in CSV format
# file_name = path.split('\\')[-1]
# with open(file_name + '-frequency.csv', 'w') as csvfile:
#     writer = csv.writer(csvfile)
#     writer.writerows([freq_max])

# # Change the frequency file format to TXT
# os.rename(path + '\\' + file_name + '-frequency.csv', 
#           path + '\\' + file_name + '-frequency.txt')
# with open(file_name + '-frequency.txt', 'r') as f:
#     data = f.readlines()[:-1]
# freq_max = [i.split(',') for i in data]
# freq_max = np.array(freq_max[0]).astype('float32')

In [None]:
# Plot the frequency energy map: one point per waveform, energy on the x axis
# and peak frequency on the y axis.
points = plt.scatter(energy, freq_max)
canvas = points.axes
canvas.set_xlim(-10, math.ceil(max(energy)))
canvas.set_xlabel('Energy(aJ)')
canvas.set_ylabel('Frequency(Hz)')
canvas.set_title('Frequency-Energy')

# Alternative view (disabled): amplitude over time.
# plt.bar(time, Amp)
# plt.xlim(math.floor(min(time)), math.ceil(max(time)))
# plt.xlabel('Time(s)')
# plt.ylabel('Amplitude(μV)')
# plt.title('Time-Amplitude')
canvas.figure.savefig(r'C:\Users\Yuan\Desktop\test.png')

In [None]:
# xx[-1]
# `base` is the running total of the steps 9, 5, 4, 2, 1, 0.8, 0.75, 0.5, 0.5.
base = np.array([9, 14, 18, 20, 21, 21.8, 22.55, 23.05, 23.55])

# Six copies of `base`, each shifted by the last value of the previous copy.
tick_groups = [base + 0]
while len(tick_groups) < 6:
    previous_end = tick_groups[-1][-1]
    tick_groups.append(base + previous_end)

tick_1, tick_2, tick_3, tick_4, tick_5, tick_6 = tick_groups
x_tick = np.concatenate(tick_groups)

## Time domain curve