In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import time
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans


%matplotlib qt5
plt.rcParams['xtick.direction'] = 'in'
plt.rcParams['ytick.direction'] = 'in'

In [25]:
def plot(energy, inter, mid, edgecolor, idx = 0):
    """Scatter a binned, log10-transformed distribution of ``energy`` on the current pyplot axes.

    Args:
        energy: Iterable of values to bin. It has to be in ascending order,
            because the bin cursor ``idx`` only ever moves forward.
        inter: Sequence of bin-group start values. A non-zero start ``i``
            contributes 8 evenly spaced edges on ``[i, 10 * i)``; a zero start
            contributes 8 evenly spaced edges on ``[0, 1)``.
        mid: One divisor per entry of ``inter``; the normalised count of every
            point whose abscissa lies in ``[inter[k], inter[k + 1])`` is divided
            by ``mid[k]`` (presumably the bin width of that group - TODO confirm).
        edgecolor: Forwarded to ``plt.scatter`` as ``edgecolors``.
        idx: Index of the bin edge at which the search for the first value starts.

    Returns:
        The log10 of the abscissae of the plotted points.

    Raises:
        IndexError: If a value is not contained in any ``[x[k], x[k + 1])`` at
            or after the cursor; this includes every value at or beyond the
            last edge, because that edge has no right neighbour.
    """
    x = np.array([])
    # Build the bin edges (abscissa), 8 per entry of `inter`.
    for i in inter:
        if i != 0:
            x = np.append(x, np.linspace(i, i * 10, 8, endpoint=False))
        else:
            x = np.append(x, np.linspace(i, 1, 8, endpoint=False))
    
    # Count the values per bin (ordinate). The cursor `idx` is shared across
    # values and never reset, which is why `energy` must be sorted.
    # NOTE(review): a value outside every bin walks the cursor off the end of
    # `x` and surfaces as an IndexError rather than being skipped.
    y = np.zeros(x.shape[0])
    for i in energy:
        while True:
            if x[idx] <= i < x[idx + 1]:
                y[idx] += 1
                break
            idx += 1
    
    # Drop the empty bins and turn the counts into fractions of the total.
    x, y = x[y != 0], y[y != 0]
    xx = np.zeros(x.shape[0])
    yy = y / sum(y)
    
    # Use the midpoint between consecutive retained edges as the abscissa of
    # each point; the last point keeps its own edge.
    # NOTE(review): the original comment said "end point", the code computes
    # midpoints - confirm which one is intended.
    for idx in range(len(x) - 1):
        xx[idx] = (x[idx] + x[idx + 1]) / 2
    xx[-1] = x[-1]
    
    # Pick the divisor for every point from the group its abscissa falls in,
    # then divide the fractions by it to obtain the density values.
    interval = []
    for i, j in enumerate(mid):
        try:
            num = len(np.intersect1d(np.where(inter[i] <= xx)[0], np.where(xx < inter[i + 1])[0]))
            interval.extend([j] * num)
        except IndexError:
            # Last group: `inter[i + 1]` does not exist, so it has no upper bound.
            num = len(np.where(inter[i] <= xx)[0])
            interval.extend([j] * num)
    yy = yy / np.array(interval)
    
    # Take log10 of both axes so that a power law shows up as a straight line.
    xx = np.log10(xx)
    yy = np.log10(yy)
    plt.scatter(xx, yy, edgecolors=edgecolor)
    return xx

In [26]:
if __name__ == "__main__":
    features_path = r'C:\Users\Yuan\Desktop\pri_database.txt'
    label_path = r'C:\Users\Yuan\Desktop\label.txt'

    # Columns kept by the slice below: Amp,RiseT,Dur,Eny,RMS,Counts
    with open(features_path, 'r') as f:
        feature = np.array([i.split(',')[6:-4] for i in f.readlines()[1:]])
    feature = feature.astype(np.float32)

    # One label per line after the header; label 2 is folded into label 0.
    with open(label_path, 'r') as f:
        label = np.array([i.strip() for i in f.readlines()[1:]])
    label = label.astype(np.float32).reshape(-1, 1)
    label[np.where(label == 2)] = 0
    # Second column is the complement: 1 where the (folded) label is 0.
    ext = np.zeros([feature.shape[0], 1])
    ext[np.where(label == 0)[0].tolist()] = 1
    label = np.concatenate((label, ext), axis=1)

    # Per-panel settings for the three distribution plots: feature column,
    # bin-group starts, divisors and axis labels.
    feature_idx = [0, 2, 3]
    interz = [[pow(10, i) for i in range(1, 5)],
              [pow(10, i) for i in range(4)],
              [0] + [pow(10, i) for i in range(6)]]
    midz = [[0.125 * pow(10, i) for i in range(2, 6)],
            [0.125 * pow(10, i) for i in range(1, 5)],
            [0.125 * pow(10, i) for i in range(7)]]
    xlabelz = ['Amplitude(μV)', 'Duration(μs)', 'Energy(aJ)']
    ylabelz = ['PDF(A)', 'PDF(D)', 'PDF(E)']
    cls_1 = label[:, 0] == 1
    cls_2 = label[:, 1] == 1

    fig = plt.figure()
    for i, [idx, inter, mid, xlabel, ylabel
            ] in enumerate(zip(feature_idx, interz, midz, xlabelz, ylabelz)):
        ax = fig.add_subplot(321 + i)
        tmp = feature[:, idx]
        # `plot` walks its bins with a forward-only cursor, so it needs sorted input.
        tmp_1, tmp_2 = sorted(tmp[cls_1]), sorted(tmp[cls_2])
        xx = plot(tmp_1, inter, mid, 'purple')
        xx = plot(tmp_2, inter, mid, 'g')
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)

    # Pairwise feature-vs-feature panels (column indices into `feature`).
    cor_idx = [[0, 3], [2, 0], [2, 3]]
    for idx, [i, j] in enumerate(cor_idx):
        ax = fig.add_subplot(321 + idx + 3)
        cor_x = np.log10(feature[:, i])
        cor_y = np.log10(feature[:, j])
        # Only mask by class here. Sorting x and y independently (as the
        # previous version did) pairs the k-th smallest x with the k-th
        # smallest y, which destroys the per-sample relation being plotted.
        ax.scatter(cor_x[cls_1], cor_y[cls_1], edgecolors='purple')
        ax.scatter(cor_x[cls_2], cor_y[cls_2], edgecolors='g')
        # Feature columns 0, 2, 3 map to entries 0, 1, 2 of `xlabelz`.
        ax.set_xlabel(xlabelz[max(0, i - 1)])
        ax.set_ylabel(xlabelz[max(0, j - 1)])

    plt.tight_layout()

In [None]:
# Offsets of the ticks inside one group; they are the running total of the
# steps 9, 5, 4, 2, 1, 0.8, 0.75, 0.5, 0.5.
base = np.array([9, 14, 18, 20, 21, 21.8, 22.55, 23.05, 23.55])

# Every group repeats `base`, shifted by the last tick of the group before it
# (the first group is not shifted).
ticks = []
shift = 0
for _group in range(6):
    ticks.append(base + shift)
    shift = ticks[-1][-1]

tick_1, tick_2, tick_3, tick_4, tick_5, tick_6 = ticks
x_tick = np.concatenate(ticks)

## PCA

In [None]:
# Fit a 5-component PCA on the feature matrix and keep the projected samples.
# NOTE(review): `feature` is created by the plotting cell further up, so that
# cell has to be executed before this one.
pca = PCA(n_components=5)
newData = pca.fit_transform(feature)

In [None]:
# Inspect the shape of the fitted component matrix
# (scikit-learn documents it as (n_components, n_features)).
pca.components_.shape

## Backbone

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import time
import argparse
import torch
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.model_selection import StratifiedKFold, train_test_split
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from torch.nn import Module

In [None]:
def metric(logit, truth, threshold=0.5):
    """Count correct positive and negative predictions per class for one batch.

    Args:
        logit: 2-D tensor ``(batch_size, num_class)`` of raw scores; a sigmoid
            is applied here before thresholding.
        truth: Tensor with the same number of elements as ``logit``; entries
            above 0.5 count as positive.
        threshold: Probability above which a prediction counts as positive.

    Returns:
        Tuple ``(tn, tp, num_neg, num_pos)`` of NumPy arrays with shape
        ``(num_class, 1)``: correctly predicted negatives, correctly predicted
        positives, number of negatives and number of positives per class.
    """
    n_samples, n_classes = logit.shape

    with torch.no_grad():
        scores = torch.sigmoid(logit.view(n_samples, n_classes, -1))
        predicted_pos = scores > threshold
        actual_pos = truth.view(n_samples, n_classes, -1) > 0.5

        # Per-class counts of predictions that agree with the ground truth.
        hit_pos = (predicted_pos & actual_pos).float().sum(dim=0)
        hit_neg = (~predicted_pos & ~actual_pos).float().sum(dim=0)

        # Per-class counts of positive and negative samples in the batch.
        count_pos = actual_pos.float().sum(dim=0)
        count_neg = n_samples - count_pos

        as_numpy = [t.data.cpu().numpy()
                    for t in (hit_neg, hit_pos, count_neg, count_pos)]

    return as_numpy[0], as_numpy[1], as_numpy[2], as_numpy[3]


class Meter:
    '''Accumulate per-batch prediction counts and turn them into accuracies.

    Each call to ``update`` stores the four arrays returned by ``metric``
    (true negatives, true positives, number of negatives, number of
    positives); ``get_metrics`` sums them over all stored batches.
    '''
    def __init__(self):
        self.base_threshold = 0.5
        self.true_negative = []
        # The attribute name is misspelled; it is kept as is so that code
        # reading it keeps working.
        self.true_poisitive = []
        self.number_negative = []
        self.number_positive = []

    def update(self, targets, outputs):
        '''Score one batch with ``metric`` and store its counts.

        Args:
            targets: Ground-truth tensor of the batch.
            outputs: Raw model scores of the batch (``metric`` applies the sigmoid).
        '''
        tn, tp, num_neg, num_pos = metric(outputs, targets, self.base_threshold)
        self.true_negative.append(tn)
        self.true_poisitive.append(tp)
        self.number_negative.append(num_neg)
        self.number_positive.append(num_pos)

    def get_metrics(self):
        '''Return the accuracies over every batch seen so far.

        Returns:
            Tuple ``(class_neg_accuracy, class_pos_accuracy, class_accuracy,
            neg_accuracy, pos_accuracy, accuracy)``. The first three are
            per-class arrays, the last three are totals over all classes.
            A ratio whose denominator is zero evaluates to 0 instead of NaN.
        '''
        # Same guard the overall negative/positive ratios already used; it is
        # now applied to every ratio so an empty class cannot produce NaN.
        eps = 1e-12

        # Per-class counts summed over the stored batches.
        class_tn = np.sum(np.array(self.true_negative), axis=0)
        class_tp = np.sum(np.array(self.true_poisitive), axis=0)
        class_num_neg = np.sum(np.array(self.number_negative), axis=0)
        class_num_pos = np.sum(np.array(self.number_positive), axis=0)
        # Totals over all classes and batches.
        tn = np.sum(self.true_negative)
        tp = np.sum(self.true_poisitive)
        num_neg = np.sum(self.number_negative)
        num_pos = np.sum(self.number_positive)
        # Per-class accuracy on negatives, on positives and overall.
        class_neg_accuracy = class_tn / (class_num_neg + eps)
        class_pos_accuracy = class_tp / (class_num_pos + eps)
        class_accuracy = (class_tn + class_tp) / (class_num_neg + class_num_pos + eps)
        # Accuracy on all negatives, on all positives and overall.
        neg_accuracy = tn / (num_neg + eps)
        pos_accuracy = tp / (num_pos + eps)
        accuracy = (tn + tp) / (num_neg + num_pos + eps)

        return class_neg_accuracy, class_pos_accuracy, class_accuracy, neg_accuracy, pos_accuracy, accuracy

In [None]:
class Classify_model(torch.nn.Module):
    """Three-layer fully connected network with a single sigmoid output.

    Args:
        layer: Sequence of three sizes: input width, first hidden width and
            second hidden width. The output layer always has one unit.
        training: Initial value of the module's ``training`` flag, which
            controls whether dropout is active in ``forward``.
    """
    def __init__(self, layer, training=True):
        # The previous call was ``super(Fit_model, self)``; ``Fit_model`` is not
        # defined anywhere, so constructing the model raised NameError.
        super().__init__()
        self.linear1 = torch.nn.Linear(layer[0], layer[1])
        self.relu = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(layer[1], layer[2])
        self.linear3 = torch.nn.Linear(layer[2], 1)
        self.criterion = torch.nn.MSELoss()
        self.sigmoid = torch.nn.Sigmoid()
        self.opt = torch.optim.SGD(self.parameters(), lr=0.0001)
        # Set after the sub-modules exist; ``train()`` / ``eval()`` change the
        # same flag later on.
        self.training = training

    def forward(self, input):
        """Map a batch ``(batch_size, layer[0])`` to values in (0, 1) of shape ``(batch_size, 1)``."""
        y = self.linear1(input)
        y = self.relu(y)
        # Dropout is only applied while the module is in training mode.
        y = F.dropout(y, 0.5, training=self.training)
        y = self.linear2(y)
        y = self.sigmoid(y)
        y = self.linear3(y)
        y = self.sigmoid(y)
        return y

In [None]:
class SteelClassDataset(Dataset):
    """Map-style dataset over a ``[features, labels]`` pair.

    Args:
        dataset: Indexable whose item 0 holds the features and item 1 the
            labels; both are indexed with the same sample index.
    """

    def __init__(self, dataset):
        super().__init__()
        self.feature, self.label = dataset[0], dataset[1]

    def __len__(self):
        # The number of samples is taken from the labels.
        return len(self.label)

    def __getitem__(self, idx):
        """Return the ``(feature, label)`` pair stored at position ``idx``."""
        return self.feature[idx], self.label[idx]

In [None]:
def classify_provider(features_path, label_path, n_splits, batch_size,num_workers):
    """Read features and labels from text files and build train/validation loaders.

    Args:
        features_path: Comma-separated text file with one header line; fields
            ``[6:-4]`` of every following line are used as features.
        label_path: Text file with one header line and one label per line;
            label 2 is replaced by 0.
        n_splits: Number of stratified folds; with 1 a single stratified
            80/20 split is made instead.
        batch_size: Batch size of every loader.
        num_workers: Worker processes of every loader.

    Returns:
        List with one ``[train_dataloader, val_dataloader]`` pair per fold.
    """
    # Column note carried over from the original: Time,Thr,Amp,RiseT,Dur,Eny,RMS,Counts
    with open(features_path, 'r') as handle:
        feature_rows = handle.readlines()[1:]
    feature = np.array([row.split(',')[6:-4] for row in feature_rows])
    feature = torch.from_numpy(feature.astype(np.float32))

    with open(label_path, 'r') as handle:
        label_rows = handle.readlines()[1:]
    label = np.array([row.strip() for row in label_rows]).astype(np.float32)
    label[label == 2] = 0
    label = torch.from_numpy(label).unsqueeze(dim=1)

    # Every fold is a ([train_x, train_y], [val_x, val_y]) pair.
    if n_splits == 1:
        train_x, val_x, train_y, val_y = train_test_split(
            feature, label, test_size=0.2, stratify=label, random_state=69)
        folds = [([train_x, train_y], [val_x, val_y])]
    else:
        splitter = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=69)
        folds = [([feature[train_rows], label[train_rows]],
                  [feature[val_rows], label[val_rows]])
                 for train_rows, val_rows in splitter.split(feature, label)]

    def _make_loader(split, shuffle):
        # Only the shuffle flag differs between training and validation loaders.
        return DataLoader(SteelClassDataset(split),
                          batch_size=batch_size,
                          num_workers=num_workers,
                          pin_memory=True,
                          shuffle=shuffle)

    return [[_make_loader(train_split, True), _make_loader(val_split, False)]
            for train_split, val_split in folds]


class Solver():
    """Helper bundling the forward pass, loss, optimizer step and checkpoint I/O for one model.

    Args:
        model: The network to drive. Inputs and targets are moved to CUDA when
            it is available, otherwise to the CPU; the model itself is not moved here.
    """
    def __init__(self, model):
        self.model = model
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')

    def forward(self, images):
        """Move ``images`` to the solver's device and return the model output."""
        images = images.to(self.device)
        outputs = self.model(images)
        return outputs

    def cal_loss(self, targets, predicts, criterion):
        """Move ``targets`` to the solver's device and return ``criterion(predicts, targets)``."""
        targets = targets.to(self.device)
        return criterion(predicts, targets)

    def backword(self, optimizer, loss):
        """Back-propagate ``loss``, take one optimizer step and clear the gradients.

        The method name is misspelled; it is kept because callers use it.
        """
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    def save_checkpoint(self, save_path, state, is_best):
        """Save ``state`` to ``save_path``; if ``is_best`` also copy it to ``*_best.pth``."""
        # ``shutil`` is not imported by the notebook's import cells, which made
        # the best-model copy fail with NameError; import it where it is needed.
        import shutil

        torch.save(state, save_path)
        if is_best:
            print('Saving Best Model.')
            save_best_path = save_path.replace('.pth', '_best.pth')
            shutil.copyfile(save_path, save_best_path)

    def load_checkpoint(self, load_path):
        """Load the weights stored at ``load_path`` into the model and return the model.

        Accepts a checkpoint dict with a ``'state_dict'`` entry (the layout
        written by ``save_checkpoint`` callers in this file) or a bare state dict.

        Raises:
            FileNotFoundError: If ``load_path`` is not an existing file.
        """
        if not os.path.isfile(load_path):
            raise FileNotFoundError(
                "Can not find weight file in {}".format(load_path))

        checkpoint = torch.load(load_path, map_location='cpu')
        if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
            state_dict = checkpoint['state_dict']
        else:
            state_dict = checkpoint

        # Previously the weights were read but never applied, although success
        # was reported. Under DataParallel the real network is ``.module``.
        if isinstance(self.model, torch.nn.DataParallel):
            target = self.model.module
        else:
            target = self.model
        target.load_state_dict(state_dict)

        print('Successfully Loaded from %s' % (load_path))
        return self.model

In [None]:
class TrainVal():
    """Train and validate a ``Classify_model`` on one fold.

    Args:
        config: Namespace providing ``layer``, ``lr``, ``weight_decay``,
            ``epoch`` and ``save_path``; an optional ``model_name`` is used as
            prefix of the checkpoint file name.
        fold: Index of the fold, used in log lines and checkpoint names.

    Attributes set for callers:
        writer: ``None`` by default. Assign an object with an
            ``add_scalar(tag, value, step)`` method to log validation metrics.
    """
    def __init__(self, config, fold):
        # Keep a handle on the bare network: ``.module`` only exists on the
        # DataParallel wrapper, which is created only when CUDA is available.
        self.net = Classify_model(config.layer, training=True)
        self.model = self.net
        if torch.cuda.is_available():
            self.model = torch.nn.DataParallel(self.net)
            self.model = self.model.cuda()

        self.lr = config.lr
        self.weight_decay = config.weight_decay
        self.epoch = config.epoch
        self.fold = fold

        # These three were read by ``train`` but never initialised.
        self.model_name = getattr(config, 'model_name', 'model')
        self.max_accuracy_valid = 0
        self.writer = None

        self.solver = Solver(self.model)
        self.criterion = torch.nn.MSELoss()

        self.model_path = config.save_path
        os.makedirs(self.model_path, exist_ok=True)

    @staticmethod
    def _progress_bar(iterable):
        """Wrap ``iterable`` in a tqdm progress bar."""
        # tqdm is used by this class but not imported in the notebook's import
        # cells; importing it here keeps the class usable on a fresh kernel.
        import tqdm
        return tqdm.tqdm(iterable)

    def train(self, train_loader, valid_loader):
        """Run ``self.epoch`` training epochs, validating and checkpointing after each one."""
        # ``optim`` was never imported on its own; go through ``torch.optim``.
        optimizer = torch.optim.Adam(self.net.parameters(), self.lr, weight_decay=self.weight_decay)
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, self.epoch + 10)

        for epoch in range(1, self.epoch + 1):
            epoch_loss = 0
            self.model.train(True)

            tbar = self._progress_bar(train_loader)
            for x, labels in tbar:
                labels_predict = self.solver.forward(x)
                loss = self.solver.cal_loss(labels, labels_predict, self.criterion)
                epoch_loss += loss.item()
                self.solver.backword(optimizer, loss)

                params_groups_lr = ''.join(
                    'params_group_%d' % (group_ind) + ': %.12f, ' % (param_group['lr'])
                    for group_ind, param_group in enumerate(optimizer.param_groups))
                descript = "Fold: %d, Train Loss: %.7f, lr: %s" % (self.fold, loss.item(), params_groups_lr)
                tbar.set_description(desc=descript)

            lr_scheduler.step()

            print('Finish Epoch [%d/%d], Average Loss: %.7f' % (epoch, self.epoch, epoch_loss / len(train_loader)))

            class_neg_accuracy, class_pos_accuracy, class_accuracy, neg_accuracy, pos_accuracy, accuracy, loss_valid = \
                self.validation(valid_loader)

            is_best = accuracy > self.max_accuracy_valid
            if is_best:
                # Stored as a plain float so the checkpoint holds no NumPy scalar.
                self.max_accuracy_valid = float(accuracy)

            state = {
                'epoch': epoch,
                'state_dict': self.net.state_dict(),
                'max_accuracy_valid': self.max_accuracy_valid,
            }

            self.solver.save_checkpoint(os.path.join(self.model_path, '%s_classify_fold%d.pth' % (self.model_name, self.fold)), state, is_best)

            if self.writer is not None:
                self.writer.add_scalar('valid_loss', loss_valid, epoch)
                self.writer.add_scalar('valid_accuracy', accuracy, epoch)
                # The model has a single output, so the number of classes is
                # taken from the metric instead of assuming two entries.
                for class_ind, class_acc in enumerate(np.ravel(class_accuracy)):
                    self.writer.add_scalar('valid_class_%d_accuracy' % class_ind, class_acc, epoch)

    def validation(self, valid_loader):
        """Evaluate the model on ``valid_loader``.

        Returns:
            Tuple ``(class_neg_accuracy, class_pos_accuracy, class_accuracy,
            neg_accuracy, pos_accuracy, accuracy, loss_mean)``.
        """
        self.model.eval()
        meter = Meter()
        tbar = self._progress_bar(valid_loader)
        loss_sum = 0

        with torch.no_grad():
            for x, labels in tbar:
                labels_predict = self.solver.forward(x)
                loss = self.solver.cal_loss(labels, labels_predict, self.criterion)
                loss_sum += loss.item()

                # The model already ends in a sigmoid, while the meter applies
                # another sigmoid before thresholding at 0.5. Feeding it the
                # probabilities directly would make every prediction positive,
                # so convert them back to logits first.
                probability = labels_predict.cpu().clamp(1e-6, 1 - 1e-6)
                logits = torch.log(probability) - torch.log1p(-probability)
                meter.update(labels, logits)

                descript = "Val Loss: {:.7f}".format(loss.item())
                tbar.set_description(desc=descript)
        loss_mean = loss_sum / len(valid_loader)

        class_neg_accuracy, class_pos_accuracy, class_accuracy, neg_accuracy, pos_accuracy, accuracy = meter.get_metrics()
        class_report = ' | '.join(
            'Class_%d_accuracy: %0.4f' % (class_ind, class_acc)
            for class_ind, class_acc in enumerate(np.ravel(class_accuracy)))
        print("%s | Negative accuracy: %0.4f | positive accuracy: %0.4f | accuracy: %0.4f" %
              (class_report, neg_accuracy, pos_accuracy, accuracy))
        return class_neg_accuracy, class_pos_accuracy, class_accuracy, neg_accuracy, pos_accuracy, accuracy, loss_mean

In [None]:
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--features_path', type=str, default=r'C:\Users\Yuan\Desktop\pri_database.txt')
    parser.add_argument('--label_path', type=str, default=r'C:\Users\Yuan\Desktop\label.txt')
    parser.add_argument('--save_path', type=str, default='./checkpoints')
    parser.add_argument('--class_num', type=int, default=4)
    parser.add_argument('--num_workers', type=int, default=8)
    parser.add_argument('--lr', type=float, default=5e-5, help='init lr')
    parser.add_argument('--weight_decay', type=float, default=0, help='weight_decay in optimizer')
    parser.add_argument('--n_splits', type=int, default=5, help='n_splits_fold')
    parser.add_argument('--batch_size', type=int, default=24, help='batch size')
    parser.add_argument('--epoch', type=int, default=30, help='epoch')
    # ``type=list`` would split a command-line value into single characters;
    # read the layer sizes as a list of ints instead (e.g. --layer 8 100 80).
    parser.add_argument("--layer", type=int, nargs='+', default=[8, 100, 80],
                        help='input size followed by the two hidden layer sizes')
    # Inside a notebook ``sys.argv`` carries the kernel's own arguments, which
    # ``parse_args`` rejects; ``parse_known_args`` ignores what it does not know.
    config, _unknown_args = parser.parse_known_args()

    # Take every setting from ``config``: ``n_splits``, ``batch_size`` and
    # ``num_workers`` were never defined as standalone names.
    dataloaders = classify_provider(config.features_path, config.label_path,
                                    config.n_splits, config.batch_size, config.num_workers)
    for fold_index, [train_loader, valid_loader] in enumerate(dataloaders):
        train_val = TrainVal(config, fold_index)
        train_val.train(train_loader, valid_loader)