In [1]:
import numpy as np
import urllib.request
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import sklearn
import joblib
import hdf5storage
import itertools
import h5py
from torch.utils.data import DataLoader, Dataset
import seaborn as sns
from tsmoothie.utils_func import sim_randomwalk
from tsmoothie.smoother import LowessSmoother
from tsmoothie.utils_func import sim_seasonal_data
from tsmoothie.smoother import DecomposeSmoother
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, recall_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.feature_selection import RFE
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from data_cul import  merge_data
import joblib

# Matplotlib setup for Chinese text: use the SimHei font so Chinese labels
# render correctly, and keep the minus sign displaying properly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

In [2]:
def smooth(data, periods=50, smooth_fraction=0.2):
    """Smooth ``data`` with a LOWESS-based ``DecomposeSmoother``.

    Parameters
    ----------
    data : array-like
        Values handed unchanged to ``DecomposeSmoother.smooth``.
    periods : int, default 50
        ``periods`` argument forwarded to ``DecomposeSmoother``.
    smooth_fraction : float, default 0.2
        ``smooth_fraction`` argument forwarded to ``DecomposeSmoother``.

    Returns
    -------
    The smoother's ``smooth_data`` attribute, i.e. the smoothed values.
    """
    # Tunable smoothing parameters; the defaults reproduce the values that
    # used to be hard-coded here.
    smoother = DecomposeSmoother(smooth_type='lowess', periods=periods,
                                 smooth_fraction=smooth_fraction)
    smoother.smooth(data)
    # Only the smoothed series is needed; confidence intervals are not
    # computed because nothing consumed them.
    return smoother.smooth_data

def cul_data(path):
    """
    Load one ``.mat`` file and normalise every row by the first row.

    path:  path of the data file. The character at ``path[-5]`` must be a
           digit; it selects the variable read from the file
           (presumably the last digit before a ``.mat`` extension - confirm
           against the file naming scheme).

    return: array with the same shape as the stored variable, where each
            row has been divided element-wise by row 0
    """
    num = int(path[-5])  # digit taken from the file name
    f = hdf5storage.loadmat(path)

    # Variable name inside the file: 'MM_multi_MA_0' when the digit is 0,
    # otherwise 'MM_multi_MA_0<digit>' (e.g. 'MM_multi_MA_04').
    element_name = 'MM_multi_MA_' if num == 0 else 'MM_multi_MA_0'
    src_data = np.array(f[element_name + str(num)])

    # Array-element normalisation: divide every row by the first row.
    # Keeping the divisor 2-D (1, n_cols) makes the broadcast explicit.
    divisor = src_data[0, :][np.newaxis, :]

    # The full normalised array is returned. The former 1000-column random
    # sub-sample was never returned, yet it raised ValueError for inputs with
    # fewer than 1000 columns, so it has been removed.
    return src_data / divisor

def add_label(data, label):
    '''
    Append a constant label row underneath a 2-D array.

    data:  2-D array; one label is produced per column

    label: value written into every entry of the new row

    return: a new array equal to ``data`` with one extra final row that
            holds ``label`` in every column
    '''
    column_count = data.shape[1]
    label_row = np.array([label for _ in range(column_count)])
    return np.vstack((data, label_row))

def handle_data():
    '''
    Load the three validation files and tag each with its class index.

    return: list with one labelled array per file, in the order of the
            paths below; the appended label row holds 0, 1 and 2
            respectively. No smoothing is applied in this function.
    '''
    mat_files = (
        '../data/val_All_MM_MA_0.mat',
        '../data/val_All_MM_MA_04.mat',
        '../data/val_All_MM_MA_07.mat',
    )
    # The position of a file in the tuple is its class label.
    return [
        add_label(cul_data(mat_file), class_idx)
        for class_idx, mat_file in enumerate(mat_files)
    ]

def merge_data(data, n_per_array=10000):
    '''
    Randomly sample columns from every array and join the samples side by side.

    Note: this definition replaces the ``merge_data`` imported from
    ``data_cul`` at the top of the notebook.

    data: list of 2-D arrays that share the same number of rows
    n_per_array: number of distinct columns drawn (without replacement) from
                 each array; defaults to 10000
    return: array whose columns are the sampled columns of data[0], followed
            by those of data[1], and so on

    raises ValueError: if ``data`` is empty or an array has fewer than
                       ``n_per_array`` columns

    Sampling uses NumPy's global random state, so call ``np.random.seed``
    beforehand when a repeatable selection is needed.
    '''
    if len(data) == 0:
        raise ValueError("merge_data needs at least one array")

    sampled = []
    for idx, arr in enumerate(data):
        num_cols = arr.shape[1]
        if num_cols < n_per_array:
            raise ValueError(
                "array {} has only {} columns; cannot sample {} without "
                "replacement".format(idx, num_cols, n_per_array)
            )
        # Random, non-repeating column selection (not the first N columns).
        random_cols = np.random.choice(num_cols, n_per_array, replace=False)
        sampled.append(arr[:, random_cols])

    # One concatenation instead of re-copying the growing result every step.
    return np.concatenate(sampled, axis=1)


In [3]:
# Seed NumPy's global RNG so the random column sampling done while building
# the data set selects the same columns on every run; without it the
# accuracy and confusion matrix change from one execution to the next.
np.random.seed(42)

temp_val_data = handle_data()

all_data = merge_data(temp_val_data)

# The last row is the label row appended by add_label; the rest are features.
Labels = all_data[-1, :].T

# Smooth the feature rows in blocks of SMOOTH_CHUNK columns, then join the
# blocks with a single concatenate instead of growing the array in a loop.
SMOOTH_CHUNK = 1000
smoothed_chunks = [
    smooth(all_data[:-1, start:start + SMOOTH_CHUNK])
    for start in range(0, all_data.shape[1], SMOOTH_CHUNK)
]
all_Features = np.concatenate(smoothed_chunks, axis=1)

In [9]:
# Transpose so that each row of Features lines up with one entry of Labels.
Features = all_Features.T

# NOTE(review): only the 20% hold-out part is scored below; X_train / y_train
# are not used in the cells visible here.
X_train, X_test, y_train, y_test = train_test_split(
    Features,
    Labels,
    test_size=0.2,
    random_state=42,
)

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load model files from a trusted source.
rcf_test = joblib.load("./model/rfc_3_model-0.942.model")

y_pred = rcf_test.predict(X_test)

# Fraction of hold-out samples whose predicted label equals the true label.
matches = y_pred == y_test
accuracy = np.mean(matches)

# Printed as a percentage with three decimals.
print("Accuracy:{:.3f}".format(accuracy*100))

Accuracy:59.717


In [5]:
# Confusion matrix of the hold-out predictions: rows are the true classes,
# columns are the predicted classes.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)

conf_matrix

array([[1989,    9,    4],
       [1708,   36,  252],
       [   4,  423, 1575]], dtype=int64)