## 导入库类和数据

In [169]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
import spectral
from sklearn import preprocessing
from sklearn.cluster import KMeans 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import metrics
from xgboost import XGBClassifier
import lightgbm as lgb
import joblib

In [2]:
input_image = loadmat('H:\data\hyp_data.mat')['hyp_data']
output_image = loadmat('H:\data\X.mat')['X']

#### 处理第一维参数，提取信息，并进行kmeans， 计算欧式距离， 生成各像素空间特征

In [3]:
# 可以提取出高光谱数据的一个channel的数据 144*144
def split_one_channel(A ,channel):
    d = [0]*144
    for i in range(144):
        d[i] = input_image[i][:,channel-1]
    e = np.vstack((d[i] for i in range(144)))
    return e  
# 参数说明
# channel 通道
# A 高光谱立方块

In [4]:
# 依据patch大小（Filter 大小）对矩阵进行镜像处理
def mirror_matrix(A, fil):
    pad = int((fil-1)/2)
    for i in range(1,pad+1): # up_down and left_right
        up_down = np.array(A[2*i-1,:])
        Matrix_up_down = np.vstack([up_down,A,up_down])
        left_right = np.array(Matrix_up_down[:,2*i-1]).reshape(len(Matrix_up_down),1)
        Matrix_left_right = np.hstack([left_right,Matrix_up_down,left_right])
        A = Matrix_left_right
    return A

# 参数说明
# n   array size
# fil  filter size5*5
# pad = (fil-1)/2  padding大小

In [5]:
# 将镜像处理后的矩阵进行向量化操作
def split_array(A,fil):
    s = []
    length = len(A)-fil+1
    for i in range(length): # row
        for j in range(length): # col
            temp = [0]* fil
            for z in range(fil):
                temp[z] = A[i+z,j:j+fil]
            com = np.concatenate([item for item in temp ])
            s.append(com)
    return s 
# 参数说明
# A 镜像处理后的矩阵
# fil  filter size

In [7]:
# 处理first channel
origin_matrix_one = split_one_channel(input_image,1)  # 取出第1个channel的信息
mirror_matrix_one = mirror_matrix(origin_matrix_one,3) # 对第1个channel进行mirror操作
split_list_one = split_array(mirror_matrix_one,3) # 将第一个channel进行向量化提取操作
split_array_one = np.array(split_list_one) # 将list转化为numpy array


In [23]:
# kmeans 操作 取K为5
kmeans = KMeans(n_clusters=5, random_state=0).fit(split_array_one)
len(kmeans.labels_)
K_centroid = kmeans.cluster_centers_

In [24]:
# 计算欧几里得距离
def Euclidean(v1,v2):
    return np.linalg.norm(v1-v2)

vec1 = K_centroid[0]
vec2 = split_array_one[0]
print(Euclidean(vec1,vec2))

260.2241105677054


In [11]:
# 计算单个patch的空间信息
def patch_spitial(v1, K_centroid):
    distance_array = np.array([Euclidean(v1, centroid) for centroid in K_centroid]) #距离matrix
    sums = np.sum(distance_array)
    average = np.average(distance_array)
    spitial_one_patch = np.maximum(0,average - distance_array) #依据公式求空间信息
    return spitial_one_patch
patch_spitial(vec2 ,K_centroid )

array([271.73206528,   0.        ,   0.        ,  14.00930299,
        27.64327152])

In [15]:
# 对整个channel求解
channel_spitial_list = list()
for i in range(len(split_list_one)):
    channel_spitial_list.append(patch_spitial(split_array_one[i], K_centroid))
channel_spitial_array = np.array(channel_spitial_list)    

In [51]:
# 对一个channel的求解进行函数化
def channel_spitial(channel_matrix, K_centroid):
    channel_spitial_list = list()
    for i in range(len(channel_matrix)):
        channel_spitial_list.append(patch_spitial(channel_matrix[i], K_centroid))
    return np.array(channel_spitial_list)
channel_spitial(split_list_one, K_centroid).shape

(20736, 5)

In [47]:
channel_spitial_array.shape

(20736, 5)

# 对整个矩阵求解

In [19]:
# 计算144个channel的信息
whole_origin_matrix_array = np.array([split_one_channel(input_image,i) for i in range(1,201)])
whole_mirror_matrix_array = np.array([mirror_matrix(whole_origin_matrix_array[i],3) for i in range(200)])
whole_split_matrix_array = np.array([split_array(whole_mirror_matrix_array[i],3) for i in range(200)])


In [22]:
whole_split_matrix_array.shape   # 200维的长度

(200, 20736, 9)

In [25]:
whole_kmeans = [KMeans(n_clusters=5, random_state=0).fit(whole_split_matrix_array[i]) for i in range(200)]
whole_K_centroid = [whole_kmeans[i].cluster_centers_ for i in range(200) ]

In [53]:
np.array(whole_K_centroid).shape
len(whole_K_centroid)

200

In [54]:
whole_temp = list()
for i in range(len(whole_K_centroid)):
    whole_temp.append(channel_spitial(whole_split_matrix_array[i],whole_K_centroid[i]))
whole_spitial = np.array(whole_temp)
whole_spitial.shape

(200, 20736, 5)

# 下边为测试提取到信息的代码  vstack合并列数不变,hstack合并行数不变

In [59]:
np.save("whole_spitial.npy",whole_spitial)

In [61]:
s = np.load("whole_spitial.npy")

In [62]:
s.shape

(200, 20736, 5)

In [107]:
s1 = np.hstack([s[i]for i in range(200)]) # sample
s1.shape 

(20736, 1000)

In [116]:
s2 = output_image.reshape(20736,1)   # label

In [119]:
sample_lb = np.hstack([s1,s2])

In [121]:
sample_lb.shape   # sample with lable

(20736, 1001)

In [138]:
sample_lab_fix_list = list()
for i in range(len(sample_lb)):
    if sample_lb[i,-1] != 0:
        sample_lab_fix_list.append(sample_lb[i,:])
sample_lab_fix = np.array(sample_lab_fix_list)

In [140]:
sample_lab_fix.shape    # 提取出需要分类的标签

(10366, 1001)

#### 分类预处理

In [157]:
## 归一化
data_D = preprocessing.StandardScaler().fit_transform(sample_lab_fix[:,:-1])
data_L = sample_lab_fix[:,-1]

## 存储
new = np.column_stack((data_D, data_L))
new_ = pd.DataFrame(new)
new_.to_csv('H:\data\specr_spi.csv',header=False,index=False)

In [173]:
# 分类
data = pd.read_csv('H:\data\specr_spi.csv',header=None)
data = data.as_matrix()
data_D = data[:,:-1]
data_L = data[:,-1]
data_train, data_test, label_train, label_test = train_test_split(data_D,data_L,test_size=0.9)

#clf = lgb.LGBMClassifier()
clf = SVC()
# clf = XGBClassifier(max_depth=20,learning_rate=0.05,n_estimators=500,silent=False,
#                     objective='multi:softmax',
#                     min_child_weight=1,
#                     gamma=0.,
#                     scale_pos_weight=1)
clf.fit(data_train,label_train)
pred = clf.predict(data_test)
accuracy = metrics.accuracy_score(label_test, pred)*100
print(accuracy)
joblib.dump(clf,"specr_spi.m")

70.64308681672026


['specr_spi.m']