# import包

In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA, FastICA, TruncatedSVD, NMF
from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection
from sklearn.cluster import KMeans, SpectralClustering



# 构造数据

In [2]:
df = pd.DataFrame({
    "feature_1":np.random.randint(1,10,size=100),
    "feature_2":np.random.randint(1,10,size=100),
    "feature_3":np.random.randint(1,10,size=100),
    "feature_4":np.random.random(100).round(2),
    "feature_5":np.random.random(100).round(2),
    "feature_6":np.random.random(100).round(2)
})

# 降维特征

In [3]:
n_comp = 3

# PCA
pca = PCA(n_components=n_comp, random_state=42)
pca_df = pd.DataFrame(pca.fit_transform(df))

# ICA
ica = FastICA(n_components=n_comp, random_state=42)
ica_df = pd.DataFrame(ica.fit_transform(df))

# tSVD
tsvd = TruncatedSVD(n_components=n_comp, random_state=42)
tsvd_df = pd.DataFrame(tsvd.fit_transform(df))

# GRP
grp = GaussianRandomProjection(n_components=n_comp, eps=0.1, random_state=42)
grp_df = pd.DataFrame(pd.DataFrame(grp.fit_transform(df)))

# SRP
srp = SparseRandomProjection(n_components=n_comp, dense_output=True, random_state=42)
srp_df = pd.DataFrame(srp.fit_transform(df))

# NMF
nmf = NMF(n_components=n_comp, init='nndsvdar', random_state=42)
nmf_df = pd.DataFrame(nmf.fit_transform(df))



In [4]:
# raname
pca_df.columns = ['pca_' + str(i) for i in range(1, n_comp + 1)]
ica_df.columns = ['ica_' + str(i) for i in range(1, n_comp + 1)]
tsvd_df.columns = ['tsvd_' + str(i) for i in range(1, n_comp + 1)]
grp_df.columns = ['grp_' + str(i) for i in range(1, n_comp + 1)]
srp_df.columns = ['srp_' + str(i) for i in range(1, n_comp + 1)]
nmf_df.columns = ['nmf_' + str(i) for i in range(1, n_comp + 1)]

# 聚类特征

## k平均算法（kmeans）

In [8]:
kms = KMeans(n_clusters=3, random_state=1).fit(df)
df['kmeans_Cluster'] = kms.labels_

## 谱聚类（spectral clustering）

In [9]:
sc = SpectralClustering().fit(df)
df['sc_Cluster'] = sc.labels_

In [10]:
df

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,kmeans_Cluster,sc_Cluster
0,1,4,2,0.91,0.41,0.15,0,1
1,5,4,6,0.73,0.43,0.81,1,2
2,1,5,5,0.55,0.08,0.56,1,2
3,6,1,7,0.74,0.35,0.29,1,2
4,2,2,1,0.71,0.20,0.39,0,5
...,...,...,...,...,...,...,...,...
95,2,5,7,0.81,0.49,0.15,1,2
96,1,7,2,0.88,0.33,0.69,0,1
97,2,9,1,0.97,0.06,0.38,0,1
98,9,4,4,0.33,0.80,0.62,2,2
