### 导入第三方库

In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering
from gower import gower_matrix

import numpy as np
import pandas as pd

#### 数据预处理

In [2]:
# Workbook to read; its "standard" sheet is loaded as the working table.
excelpath = "merged_excel.xlsx"
dateGlass = pd.read_excel(excelpath, sheet_name="standard")

In [3]:
def dateProcess(dateGlass,type:int):
    """Select the rows of one glass type and split them into two frames.

    Parameters
    ----------
    dateGlass : pandas.DataFrame
        Source table. It must contain a "类型" column; every other column is
        addressed by position, so the column order of the sheet matters.
    type : int
        Value of the "类型" column to keep. (The name shadows the builtin
        ``type`` inside this function; it is kept because callers rely on it.)

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(labelled, values)`` where ``values`` holds the columns at positions
        3..16 of the matching rows and ``labelled`` is the column at position 1
        followed by those same value columns. Both are re-indexed from 0.
    """
    # Filter once; both slices below are cut from the same set of rows.
    matching_rows = dateGlass[dateGlass["类型"] == type]

    # Positions 3..16 (the stop of the slice, 17, is exclusive).
    values = matching_rows.iloc[:, 3:17].reset_index(drop=True)
    # Position 1 is carried along as the leading column of the labelled frame.
    sample_column = matching_rows.iloc[:, 1].reset_index(drop=True)

    labelled = pd.concat([sample_column, values], axis=1)
    return labelled, values

In [4]:
# 提取出不同类型的 文物数据 的 成分性数据
dateGlass_pb_all, dateGlass_pb_value = dateProcess(dateGlass,0) # 铅钡
dateGlass_hk_all, dateGlass_hk_value = dateProcess(dateGlass,1)  # 高钾

In [5]:
# Show which columns ended up in the labelled lead-barium frame
dateGlass_pb_all.columns

Index(['文物采样点', '二氧化硅(SiO2)', '氧化钠(Na2O)', '氧化钾(K2O)', '氧化钙(CaO)', '氧化镁(MgO)',
       '氧化铝(Al2O3)', '氧化铁(Fe2O3)', '氧化铜(CuO)', '氧化铅(PbO)', '氧化钡(BaO)',
       '五氧化二磷(P2O5)', '氧化锶(SrO)', '氧化锡(SnO2)', '二氧化硫(SO2)'],
      dtype='object')

使用层次聚类分析来求解成分性数据

In [6]:
def clusterClassify_2(dateGlass, n_clusters: int = 2):
    """Cluster the rows of ``dateGlass`` with Ward agglomerative clustering.

    The Gower distance matrix of the input is computed, each of its columns is
    standardised, and the rows of that scaled matrix are then clustered.

    Parameters
    ----------
    dateGlass : pandas.DataFrame
        One row per sample; passed unchanged to ``gower_matrix``.
    n_clusters : int, default 2
        Number of clusters to form. The default keeps the original
        two-cluster behaviour of this function.

    Returns
    -------
    numpy.ndarray
        One integer cluster label per input row.
    """
    # Pairwise Gower distances between samples.
    gower_dist = gower_matrix(dateGlass)

    # Standardise every column of the distance matrix (zero mean, unit variance).
    scaled_dist = StandardScaler().fit_transform(gower_dist)

    # NOTE(review): the rows of the scaled distance matrix are used as feature
    # vectors for Ward linkage; the matrix is not passed as a precomputed
    # distance. Confirm that this is the intended method.
    clustering_model = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward')

    # Fit and return the label assigned to each row.
    return clustering_model.fit_predict(scaled_dist)

In [7]:
# Two-cluster labels for each glass type, computed on the value-only frames
cluster_labels_pb_2 = clusterClassify_2(dateGlass_pb_value)
cluster_labels_hk_2 = clusterClassify_2(dateGlass_hk_value)

算法结果

In [8]:
# Report the two-cluster split of the lead-barium samples.
print("铅钡-是否风化分类结果")
print("层次聚类数据")
# Row positions of each cluster. Label 1 is reported as "weathered" and label 0
# as "unweathered".
# NOTE(review): the labels come from an unsupervised fit, so which id means
# "weathered" is an assumption to confirm against the known surface state.
weather_indices = np.flatnonzero(cluster_labels_pb_2 == 1)
no_weather_indices = np.flatnonzero(cluster_labels_pb_2 == 0)
# First column of the labelled frame for the rows of each cluster.
weather_data = np.asarray(dateGlass_pb_all.iloc[weather_indices, 0])
no_weather_data = np.asarray(dateGlass_pb_all.iloc[no_weather_indices, 0])
print(f"风化数据:\n{weather_data}")
print(f"无风化数据:\n{no_weather_data}")

铅钡-是否风化分类结果
层次聚类数据
风化数据:
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         3.66073215 0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.        ]
无风化数据:
[ 0.          2.58465237 15.2992671   0.          0.          0.
  0.          1.96353436 15.96756432  0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.        ]


In [9]:
# Report the two-cluster split of the high-potassium samples.
print("高钾-是否风化分类结果")
print("层次聚类数据")
# Row positions of each cluster. Label 1 is reported as "weathered" and label 0
# as "unweathered".
# NOTE(review): the labels come from an unsupervised fit, so which id means
# "weathered" is an assumption to confirm against the known surface state.
weather_indices = np.flatnonzero(cluster_labels_hk_2 == 1)
no_weather_indices = np.flatnonzero(cluster_labels_hk_2 == 0)
# First column of the labelled frame for the rows of each cluster.
weather_data = np.asarray(dateGlass_hk_all.iloc[weather_indices, 0])
no_weather_data = np.asarray(dateGlass_hk_all.iloc[no_weather_indices, 0])
print(f"风化数据:\n{weather_data}")
print(f"无风化数据:\n{no_weather_data}")

高钾-是否风化分类结果
层次聚类数据
风化数据:
[0. 0. 0. 0. 0. 0. 0. 0.]
无风化数据:
[0.39954923 0.         0.37476577 0.48699617 0.         0.
 0.         0.         0.         0.        ]


-----------------------

四分类-建立时间序列

In [10]:
def clusterClassify_4(dateGlass, n_clusters: int = 4):
    """Cluster the rows of ``dateGlass`` with Ward agglomerative clustering.

    The Gower distance matrix of the input is computed, each of its columns is
    standardised, and the rows of that scaled matrix are then clustered.

    Parameters
    ----------
    dateGlass : pandas.DataFrame
        One row per sample; passed unchanged to ``gower_matrix``.
    n_clusters : int, default 4
        Number of clusters to form. The default keeps the original
        four-cluster behaviour of this function.

    Returns
    -------
    numpy.ndarray
        One integer cluster label per input row.
    """
    # Pairwise Gower distances between samples.
    gower_dist = gower_matrix(dateGlass)

    # Standardise every column of the distance matrix (zero mean, unit variance).
    scaled_dist = StandardScaler().fit_transform(gower_dist)

    # Four clusters by default (the earlier inline comment said two, which did
    # not match the code).
    # NOTE(review): the rows of the scaled distance matrix are used as feature
    # vectors for Ward linkage; the matrix is not passed as a precomputed
    # distance. Confirm that this is the intended method.
    clustering_model = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward')

    # Fit and return the label assigned to each row.
    return clustering_model.fit_predict(scaled_dist)

In [11]:
# Four-cluster labels for each glass type, computed on the value-only frames
cluster_labels_pb_4 = clusterClassify_4(dateGlass_pb_value)
cluster_labels_hk_4 = clusterClassify_4(dateGlass_hk_value)

In [12]:
# Show the four-cluster label assigned to each lead-barium row
cluster_labels_pb_4

array([3, 2, 2, 1, 1, 2, 0, 2, 0, 2, 2, 0, 0, 3, 3, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 3, 0, 1, 1, 3, 3, 1, 0, 1,
       3, 0, 1, 1, 1], dtype=int64)

算法结果

In [18]:
# Report the four-cluster split of the lead-barium samples.
print("铅钡-是否风化分类结果")
print("层次聚类数据")
# Row positions of each cluster. Labels 0..3 are reported as unweathered,
# lightly, moderately and heavily weathered respectively.
# NOTE(review): cluster ids from an unsupervised fit carry no inherent order;
# confirm this severity mapping before relying on it.
weather_indices_0 = np.flatnonzero(cluster_labels_pb_4 == 0)
weather_indices_1 = np.flatnonzero(cluster_labels_pb_4 == 1)
weather_indices_2 = np.flatnonzero(cluster_labels_pb_4 == 2)
weather_indices_3 = np.flatnonzero(cluster_labels_pb_4 == 3)
# First column of the labelled frame for the rows of each cluster.
weather_data_0 = np.asarray(dateGlass_pb_all.iloc[weather_indices_0, 0])
weather_data_1 = np.asarray(dateGlass_pb_all.iloc[weather_indices_1, 0])
weather_data_2 = np.asarray(dateGlass_pb_all.iloc[weather_indices_2, 0])
weather_data_3 = np.asarray(dateGlass_pb_all.iloc[weather_indices_3, 0])
print(f"无风化数据:\n{weather_data_0}")
print(f"轻度风化数据:\n{weather_data_1}")
print(f"中度风化数据:\n{weather_data_2}")
print(f"重度风化数据:\n{weather_data_3}")

铅钡-是否风化分类结果
层次聚类数据
无风化数据:
['23未风化点' '25未风化点' '28未风化点' '29未风化点' '31' '32' '33' '35' '36' '37'
 '42未风化点1' '42未风化点2' '44未风化点' '45' '46' '47' '48' '49未风化点' '53未风化点' '55']
轻度风化数据:
[35.21333473 30.01215067 36.68990976 33.40773055 26.57958688 16.9524196
 19.94011312 46.44109759 27.35969388 22.35376743 31.60234172 27.48999676
 30.77156744]
中度风化数据:
[20.17631737  4.69258958 42.25766316 32.30177994 19.82568624  3.72409651]
重度风化数据:
[36.31995195 35.05870342 37.41641337 19.6990716  13.10730883 22.44750181
 30.14659686 25.81558796 23.28244275 17.65373504]


In [19]:
# Report the four-cluster split of the high-potassium samples.
print("高钾-是否风化分类结果")
print("层次聚类数据")
# Row positions of each cluster. Labels 0..3 are reported as unweathered,
# lightly, moderately and heavily weathered respectively.
# NOTE(review): cluster ids from an unsupervised fit carry no inherent order;
# confirm this severity mapping before relying on it.
weather_indices_0 = np.flatnonzero(cluster_labels_hk_4 == 0)
weather_indices_1 = np.flatnonzero(cluster_labels_hk_4 == 1)
weather_indices_2 = np.flatnonzero(cluster_labels_hk_4 == 2)
weather_indices_3 = np.flatnonzero(cluster_labels_hk_4 == 3)
# First column of the labelled frame for the rows of each cluster.
weather_data_0 = np.asarray(dateGlass_hk_all.iloc[weather_indices_0, 0])
weather_data_1 = np.asarray(dateGlass_hk_all.iloc[weather_indices_1, 0])
weather_data_2 = np.asarray(dateGlass_hk_all.iloc[weather_indices_2, 0])
weather_data_3 = np.asarray(dateGlass_hk_all.iloc[weather_indices_3, 0])
print(f"无风化数据:\n{weather_data_0}")
print(f"轻度风化数据:\n{weather_data_1}")
print(f"中度风化数据:\n{weather_data_2}")
print(f"重度风化数据:\n{weather_data_3}")

高钾-是否风化分类结果
层次聚类数据
无风化数据:
['03部位2' '06部位1' '06部位2' '21']
轻度风化数据:
['03部位1' '07' '09' '10' '12' '18' '22' '27']
中度风化数据:
['13' '14' '16']
重度风化数据:
['01' '04' '05']


---

将数据存入新的表格中

In [20]:
# Attach both clusterings to the per-type frames as new columns.
# This mutates the frames in place, so earlier displays of them are stale.
dateGlass_pb_all["weather_time"] = cluster_labels_pb_4
dateGlass_hk_all["weather_time"] = cluster_labels_hk_4
dateGlass_pb_all["weather_or_no"] = cluster_labels_pb_2
dateGlass_hk_all["weather_or_no"] = cluster_labels_hk_2

In [21]:
# Write both labelled frames into one workbook, one sheet per glass type.
# The writer is closed (and the file saved) when the with-block exits.
with pd.ExcelWriter("date_weather_type.xlsx") as writer:
    dateGlass_pb_all.to_excel(writer, sheet_name="pb")
    dateGlass_hk_all.to_excel(writer, sheet_name="hk")