In [2]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import numpy as np

# 读取数据
data = pd.read_csv("dataset\car_following_01_tracks.csv")

# 将 "Truck" 设置为 1，"Car" 设置为 0
label_encoder = LabelEncoder()
data['front_class_encoded'] = label_encoder.fit_transform(data['front_class'])
data['following_class_encoded'] = label_encoder.transform(data['following_class'])

# 选择需要进行主成分分析的特征列
features = ['distance', 'following_x', 'front_x', 'front_speed', 'following_speed',
             'front_width', 'following_width', 'front_class_encoded', 'following_class_encoded']

# 将特征列进行标准化
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data[features])

# 创建 PCA 模型并拟合数据
pca = PCA(n_components=8)
principal_components = pca.fit_transform(scaled_data)

# 创建包含主成分的数据框
principal_df = pd.DataFrame(data=principal_components)

# 获取每个主成分的方差比例和累积贡献率
explained_variance_ratio = pca.explained_variance_ratio_
cumulative_variance_ratio = np.cumsum(explained_variance_ratio)

# 创建总方差解释表
variance_explained_df = pd.DataFrame({
    'Principal Component': [f'PC{i+1}' for i in range(len(explained_variance_ratio))],
    'Explained Variance Ratio': explained_variance_ratio,
    'Cumulative Variance Ratio': cumulative_variance_ratio
})

component_names = [f"PC{i+1}" for i in range(len(pca.components_))]
component_df = pd.DataFrame(data=pca.components_, columns=features, index=component_names)


# 打印总方差解释表
print(variance_explained_df)

print(component_df.iloc[:3,])



  Principal Component  Explained Variance Ratio  Cumulative Variance Ratio
0                 PC1                  0.334440                   0.334440
1                 PC2                  0.228925                   0.563365
2                 PC3                  0.162563                   0.725928
3                 PC4                  0.120982                   0.846910
4                 PC5                  0.085395                   0.932305
5                 PC6                  0.034939                   0.967244
6                 PC7                  0.023382                   0.990627
7                 PC8                  0.009279                   0.999905
     distance  following_x   front_x  front_speed  following_speed  \
PC1 -0.196975    -0.046057 -0.047603     0.388016         0.382481   
PC2 -0.097577     0.682852  0.682509    -0.155735        -0.075992   
PC3 -0.382303    -0.081988 -0.074508    -0.090110        -0.240951   

     front_width  following_width  front_cla

In [None]:
t = Data_process(r'dataset\01_tracks.csv')
allinfo = t.distance_selector()[t.time_selecter().index[0]]
for i in range(1, len(t.time_selecter())):
    allinfo = pd.concat([allinfo, t.distance_selector()[t.time_selecter().index[i]]], axis=0)

In [26]:
from hmmlearn import hmm
import pandas as pd
import numpy as np

observations = principal_df.iloc[:, :3].values
num_states = 3  
model = hmm.GaussianHMM(n_components=num_states, covariance_type="full", n_iter=100)
model.fit(observations)
predicted_labels = model.predict(observations)

data['driving_style_hmm'] = predicted_labels
data.to_csv("output1.csv", index=False)

data_evaluate = data[['distance', 'following_x', 'front_x', 'front_speed', 'following_speed']]

In [27]:
data_evaluate = data[['distance', 'following_x', 'front_x', 'front_speed', 'following_speed','driving_style_hmm', 'driving_style_kmeans']]

In [4]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# 对主成分进行 KMeans 聚类
kmeans = KMeans(n_clusters=3)
cluster_labels = kmeans.fit_predict(principal_df.iloc[:, :3])  # 选择前三个主成分进行聚类
data['driving_style_kmeans'] = cluster_labels
# 计算轮廓系数
silhouette_avg = silhouette_score(principal_df.iloc[:, :3], cluster_labels)
print(f"Silhouette Score kmeans: {silhouette_avg}")

silhouette_avg1 = silhouette_score(principal_df.iloc[:, :3], predicted_labels)
print(f"Silhouette Score hmm: {silhouette_avg1}")

  super()._check_params_vs_input(X, default_n_init=10)


Silhouette Score kmeans: 0.2973653550107876


NameError: name 'predicted_labels' is not defined

In [11]:
from sklearn.cluster import DBSCAN

from sklearn.model_selection import GridSearchCV

# 定义参数网格
param_grid = {
    'eps': [0.1, 0.5, 1.0],
    'min_samples': [3, 5, 10]
}

# 创建DBSCAN实例
dbscan = DBSCAN()

# 实例化GridSearchCV
grid_search = GridSearchCV(estimator=dbscan, param_grid=param_grid, scoring='adjusted_rand_score', cv=3)

# 执行网格搜索
grid_search.fit(principal_df.iloc[:, :3])

# 输出最佳参数组合
print("Best Parameters:", grid_search.best_params_)

# 输出最佳轮廓系数
print("Best Silhouette Score:", grid_search.best_score_)


Traceback (most recent call last):
  File "d:\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 811, in _score
    scores = scorer(estimator, X_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'

Traceback (most recent call last):
  File "d:\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 811, in _score
    scores = scorer(estimator, X_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'

Traceback (most recent call last):
  File "d:\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 811, in _score
    scores = scorer(estimator, X_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'

Traceback (most recent call last):
  File "d:\anaconda3\Lib\site-packages\sklearn\model_selection\_validation

Best Parameters: {'eps': 0.1, 'min_samples': 3}
Best Silhouette Score: nan


Traceback (most recent call last):
  File "d:\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 811, in _score
    scores = scorer(estimator, X_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'



In [None]:
mean_values = data_evaluate.groupby('driving_style_hmm').mean()