In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneOut
from sklearn import metrics

In [3]:
def one_hot_encode(data, feature_cols):
    """One-hot encode the selected columns of ``data``.

    Every listed column is treated as categorical, numeric columns included.

    Parameters
    ----------
    data : pandas.DataFrame
        Source table.
    feature_cols : list
        Labels of the columns to encode, in the desired output order.

    Returns
    -------
    pandas.DataFrame
        One indicator column per (column, observed value) pair, named
        ``<column>_<value>`` and ordered like ``feature_cols``. The frame has
        the rows of ``data`` and no columns when ``feature_cols`` is empty.
    """
    if not feature_cols:
        return pd.DataFrame(index=data.index)
    # A single call encodes all columns at once; naming them in ``columns=``
    # forces numeric columns to be encoded as well, and avoids growing a
    # DataFrame with one concat per column.
    return pd.get_dummies(data[feature_cols], columns=feature_cols)


if __name__ == '__main__':
    pd.set_option('display.width', 300)
    pd.set_option('display.max_columns', 300)

    # Load the data set and preview the first rows
    data = pd.read_csv('100组分类预测.csv')
    columns = data.columns.tolist()
    print(data.head(10))

    # Accuracy results for every target column
    all_accuracies = {}

    # Leave-one-out splitter: each sample is the test set exactly once
    loo = LeaveOneOut()

    # Every column except the last one is predicted in turn; the last column
    # is only ever used as a feature.
    for target_col in columns[:-1]:
        print(f"\n=== 预测目标: {target_col} ===")

        # Feature matrix: one-hot encoding of all columns but the target
        features = [col for col in columns if col != target_col]
        x = one_hot_encode(data, features)

        # Target variable as integer category codes
        y = np.array(pd.Categorical(data[target_col]).codes)

        # True label and prediction of the held-out sample for every fold
        y_true = []
        y_pred = []

        # Run the leave-one-out cross-validation
        for train_index, test_index in loo.split(x):
            x_train, x_test = x.iloc[train_index], x.iloc[test_index]
            y_train, y_test = y[train_index], y[test_index]

            # Fit a fresh random forest on the training fold; the fixed
            # random_state keeps runs reproducible
            clf = RandomForestClassifier(n_estimators=60, max_depth=8, random_state=42)
            clf.fit(x_train, y_train)

            # The test fold holds exactly one sample
            y_true.append(y_test[0])
            y_pred.append(clf.predict(x_test)[0])

        # Fraction of held-out samples that were predicted correctly
        loo_accuracy = metrics.accuracy_score(y_true, y_pred)

        print(f'{target_col} 留一交叉验证精确度: {loo_accuracy:.4f}')

        # Store the result
        all_accuracies[target_col] = {
            'loo_accuracy': loo_accuracy
        }

    # Mean accuracy over all target columns
    avg_loo_accuracy = np.mean([acc['loo_accuracy'] for acc in all_accuracies.values()])

    print("\n=== 所有节点的平均精度 ===")
    print(f"平均留一交叉验证精确度: {avg_loo_accuracy:.4f}")

    # Optional per-target detail output (disabled):
    # print("\n=== 各节点精度详情 ===")
    # for col, acc in all_accuracies.items():
    #     print(f"{col}: 留一交叉验证 {acc['loo_accuracy']:.4f}")

    A   B  C  D  E  F  M  N  Q  R  S  G  A4  A5  A6  A7  A8  H  Y  Z  A1  A2  A3  J  T  U  V  W  X  K  L
0  20   4  1  1  1  0  0  0  0  0  0  0   0   0   0   1   1  0  0  0   1   0   1  1  1  0  0  0  0  1  0
1  35  12  1  1  0  0  1  0  1  1  0  0   1   0   1   1   0  0  1  1   1   1   1  1  1  0  1  1  1  1  0
2  20   8  1  1  2  1  1  0  1  2  1  0   0   0   0   1   1  0  0  0   1   0   1  1  1  0  0  1  1  1  0
3  20   4  1  0  1  0  1  0  1  2  1  0   0   0   0   0   2  0  0  0   0   0   1  1  1  0  0  1  0  1  0
4  35   8  1  0  1  0  1  1  1  2  1  1   1   1   0   1   0  0  0  1   1   0   1  1  1  1  0  1  1  1  0
5  55   4  0  1  2  0  1  0  0  2  1  0   1   1   1   0   2  0  0  1   1   1   1  1  0  0  0  0  0  0  0
6  35   4  1  0  1  0  1  0  1  0  0  0   0   0   0   1   0  0  1  1   1   0   1  1  1  0  0  0  1  1  0
7  55   8  1  1  1  0  1  1  1  2  0  0   1   0   0   1   0  0  0  0   1   0   1  1  0  0  0  1  0  1  0
8  35   8  1  1  1  1  0  0  0  1  1  0   0   0   0   0