```bash
pip install pandas numpy scikit-learn six matplotlib gmdhpy
```

In [4]:
import pandas as pd

from gmdhpy.gmdh import MultilayerGMDH

In [5]:
# 1. Load the data: tab-separated text files
def _read_expression_table(path):
    """Read a tab-separated expression matrix and index it by its first column.

    The first column holds the gene names; the remaining columns are samples.
    Returns a new DataFrame (nothing is modified in place).
    """
    table = pd.read_csv(path, sep='\t')
    return table.set_index(table.columns[0])


clinical_data = pd.read_csv('Training_clinical_infor.txt', sep='\t')
test_clinical_data = pd.read_csv('testing_clinical_infor.txt', sep='\t')

gene_expression_data = _read_expression_table('Training_selected_genes.txt')
test_gene_expression_data = _read_expression_table('testing_selected_genes.txt')

# Training clinical ids use '-', the expression columns use '.'; normalise the ids.
# regex=False makes this a literal replacement regardless of the pandas default.
clinical_data['id'] = clinical_data['id'].str.replace('-', '.', regex=False)

print(gene_expression_data.head(5))
print(test_gene_expression_data.head(5))


            TCGA.P5.A781.01A.11R.A32Q.07  TCGA.S9.A6WL.01A.21R.A33Z.07  \
Unnamed: 0                                                               
RPAP1                        2284.529555                   2498.443645   
RNF130                       7217.447970                   5749.241712   
S1PR3                         333.990256                    967.611382   
IL18                          237.142133                    173.459492   
DCTD                          844.479428                   1430.518339   

            TCGA.P5.A737.01A.11R.A32Q.07  TCGA.S9.A6U5.01A.12R.A33Z.07  \
Unnamed: 0                                                               
RPAP1                        2929.385527                   3210.650243   
RNF130                       7306.794324                   6120.424533   
S1PR3                         221.148604                    965.351187   
IL18                          147.802836                    496.886347   
DCTD                          680.115

In [6]:
# 2. 处理test、train数据不同

# 统一两个数据集的基因表达数据
def unify_row(tb1,tb2):
    """Restrict two gene-indexed tables to the genes (index labels) they share.

    Prints the gene counts of both tables, the number of shared genes and the
    genes unique to each table, then returns both tables limited to the
    shared genes as a ``(tb1, tb2)`` tuple.
    """
    print(f"保留共同基因……")

    # Index labels present in both tables
    shared = tb1.index.intersection(tb2.index)
    shared_set = set(shared)
    only_in_tb1 = set(tb1.index) - shared_set
    only_in_tb2 = set(tb2.index) - shared_set

    # Report what is kept and what is dropped
    print(f"tb1基因数: {len(tb1.index)} , tb2基因数: {len(tb2.index)} , 共同基因数量: {len(shared)}")
    print(f"tb1独有基因列表: {only_in_tb1}")
    print(f"tb2独有基因列表: {only_in_tb2}")

    # Both results are selected with the same label sequence
    return tb1.loc[shared], tb2.loc[shared]

# 转置基因表达数据，使列名变为行索引，行名变成列索引，方便后续拼接
def transpose_gene_expression_data(gene_expression_df):
    """Turn a genes-by-samples table into a samples-by-genes table.

    The input's column labels become the values of a leading ``id`` column,
    and the input's index labels become the remaining columns, sorted.
    Returns a new DataFrame with a fresh integer row index.
    """
    # Transpose, move the old column labels into a regular column and call it 'id'
    per_sample = gene_expression_df.T.reset_index().rename(columns={'index': 'id'})

    # Keep the first column (the ids) aside while the other columns are sorted
    sample_ids = per_sample.iloc[:, 0]
    ordered_genes = sorted(per_sample.columns[1:])

    result = per_sample.reindex(ordered_genes, axis=1)
    result.insert(0, 'id', sample_ids)  # put the id column back in first position
    return result


# Keep only the genes shared by both datasets, then reshape each matrix to one row per sample
gene_unified, test_gene_unified = unify_row(gene_expression_data, test_gene_expression_data)
for unified in (gene_unified, test_gene_unified):
    print(unified.shape)

gene_expression_transposed, test_gene_expression_transposed = (
    transpose_gene_expression_data(unified)
    for unified in (gene_unified, test_gene_unified)
)

for transposed in (gene_expression_transposed, test_gene_expression_transposed):
    print(transposed.head())


保留共同基因……
tb1基因数: 1611 , tb2基因数: 1587 , 共同基因数量: 1587
tb1独有基因列表: {'SLC5A3', 'FLJ16779', 'PCDHB7', 'TAP2', 'AC004540.2', 'AL139393.2', 'AQP1', 'CASTOR3', 'LINC02283', 'LINC01578', 'AC093010.3', 'AC090114.2', 'PI4K2A', 'PAX8-AS1', 'BAHCC1', 'AC008124.1', 'AC004656.1', 'LSP1', 'TMIGD3', 'ZNF710-AS1', 'AL118505.1', 'LINC01094', 'AL355974.2', 'CSF2RA'}
tb2独有基因列表: set()
(1587, 496)
(1587, 927)
Unnamed: 0                            id          A2M        AAGAB  \
0           TCGA.P5.A781.01A.11R.A32Q.07  23443.58162  1435.524515   
1           TCGA.S9.A6WL.01A.21R.A33Z.07  37824.61859  1942.537321   
2           TCGA.P5.A737.01A.11R.A32Q.07  20450.13371  1063.513638   
3           TCGA.S9.A6U5.01A.12R.A33Z.07  23649.63401  1518.100497   
4           TCGA.HT.7607.01A.11R.2090.07  26284.19134  2835.889111   

Unnamed: 0       ABCA7        ABCC1       ABCC3        ABCC4      ABHD14B  \
0           352.997832   863.487003    7.240981  1901.662677  1257.215354   
1           706.377208   877.746826 

In [7]:
# 3. Join the clinical table with the per-sample expression table on the shared 'id' column
train_data = clinical_data.merge(gene_expression_transposed, on='id')
print(train_data.head())

test_data = test_clinical_data.merge(test_gene_expression_transposed, on='id')
print(test_data.head())


                             id  OS.time  OS.state Transcriptome.Subtype  \
0  TCGA.P5.A781.01A.11R.A32Q.07    134.0       0.0                    NE   
1  TCGA.S9.A6WL.01A.21R.A33Z.07    946.0       0.0                    NE   
2  TCGA.P5.A737.01A.11R.A32Q.07    372.0       0.0                    PN   
3  TCGA.S9.A6U5.01A.12R.A33Z.07    992.0       0.0                    PN   
4  TCGA.HT.7607.01A.11R.2090.07     96.0       1.0                    NE   

  diagnoses.primary_diagnosis Chr.1p_19q.codeletion Transcriptome.Subtype.1  \
0     Astrocytoma, anaplastic                 codel                  Mutant   
1     Astrocytoma, anaplastic                 codel                  Mutant   
2            Astrocytoma, NOS                 codel                  Mutant   
3            Astrocytoma, NOS                 codel                  Mutant   
4            Astrocytoma, NOS                 codel                  Mutant   

  2021classification_created By BZT demographic.vital_status   age  

In [8]:
# 4. Data preprocessing
def _drop_short_follow_up(frame):
    """Return the rows of `frame` that pass both survival-time filters.

    Rows are kept only if OS.time >= 10, and additionally rows with
    OS.state == 0 and OS.time < 100 are removed.
    """
    kept = frame[frame['OS.time'] >= 10]
    state_zero_and_short = (kept['OS.state'] == 0) & (kept['OS.time'] < 100)
    return kept[~state_zero_and_short]


train_data = _drop_short_follow_up(train_data)
test_data = _drop_short_follow_up(test_data)


In [31]:
# 5. Build the feature matrices (X) and regression targets (Y) for train and test

# Columns of the merged training table that are not gene-expression features
TRAIN_NON_FEATURE_COLUMNS = [
    'id', 'OS.time', 'OS.state', 'Transcriptome.Subtype', 'diagnoses.primary_diagnosis',
    'Chr.1p_19q.codeletion', 'Transcriptome.Subtype.1', '2021classification_created By BZT',
    'demographic.vital_status', 'age', 'demographic.gender', 'Grade',
]
# Columns of the merged test table that are not gene-expression features
TEST_NON_FEATURE_COLUMNS = [
    'id', 'OS.time', 'OS.state', 'subtype', 'IDH stat', 'PRS_type', 'Histology', 'Grade', 'Gender',
    'Radio_status (treated=1;un-treated=0)', 'Chemo_status (TMZ treated=1;un-treated=0)',
    '1p19q_codeletion_status', '2021classification_created By BZT',
]

X_train = train_data.drop(TRAIN_NON_FEATURE_COLUMNS, axis=1)
Y_train = train_data[['OS.time']]  # target: survival time only (OS.state is not included)

feature_names_list = list(X_train.columns)

# Select the test features by the training column names so that both matrices
# are guaranteed to have the same columns in the same order (a missing
# feature raises KeyError instead of silently shifting columns).
X_test = test_data.drop(TEST_NON_FEATURE_COLUMNS, axis=1)[feature_names_list]
Y_test = test_data[['OS.time']]  # target: survival time only (OS.state is not included)

print(feature_names_list)


print(X_train.head())
print(Y_train.head())
print(X_test.head())
print(Y_test.head())

# Convert to numpy arrays for the model
X_train = X_train.to_numpy()
Y_train = Y_train.to_numpy()
X_test = X_test.to_numpy()
Y_test = Y_test.to_numpy()

['A2M', 'AAGAB', 'ABCA7', 'ABCC1', 'ABCC3', 'ABCC4', 'ABHD14B', 'ABI3', 'AC092111.1', 'ACAD8', 'ACKR3', 'ACOT9', 'ACP2', 'ACP5', 'ACSL1', 'ACSS3', 'ACTB', 'ACTG1', 'ACTN1', 'ACTN4', 'ACTR2', 'ACTR3', 'ACVR1', 'ADA2', 'ADAM10', 'ADAM19', 'ADAM28', 'ADAM9', 'ADAMTS14', 'ADAMTS9', 'ADAP2', 'ADD1', 'ADGRB1', 'ADGRE5', 'ADHFE1', 'ADIPOR1', 'ADM', 'ADORA1', 'ADPGK', 'AEBP1', 'AGFG2', 'AGT', 'AGTRAP', 'AHNAK2', 'AHR', 'AIF1', 'AK4', 'AKAP12', 'AKR1C3', 'ALAS1', 'ALDH1L1', 'ALDH2', 'ALKBH5', 'ALOX5', 'ALOX5AP', 'AMIGO2', 'AMOTL2', 'AMPD3', 'ANGPTL4', 'ANKRD11', 'ANKRD6', 'ANO10', 'ANO6', 'ANTXR2', 'ANXA1', 'ANXA11', 'ANXA2', 'ANXA5', 'AOAH', 'AP1B1', 'AP1S2', 'AP3B1', 'APBA2', 'APBB1IP', 'APLN', 'APLNR', 'APLP2', 'APOBEC3C', 'APOL1', 'APOL2', 'APOL6', 'ARAP3', 'ARF6', 'ARHGAP18', 'ARHGAP30', 'ARHGAP33', 'ARHGAP4', 'ARHGAP45', 'ARHGDIB', 'ARHGEF3', 'ARHGEF4', 'ARL3', 'ARL4C', 'ARL5A', 'ARPC1B', 'ARPC2', 'ARRB2', 'ARRDC3', 'ARRDC4', 'ARSA', 'ARSD', 'ARSI', 'ARSJ', 'ASAH1', 'ASCL1', 'ASRGL1', 'AS

In [10]:
# Show the dimensions of the training arrays, then the feature matrix itself
for matrix in (X_train, Y_train):
    print(matrix.shape)

print(X_train)

(447, 1587)
(447, 1)
[[23443.58162    1435.524515    352.9978315 ...  2690.929623
   1340.486637   3521.832211 ]
 [37824.61859    1942.537321    706.3772078 ...  2452.46643
   1589.348718   6955.098661 ]
 [20450.13371    1063.513638    115.5751498 ...  3773.973159
   2598.218271   6048.803271 ]
 ...
 [83518.62893    2967.619397    240.7868215 ...  2432.885027
    892.7875004 20110.39024  ]
 [39158.49854    1828.496882    437.2177817 ...  2423.518432
   1908.122637  11648.52408  ]
 [60943.17279    1766.271049    277.0621253 ...  3668.974205
   1064.170436  27104.86178  ]]


In [None]:
# 6. Train the GMDH model
# NOTE: the output recorded for this cell shows every layer after the first
# taking roughly 5,850-6,200 s, so a full re-run is very expensive.
model = MultilayerGMDH()  # create the GMDH model with the library's default settings
model.fit(X_train, Y_train)  # fit on the gene features against the OS.time target


train layer0 in 1511.33 sec
train layer1 in 5870.01 sec
train layer2 in 5890.95 sec
train layer3 in 5877.18 sec
train layer4 in 5880.13 sec
train layer5 in 5884.60 sec
train layer6 in 5886.25 sec
train layer7 in 5875.24 sec
train layer8 in 5901.60 sec
train layer9 in 5895.21 sec
train layer10 in 5902.18 sec
train layer11 in 5875.37 sec
train layer12 in 5895.58 sec
train layer13 in 5892.68 sec
train layer14 in 5888.51 sec
train layer15 in 5894.30 sec
train layer16 in 5868.07 sec
train layer17 in 5987.46 sec
train layer18 in 6220.51 sec
train layer19 in 5883.69 sec
train layer20 in 5857.36 sec
train layer21 in 5851.04 sec
train layer22 in 5855.18 sec
train layer23 in 5847.92 sec
train layer24 in 5850.40 sec
train layer25 in 5847.44 sec
train layer26 in 5854.17 sec
train layer27 in 5855.60 sec
train layer28 in 5849.99 sec
train layer29 in 5850.37 sec
train layer30 in 5850.34 sec
train layer31 in 5851.17 sec
train layer32 in 5975.02 sec
train layer33 in 6086.20 sec
train layer34 in 5900.85

AttributeError: 'NoneType' object has no attribute 'shape'

In [None]:
# 7. Model evaluation: predict the target for the test feature matrix
predictions = model.predict(X_test)

In [15]:
# Mean absolute error between the test targets and the model predictions
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_true=Y_test, y_pred=predictions)

print("Mean Absolute Error (MAE): {}".format(mae))

Mean Absolute Error (MAE): 1003.727350910936


In [None]:
# Inspect a single polynomial: the first neuron of the last trained layer
final_layer = model.layers[-1]  # last layer, whatever depth training actually reached

# Index 0 is taken to be the layer's best model (assumes the library keeps
# each layer sorted best-first; confirm against gmdhpy).
final_model = final_layer[0]

print("W:")
print(final_model.w)  # weights of the neuron

print("Type of final model:")
print(final_model.ftype)  # reference-function type of the neuron
# rfLinearCov: y = w[0] + w[1]*input1 + w[2]*input2 + w[3]*input1*input2

print(f"最终模型的输入1索引 (u1_index): {final_model.u1_index}")
print(f"最终模型的输入2索引 (u2_index): {final_model.u2_index}")
    

W:
[-2.16014254e+01  1.02821425e+00 -4.00707138e-05 -1.52246945e-02]
Type of final model:
RefFunctionType.rfLinearCov
最终模型的输入1索引 (u1_index): 0
最终模型的输入2索引 (u2_index): 853


In [None]:
# 完整多项式
from gmdhpy.gmdh import RefFunctionType 

def get_model_expression_with_file_output(gmdh_model, layer_index, model_index_in_layer, output_file_path, feature_names=None):
    """
    Write the polynomial of one GMDH neuron to a text file as named sub-expressions.

    Every neuron reachable from the requested one is written once as
    ``expr_k = <polynomial of its two inputs>``, inputs first, so the file
    stays linear in the number of distinct neurons instead of inlining
    (and repeating) whole sub-trees. The last line is ``final_result = expr_0``.

    Parameters:
    gmdh_model -- trained model exposing ``layers`` (indexable per layer, each
                  layer having ``l_count``) and ``param.admix_features``.
    layer_index -- layer of the neuron to export.
    model_index_in_layer -- index of the neuron inside that layer.
    output_file_path -- path of the text file to (over)write, UTF-8 encoded.
    feature_names -- optional list of original feature names; features are
                     written as ``'name'`` when available, else ``x_<index>``.

    Returns:
    None. The result is the content of the written file.
    """
    # (layer, model) -> "expr_k". An entry means the neuron has already been
    # visited, so it is neither traversed nor written a second time.
    expr_ids = {}

    def ftype_name(ftype):
        # Dispatch on the member name so this function does not depend on an
        # enum imported in another cell; fall back to the tail of str(ftype).
        return getattr(ftype, "name", None) or str(ftype).rsplit(".", 1)[-1]

    def feature_ref(feature_idx):
        # Quoted feature name when known, generic x_<i> otherwise
        if feature_names and feature_idx < len(feature_names):
            return f"'{feature_names[feature_idx]}'"
        return f"x_{feature_idx}"

    def right_hand_side(neuron, a, b):
        # Build the whole polynomial before writing, so a failure can never
        # leave a half-written line in the file.
        kind = ftype_name(neuron.ftype)
        if kind not in ("rfLinear", "rfLinearCov", "rfQuadratic"):
            return f"UNSUPPORTED_FTYPE({neuron.ftype})"
        w = neuron.w
        try:
            terms = [f"{w[0]:.6f}", f"{w[1]:.6f}*({a})", f"{w[2]:.6f}*({b})"]
            if kind != "rfLinear":
                terms.append(f"{w[3]:.6f}*({a}*{b})")
            if kind == "rfQuadratic":
                terms.append(f"{w[4]:.6f}*({a}**2)")
                terms.append(f"{w[5]:.6f}*({b}**2)")
        except IndexError:
            return f"ERROR_WEIGHTS_MISSING_FOR_TYPE_{neuron.ftype}"
        return " + ".join(terms)

    # Explicit encoding: the header and comments contain non-ASCII text
    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.write("# GMDH模型多项式表达式\n\n")

        def resolve_input(current_layer_idx, input_idx):
            # Layer 0 neurons read the original features directly
            if current_layer_idx == 0:
                return feature_ref(input_idx)

            prev_layer_outputs_count = gmdh_model.layers[current_layer_idx - 1].l_count
            if input_idx < prev_layer_outputs_count:
                # Input is the output of a neuron in the previous layer
                return write_neuron(current_layer_idx - 1, input_idx)

            # Otherwise the index points past the previous layer's outputs,
            # i.e. at an original feature, which requires admix_features
            if not gmdh_model.param.admix_features:
                f.write(f"# 错误：尝试在layer={current_layer_idx}使用原始特征但admix_features=False\n")
                return "ERROR"
            return feature_ref(input_idx - prev_layer_outputs_count)

        def write_neuron(layer_idx, model_idx):
            key = (layer_idx, model_idx)
            if key in expr_ids:
                return expr_ids[key]  # already emitted: just reference it

            # Ids are handed out in first-visit order, so the root is expr_0
            expr_id = f"expr_{len(expr_ids)}"
            expr_ids[key] = expr_id

            neuron = gmdh_model.layers[layer_idx][model_idx]
            # Inputs are emitted first so every id is defined before its use
            input1_id = resolve_input(layer_idx, neuron.u1_index)
            input2_id = resolve_input(layer_idx, neuron.u2_index)

            f.write(f"# 层 {layer_idx}, 模型 {model_idx}\n")
            f.write(f"{expr_id} = {right_hand_side(neuron, input1_id, input2_id)}\n\n")
            return expr_id

        # Walk the expression tree starting from the requested neuron
        final_expr_id = write_neuron(layer_index, model_index_in_layer)

        # Finally, name the overall result
        f.write("# 最终模型输出\n")
        f.write(f"final_result = {final_expr_id}\n")

# Usage: export the first neuron of the last trained layer
output_file = "gmdh_model_expression.txt"
final_layer_index = len(model.layers) - 1  # derive the depth instead of hard-coding 49
get_model_expression_with_file_output(model, final_layer_index, 0, output_file, feature_names=feature_names_list)
print(f"模型表达式已写入文件: {output_file}")

模型表达式已写入文件: gmdh_model_expression.txt


In [41]:
from gmdhpy.gmdh import RefFunctionType #确保导入 RefFunctionType

def get_model_expression_recursive(gmdh_model, layer_index, model_index_in_layer, feature_names=None, memo=None):
    """
    Build the fully inlined symbolic expression of one neuron of a GMDH model.

    Parameters:
    gmdh_model -- trained MultilayerGMDH object.
    layer_index -- index of the layer holding the neuron.
    model_index_in_layer -- index of the neuron within that layer.
    feature_names -- list of original feature names; x_0, x_1, ... when None.
    memo -- dict caching expressions per (layer, index) to avoid recomputation.

    Returns:
    The expression as a string. Both input expressions are pasted into the
    result textually (several times for the non-linear types), so the string
    can grow very quickly with the layer depth; the cache avoids recomputing
    a neuron but does not shrink the text.
    """
    cache = {} if memo is None else memo

    cache_key = (layer_index, model_index_in_layer)
    if cache_key in cache:
        return cache[cache_key]

    neuron = gmdh_model.layers[layer_index][model_index_in_layer]

    # Expression strings of the neuron's two inputs
    u1 = _resolve_input_expression(gmdh_model, layer_index, neuron.u1_index, feature_names, cache)
    u2 = _resolve_input_expression(gmdh_model, layer_index, neuron.u2_index, feature_names, cache)

    w = neuron.w
    ftype = neuron.ftype

    # Term i is multiplied by w[i]; each reference-function type uses a prefix
    # of this list. Inputs are always parenthesised to keep operator order.
    # NOTE(review): the weight order of the cubic terms is reproduced as
    # written here - confirm it against the gmdhpy transfer function.
    templates = (
        "{c}",
        "{c}*({a})",
        "{c}*({b})",
        "{c}*(({a})*({b}))",
        "{c}*(({a})**2)",
        "{c}*(({b})**2)",
        "{c}*(({a})**3)",
        "{c}*(({b})**3)",
        "{c}*((({a})**2)*({b}))",
        "{c}*(({a})*(({b})**2))",
    )

    expr = ""
    try:
        if ftype == RefFunctionType.rfLinear:
            term_count = 3    # w0 + w1*u1 + w2*u2
        elif ftype == RefFunctionType.rfLinearCov:
            term_count = 4    # ... + w3*u1*u2
        elif ftype == RefFunctionType.rfQuadratic:
            term_count = 6    # ... + w4*u1^2 + w5*u2^2
        elif ftype == RefFunctionType.rfCubic:
            term_count = 10   # ... + the four third-order terms
        else:
            term_count = 0

        if term_count:
            body = " + ".join(
                templates[i].format(c=format(w[i], ".6f"), a=u1, b=u2)
                for i in range(term_count)
            )
            expr = "(" + body + ")"
        else:
            expr = f"UNSUPPORTED_FTYPE({ftype})"
    except IndexError:
        # Fewer weights than the type requires
        expr = f"WEIGHT_INDEX_ERROR_FOR_FTYPE({ftype})_EXPECTED_FW_SIZE({neuron.fw_size})_GOT_LEN_W({len(w)})"

    cache[cache_key] = expr
    return expr

def _resolve_input_expression(gmdh_model, current_model_layer_index, input_index_for_current_model, feature_names=None, memo=None):
    """
    解析给定模型输入的表达式。
    输入可以是前一层的模型输出，也可以是原始特征（如果 admix_features 为 True）。
    """
    # 基本情况：当前模型在第0层，其输入直接是原始特征。
    if current_model_layer_index == 0:
        if feature_names and input_index_for_current_model < len(feature_names):
            return feature_names[input_index_for_current_model]
        else:
            return f"x_{input_index_for_current_model}"

    # 递归情况：当前模型在第0层之后。
    # 其输入可能来自前一层的输出，或者（如果admix_features=True）来自原始特征。
    
    # 前一层选出的最佳模型数量
    # gmdh_model.layers[layer_idx] 存储的是该层选出的模型列表
    # l_count 是 Layer 对象的一个属性，表示该层选出了多少个模型
    prev_layer_outputs_count = gmdh_model.layers[current_model_layer_index - 1].l_count

    if input_index_for_current_model < prev_layer_outputs_count:
        # 输入来自前一层的一个模型输出。
        # input_index_for_current_model 是该模型在前一层被选出模型列表中的索引。
        return get_model_expression_recursive(gmdh_model, 
                                     current_model_layer_index - 1, 
                                     input_index_for_current_model, 
                                     feature_names, memo)
    else:
        # 输入是一个原始特征（仅当 gmdh_model.param.admix_features 为 True 时）
        if not gmdh_model.param.admix_features:
            raise ValueError(
                f"错误：在第 {current_model_layer_index} 层尝试访问原始特征，"
                f"但 admix_features=False。输入索引 {input_index_for_current_model} "
                f">= 前一层输出数量 {prev_layer_outputs_count}。"
            )
        
        original_feature_idx = input_index_for_current_model - prev_layer_outputs_count
        
        if original_feature_idx >= gmdh_model.n_features:
             raise ValueError(
                f"错误：原始特征索引 {original_feature_idx} 超出范围 "
                f"(总原始特征数: {gmdh_model.n_features})。"
                f"当前层: {current_model_layer_index}, 模型的输入索引: {input_index_for_current_model}"
            )

        if feature_names and original_feature_idx < len(feature_names):
            return feature_names[original_feature_idx]
        else:
            return f"x_{original_feature_idx}"


# 2. Build the expression of the final model
if not model.layers:
    # No layer at all
    print("模型没有层。请确保模型已经成功训练。")
else:
    final_layer_idx = len(model.layers) - 1
    # Index 0 is taken as the layer's best model (assumes each layer is
    # sorted best-first by the library; confirm against gmdhpy).
    final_model_idx_in_layer = 0

    if not model.layers[final_layer_idx]:
        # The last layer holds no model
        print("模型的最后一层是空的。")
    else:
        print(f"正在为第 {final_layer_idx} 层, 模型 {final_model_idx_in_layer} 生成多项式...")

        # Feature names: prefer the ones stored on the model, then the
        # notebook-level feature_names_list, else fall back to generic x_i.
        if model.feature_names is not None:
            active_feature_names = model.feature_names
            print(f"使用模型内存储的特征名: {len(active_feature_names)} 个特征。")
        elif 'feature_names_list' in locals():
            active_feature_names = feature_names_list
            print(f"使用提供的 feature_names_list: {len(active_feature_names)} 个特征。")
        else:
            print("警告: `feature_names_list` 未定义，且 `model.feature_names` 为空。将使用通用特征名 'x_i'。")
            print("为了获得更有意义的多项式，请在调用此代码前定义 `feature_names_list`，")
            print("或者在初始化GMDH模型时提供 `feature_names` 参数。")
            active_feature_names = None

        full_polynomial_expression = get_model_expression_recursive(
            model,
            final_layer_idx,
            final_model_idx_in_layer,
            feature_names=active_feature_names,
        )
        print("\n完整的多项式表达式:")
        print(full_polynomial_expression)


正在为第 49 层, 模型 0 生成多项式...
使用提供的 feature_names_list: 1587 个特征。


MemoryError: 