In [1]:
from scipy.io import arff
import pandas as pd

# 加载数据并解析列名
data_2c, meta_2c = arff.loadarff('column_2C_weka.arff')
columns = [attr[0] for attr in meta_2c._attributes]  # 获取列名
df_2c = pd.DataFrame(data_2c, columns=columns)

# 查看列名和数据类型
print("列名:", df_2c.columns.tolist())
print("数据类型:\n", df_2c.dtypes)

列名: ['p', 'p', 'l', 's', 'p', 'd', 'c']
数据类型:
 p    object
p    object
l    object
s    object
p    object
d    object
c    object
dtype: object


In [3]:
correct_columns = [
    'pelvic_incidence',    # 骨盆入射角
    'pelvic_tilt',         # 骨盆倾斜角
    'lumbar_lordosis_angle',  # 腰椎前凸角度
    'sacral_slope',        # 骶骨斜率
    'pelvic_radius',       # 骨盆半径
    'degree_spondylolisthesis',  # 脊椎滑脱程度
    'class'                # 类别标签
]

# 将列名替换为正确的名称
df_2c.columns = correct_columns

In [4]:
# 转换前六列为浮点数
for col in correct_columns[:-1]:
    df_2c[col] = df_2c[col].astype(float)

# 转换标签列：将字节字符串解码为普通字符串，再映射为0/1
df_2c['class'] = df_2c['class'].str.decode('utf-8')
df_2c['class'] = df_2c['class'].map({
    'NO': 0,    # 正常
    'AB': 1,    # 异常
    'DH': 1,    # 异常（椎间盘突出）
    'SL': 1     # 异常（脊椎滑脱）
})

# 删除可能存在的缺失值（如有）
df_2c = df_2c.dropna()

In [5]:
print("修复后的列名:", df_2c.columns.tolist())
print("数据类型:\n", df_2c.dtypes)
print("\n前五行数据:\n", df_2c.head())

修复后的列名: ['pelvic_incidence', 'pelvic_tilt', 'lumbar_lordosis_angle', 'sacral_slope', 'pelvic_radius', 'degree_spondylolisthesis', 'class']
数据类型:
 pelvic_incidence            float64
pelvic_tilt                 float64
lumbar_lordosis_angle       float64
sacral_slope                float64
pelvic_radius               float64
degree_spondylolisthesis    float64
class                       float64
dtype: object

前五行数据:
 Empty DataFrame
Columns: [pelvic_incidence, pelvic_tilt, lumbar_lordosis_angle, sacral_slope, pelvic_radius, degree_spondylolisthesis, class]
Index: []
