In [1]:
import pandas as pd
# Load the data set that the saved models will score.
df = pd.read_parquet('C:/Users/86138/Tigercut/midterm/test_plus.parquet')

# Preprocessing: cast the '城市', '区域' and '板块' columns to the categorical
# dtype in one pass, then drop the '小区名称' and '交易时间' columns.
df = (
    df.astype({'城市': 'category', '区域': 'category', '板块': 'category'})
      .drop(columns=['小区名称', '交易时间'])
)

# Column names grouped by dtype: int32/int64/float64 vs. category.
numeric_features = list(df.select_dtypes(include=['int32', 'int64', 'float64']).columns)
categorical_features = list(df.select_dtypes(include=['category']).columns)

In [2]:
from joblib import load
import pandas as pd

def load_and_predict(model_name, new_data,
                     model_dir='C:/Users/86138/Tigercut/midterm/hypermodels_interaction_only'):
    """
    Load the saved model called ``model_name`` and predict on ``new_data``.

    The model file name is derived from ``model_name`` by lower-casing it,
    replacing spaces with underscores and appending ``.joblib``
    (e.g. 'Linear Regression' -> 'linear_regression.joblib').

    Args:
        model_name: Model name ('Linear Regression', 'Lasso', 'Ridge',
            'ElasticNet').
        new_data: New data (DataFrame with the same features as the
            training data).
        model_dir: Directory containing the ``.joblib`` files. Defaults to
            the folder this notebook has always read from, so existing
            two-argument calls behave exactly as before.

    Returns:
        Whatever the loaded model's ``predict`` returns for ``new_data``
        (documented by the original author as a numpy array).

    Raises:
        ValueError: If the model file does not exist; the original
            ``FileNotFoundError`` is attached as ``__cause__``.
    """
    # Build the file name and full path of the serialized model.
    filename = model_name.lower().replace(" ", "_") + '.joblib'
    model_path = f'{model_dir}/{filename}'

    # Load the model.
    # NOTE(review): joblib files are pickle-based, so only load files from a
    # trusted location.
    try:
        model = load(model_path)
    except FileNotFoundError as exc:
        # Chain explicitly so the missing path stays visible in the traceback.
        raise ValueError(f"模型 {model_name} 未找到，请检查模型名称或路径") from exc

    # Run the prediction.
    return model.predict(new_data)

In [3]:
# Score df with every saved model and keep the results keyed by model name.
models = ['Linear Regression', 'Lasso', 'Ridge', 'ElasticNet']
final_predictions = {}
for model_name in models:
    # Chained assignment: bind the result to `preds` and store it under its model name.
    preds = final_predictions[model_name] = load_and_predict(model_name, df)

In [4]:
# Gather the per-model predictions into one DataFrame (one column per model)
# and print it for inspection.
predictions_df = pd.DataFrame.from_dict(final_predictions)
print(predictions_df)

       Linear Regression         Lasso         Ridge    ElasticNet
0             12362606.0  1.241837e+07  1.236850e+07  1.141424e+07
1              7728920.0  7.703361e+06  7.709564e+06  7.646691e+06
2              3509408.0  3.515680e+06  3.514141e+06  3.532648e+06
3              2761728.0  2.761994e+06  2.765250e+06  2.807023e+06
4              5530936.0  5.513074e+06  5.501451e+06  4.866905e+06
...                  ...           ...           ...           ...
14781           435386.0  4.080046e+05  4.170134e+05  3.427799e+05
14782           230900.0  2.488930e+05  2.464353e+05  8.994881e+05
14783           968302.0  9.641425e+05  9.678584e+05  1.234309e+06
14784          1175710.0  1.167828e+06  1.172378e+06  1.438294e+06
14785           571766.0  5.879302e+05  5.854081e+05  1.234040e+06

[14786 rows x 4 columns]


In [5]:
# Write one CSV per model; in every file the prediction column is named
# 'Price' and the row index is written under the header 'ID'.
for model in predictions_df.columns:
    price_frame = predictions_df[[model]].rename(columns={model: 'Price'})
    csv_path = f'C:/Users/86138/Tigercut/midterm/hyperprediction_interaction_only/{model}.csv'
    price_frame.to_csv(csv_path, index_label='ID')