In [1]:
import pandas as pd
# Load the test set. It is preprocessed the same way as the training set,
# except that the outlier-removal step is left out.
df = pd.read_parquet('C:/Users/86138/Tigercut/midterm/test_plus.parquet')

# Preprocessing: cast the location columns to categorical dtype in one pass,
# then discard the two columns that are not passed on to the models.
df = (
    df
    .astype({'城市': 'category', '区域': 'category', '板块': 'category'})
    .drop(columns=['小区名称', '交易时间'])
)

# Column names grouped by dtype.
numeric_dtypes = ['int32', 'int64', 'float64']
numeric_features = df.select_dtypes(include=numeric_dtypes).columns.tolist()
categorical_features = df.select_dtypes(include=['category']).columns.tolist()

In [2]:
from joblib import load
import pandas as pd

def load_and_predict(model_name, new_data,
                     model_dir='C:/Users/86138/Tigercut/midterm/hypermodels'):
    """
    Load the saved model with the given name and predict on new data.

    The file name is derived from ``model_name`` by lower-casing it,
    replacing spaces with underscores and appending ``.joblib``
    (e.g. 'Linear Regression' -> 'linear_regression.joblib').

    Parameters:
        model_name: Model name ('Linear Regression', 'Lasso', 'Ridge', 'ElasticNet').
        new_data: New data (a DataFrame with the same features as the training data).
        model_dir: Directory containing the ``.joblib`` files. Defaults to the
            location this notebook originally hard-coded, so existing calls
            behave exactly as before.

    Returns:
        Whatever the loaded model's ``predict`` returns for ``new_data``
        (described by the original author as a numpy array).

    Raises:
        ValueError: If the model file does not exist. The underlying
            FileNotFoundError is attached as the explicit cause.
    """
    # Build the file name and the full path to the serialized model.
    filename = model_name.lower().replace(" ", "_") + '.joblib'
    model_path = f'{model_dir}/{filename}'

    # Load the model.
    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code -- only point model_dir at files you trust.
    try:
        model = load(model_path)
    except FileNotFoundError as exc:
        # Chain explicitly so the traceback shows which path was missing.
        raise ValueError(f"模型 {model_name} 未找到，请检查模型名称或路径") from exc

    # Run the prediction.
    return model.predict(new_data)

In [4]:
# Names of the saved models to evaluate, in the order their columns will appear.
models = ['Linear Regression', 'Lasso', 'Ridge', 'ElasticNet']

# Map each model name to its predictions on the preprocessed test frame.
final_predictions = {name: load_and_predict(name, df) for name in models}

In [5]:
# Collect the per-model predictions into a DataFrame (one column per model)
# and print it to inspect the results.
predictions_df = pd.DataFrame(data=final_predictions)
print(predictions_df)

       Linear Regression         Lasso         Ridge    ElasticNet
0           1.181644e+07  1.188600e+07  1.182642e+07  1.086935e+07
1           7.771765e+06  7.744330e+06  7.752318e+06  7.679109e+06
2           3.596830e+06  3.599243e+06  3.598351e+06  3.610326e+06
3           2.784266e+06  2.784323e+06  2.788179e+06  2.829512e+06
4           5.702421e+06  5.670093e+06  5.665204e+06  5.080749e+06
...                  ...           ...           ...           ...
14781       3.639507e+05  3.269143e+05  3.441707e+05  2.519277e+05
14782       1.905037e+05  2.126832e+05  2.084660e+05  8.874635e+05
14783       9.138846e+05  8.953134e+05  9.153758e+05  1.184562e+06
14784       1.136347e+06  1.111856e+06  1.134343e+06  1.403881e+06
14785       5.385084e+05  5.570283e+05  5.534134e+05  1.224183e+06

[14786 rows x 4 columns]


In [7]:
# Write one CSV per model. In every file the prediction column is named
# 'Price' and the row index is written under the header 'ID'.
for model, column in predictions_df.items():
    out_path = f'C:/Users/86138/Tigercut/midterm/hyperprediction/{model}.csv'
    column.to_frame(name='Price').to_csv(out_path, index_label='ID')