In [3]:
import  torch
print(torch.cuda.is_available())

True


In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# ─────────────────────────────────────────────────────────────────────────────
# 0. 检测设备 & cuDNN 调优
# ─────────────────────────────────────────────────────────────────────────────
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')
torch.backends.cudnn.benchmark = True  # 如果输入尺寸固定，可加速卷积

# ─────────────────────────────────────────────────────────────────────────────
# 1. 读取并排序数据
# ─────────────────────────────────────────────────────────────────────────────
df = pd.read_csv('最终干净的数据改.csv')      # 替换为你的文件路径
df = df.sort_values('tradeTime').reset_index(drop=True)

# ─────────────────────────────────────────────────────────────────────────────
# 2. 特征选取 & 归一化
# ─────────────────────────────────────────────────────────────────────────────
feature_cols = [
    'followers','square','livingRoom','drawingRoom','kitchen','bathRoom',
    'buildingType','constructionTime','renovationCondition','buildingStructure',
    'ladderRatio','elevator','fiveYearsProperty','subway','district',
    'communityAverage','distance','Age','floorType','floorHeight',
    'room_count','room_ratio','north_south',
    'Investment in residential real estate development in Beijing',
    'Inflation rate'
]
target_col = ['price_log']

# 提取数据并归一化到 [0,1]
data = df[feature_cols + target_col].values.astype(float)
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data)

# ─────────────────────────────────────────────────────────────────────────────
# 3. 构造时序样本（滑动窗口）
# ─────────────────────────────────────────────────────────────────────────────
def create_sequences(data, n_steps):
    X, y = [], []
    for i in range(len(data) - n_steps):
        X.append(data[i:i+n_steps, :-1])      # n_steps × 特征数
        y.append(data[i+n_steps, -1])         # 对应下一步的 price_log
    return np.array(X), np.array(y)

n_steps = 3
X, y = create_sequences(data_scaled, n_steps)
# 现在 X.shape = (样本数, n_steps, 特征数)， y.shape = (样本数,)

# ─────────────────────────────────────────────────────────────────────────────
# 4. 划分训练/验证集
# ─────────────────────────────────────────────────────────────────────────────
split = int(len(X) * 0.8)
X_train, X_val = X[:split], X[split:]
y_train, y_val = y[:split], y[split:]

# ─────────────────────────────────────────────────────────────────────────────
# 5. Dataset & DataLoader（启用 pin_memory 加速 GPU 拷贝）
# ─────────────────────────────────────────────────────────────────────────────
class SeqDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X).float()
        self.y = torch.from_numpy(y).float().unsqueeze(1)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

batch_size = 16
train_loader = DataLoader(
    SeqDataset(X_train, y_train),
    batch_size=batch_size, shuffle=True,
    pin_memory=False, num_workers=0
)
val_loader = DataLoader(
    SeqDataset(X_val, y_val),
    batch_size=batch_size, shuffle=False,
    pin_memory=False, num_workers=0
)

Using device: cuda


In [5]:
# ─────────────────────────────────────────────────────────────────────────────
# 6. 定义 LSTMRegressor 模型
# ─────────────────────────────────────────────────────────────────────────────
class LSTMRegressor(nn.Module):
    def __init__(self, input_size, hidden1=64, hidden2=32, dropout=0.2):
        super().__init__()
        self.lstm1 = nn.LSTM(input_size, hidden1, batch_first=True)
        self.dropout1 = nn.Dropout(dropout)
        self.lstm2 = nn.LSTM(hidden1, hidden2, batch_first=True)
        self.dropout2 = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden2, 1)
    def forward(self, x):
        out, _ = self.lstm1(x)      # (batch, seq, hidden1)
        out = self.dropout1(out)
        out, _ = self.lstm2(out)    # (batch, seq, hidden2)
        out = self.dropout2(out)
        out = out[:, -1, :]         # 取最后时间步的隐藏状态
        return self.fc(out)

model = LSTMRegressor(input_size=X.shape[2]).to(device)
criterion = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# ─────────────────────────────────────────────────────────────────────────────
# 7. 训练循环 & 早停
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == '__main__':  # 添加这个部分
    epochs = 20
    best_val_loss = float('inf')
    patience, wait = 10, 0


for epoch in range(1, epochs+1):
    model.train()
    train_losses = []
    for xb, yb in train_loader:
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    model.eval()
    val_losses = []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device, non_blocking=True)
            yb = yb.to(device, non_blocking=True)
            preds = model(xb)
            val_losses.append(criterion(preds, yb).item())

    train_loss = np.mean(train_losses)
    val_loss = np.mean(val_losses)
    print(f'Epoch {epoch:03d} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}')

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')
        wait = 0
    else:
        wait += 1
        if wait >= patience:
            print("Early stopping. Restoring best model.")
            break

model.load_state_dict(torch.load('best_model.pth'))

# ─────────────────────────────────────────────────────────────────────────────
# 8. 预测 & 逆归一化
# ─────────────────────────────────────────────────────────────────────────────
model.eval()
all_preds = []
with torch.no_grad():
    for xb, _ in val_loader:
        xb = xb.to(device, non_blocking=True)
        all_preds.extend(model(xb).cpu().numpy().flatten())

# 构造 dummy 用于 inverse_transform
dummy_pred = np.zeros((len(all_preds), data.shape[1]))
dummy_pred[:, -1] = all_preds
pred_price_log = scaler.inverse_transform(dummy_pred)[:, -1]

y_flat = y_val.flatten()
dummy_true = np.zeros((len(y_flat), data.shape[1]))
dummy_true[:, -1] = y_flat
true_price_log = scaler.inverse_transform(dummy_true)[:, -1]

# ─────────────────────────────────────────────────────────────────────────────
# 9. 评估：RMSE & R²
# ─────────────────────────────────────────────────────────────────────────────
rmse = np.sqrt(mean_squared_error(true_price_log, pred_price_log))
r2   = r2_score(true_price_log, pred_price_log)
print(f'Validation RMSE (log-price) = {rmse:.4f}')
print(f'Validation R²     (log-price) = {r2:.4f}')


Epoch 001 | Train Loss: 0.0134 | Val Loss: 0.0803
Epoch 002 | Train Loss: 0.0121 | Val Loss: 0.0839
Epoch 003 | Train Loss: 0.0117 | Val Loss: 0.1128
Epoch 004 | Train Loss: 0.0114 | Val Loss: 0.1114
Epoch 005 | Train Loss: 0.0112 | Val Loss: 0.1196
Epoch 006 | Train Loss: 0.0111 | Val Loss: 0.1054
Epoch 007 | Train Loss: 0.0110 | Val Loss: 0.1158
Epoch 008 | Train Loss: 0.0109 | Val Loss: 0.0879
Epoch 009 | Train Loss: 0.0108 | Val Loss: 0.0836
Epoch 010 | Train Loss: 0.0107 | Val Loss: 0.0851
Epoch 011 | Train Loss: 0.0107 | Val Loss: 0.0857
Early stopping. Restoring best model.


  model.load_state_dict(torch.load('best_model.pth'))


Validation RMSE (log-price) = 0.7688
Validation R²     (log-price) = -3.6117
