## 使用深度学习方法进行建模

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the property listings from the CSV in the working directory and
# preview the first two rows to check the column layout.
data = pd.read_csv(filepath_or_buffer="dubai_properties.csv")
data.head(n=2)

Unnamed: 0,Address,Rent,Beds,Baths,Type,Area_in_sqft,Rent_per_sqft,Rent_category,Frequency,Furnishing,Purpose,Posted_date,Age_of_listing_in_days,Location,City,Latitude,Longitude
0,"The Gate Tower 2, The Gate Tower, Shams Gate D...",124000,3,4,Apartment,1785,69.467787,Medium,Yearly,Unfurnished,For Rent,2024-03-07,45,Al Reem Island,Abu Dhabi,24.493598,54.407841
1,"Water's Edge, Yas Island, Abu Dhabi",140000,3,4,Apartment,1422,98.452883,Medium,Yearly,Unfurnished,For Rent,2024-03-08,44,Yas Island,Abu Dhabi,24.494022,54.607372


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim

# Prepare the feature matrix X and the regression target y.
# Rows with any missing value are removed from `data` in place, so later
# cells that read `data` see the cleaned frame as well.
data.dropna(inplace=True)
y = data['Rent']

# Columns that must not be used as features:
#   * 'Rent'          - the target itself.
#   * 'Rent_per_sqft' - equals Rent / Area_in_sqft in the preview rows
#                       (124000 / 1785 = 69.4678), so together with
#                       Area_in_sqft it reveals the target exactly.
#   * 'Rent_category' - NOTE(review): presumably a binning of Rent; treated
#                       as target-derived here, confirm against the data source.
#   * 'Unnamed: 0'    - appears to be the row index saved with the CSV; it is
#                       an identifier, not a property attribute.
# errors='ignore' keeps the cell working if the CSV lacks one of the optional
# columns; a missing 'Rent' still fails loudly on the lookup above.
non_feature_columns = ['Rent', 'Rent_per_sqft', 'Rent_category', 'Unnamed: 0']
X = data.drop(columns=non_feature_columns, errors='ignore')

In [None]:
# Encode categorical (object-dtype) features as integer codes.
# The encoders are fitted on the full feature frame so that every category
# present in the data has a code; they are kept in `label_encoders` so the
# mapping can be inverted or reused later.
label_encoders = {}
for column in X.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    X[column] = le.fit_transform(X[column])
    label_encoders[column] = le

# 2. Split the dataset BEFORE scaling, so the scaler never sees test rows.
# The fixed random_state keeps the train/test partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features to [0, 1] using min/max learned from the training rows only.
# Fitting on the full data would leak test-set statistics into training.
# Test values outside the training range simply map outside [0, 1].
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Kept for any later cell that still refers to the fully scaled matrix.
X_scaled = scaler.transform(X)

# Convert to float32 PyTorch tensors (torch.tensor is the recommended
# constructor; the legacy torch.FloatTensor(...) form is avoided).
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

In [None]:
# 3. 构建神经网络
class RentPredictor(nn.Module):
    """Fully connected regressor: input_dim -> 64 -> 32 -> 1.

    The two hidden layers use ReLU activations; the output layer is linear
    and produces one value per input row.
    """

    def __init__(self, input_dim: int) -> None:
        """Create the three linear layers.

        Args:
            input_dim: Number of features in each input row.
        """
        super().__init__()
        wide_units, narrow_units = 64, 32
        self.layer1 = nn.Linear(input_dim, wide_units)
        self.layer2 = nn.Linear(wide_units, narrow_units)
        self.layer3 = nn.Linear(narrow_units, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on `x` and return the un-activated output layer."""
        hidden = self.layer1(x).relu()
        hidden = self.layer2(hidden).relu()
        return self.layer3(hidden)

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed weight initialisation so repeated runs give the same numbers, in line
# with the fixed random_state used for the train/test split.
torch.manual_seed(42)

# Instantiate the model and move it and all tensors to the device once.
# The input width is taken from the training tensor itself, so it always
# matches what is actually fed to the network.
model = RentPredictor(X_train_tensor.shape[1]).to(device)
X_train_tensor = X_train_tensor.to(device)
y_train_tensor = y_train_tensor.to(device)
X_test_tensor = X_test_tensor.to(device)
y_test_tensor = y_test_tensor.to(device)

# The model outputs shape (N, 1); build matching column-vector targets once
# instead of re-creating them on every epoch.
y_train_column = y_train_tensor.unsqueeze(1)
y_test_column = y_test_tensor.unsqueeze(1)

# Standardise the target with TRAINING statistics only. Raw rents are orders
# of magnitude larger than the network's initial outputs, which makes the MSE
# optimisation badly conditioned; the network is trained on the standardised
# target and its outputs are mapped back to rent units for all reporting.
# clamp_min guards against a zero standard deviation (constant target).
target_mean = y_train_column.mean()
target_std = y_train_column.std(unbiased=False).clamp_min(1e-8)
y_train_standardized = (y_train_column - target_mean) / target_std

# 4. Train the model (full-batch gradient descent with Adam).
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
epochs = 10

for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    standardized_pred = model(X_train_tensor)
    standardized_loss = criterion(standardized_pred, y_train_standardized)
    standardized_loss.backward()
    optimizer.step()

    # Evaluate after the update. Both losses are reported in rent units so
    # they are directly comparable with the final metrics below.
    model.eval()
    with torch.no_grad():
        # Training loss of the forward pass made before this epoch's update.
        y_pred = standardized_pred.detach() * target_std + target_mean
        loss = criterion(y_pred, y_train_column)
        y_val_pred = model(X_test_tensor) * target_std + target_mean
        val_loss = criterion(y_val_pred, y_test_column)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}, Val Loss: {val_loss.item()}')

# 5. Compute test metrics in rent units.
model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor) * target_std + target_mean
    test_loss = criterion(y_test_pred, y_test_column)
    mse = test_loss.item()  # mean squared error as a Python float
    rmse = torch.sqrt(test_loss).item()  # root of the MSE
    # R^2 = 1 - SS_res / SS_tot, with SS_tot taken around the test-set mean.
    y_mean = torch.mean(y_test_column)
    ss_total = torch.sum((y_test_column - y_mean) ** 2)
    ss_res = torch.sum((y_test_column - y_test_pred) ** 2)
    r2 = 1 - (ss_res / ss_total).item()

# Print the metrics.
print(f'Test MSE: {mse}')
print(f'Test RMSE: {rmse}')
print(f'Test R²: {r2}')