In [1]:
import pandas as pd
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import json
import math

# 读取处理后的数据集
X_train = pd.read_csv('data/processed/X_train.csv')
y_train = pd.read_csv('data/processed/y_train.csv')
X_valid = pd.read_csv('data/processed/X_valid.csv')
y_valid = pd.read_csv('data/processed/y_valid.csv') 
X_train_full = pd.read_csv('data/processed/X_train_full.csv') 
y_train_full = pd.read_csv('data/processed/y_train_full.csv') 
X_test = pd.read_csv('data/processed/X_test.csv')

print('数据集读取完成')
print(f'训练集大小: {X_train.shape}')
print(f'验证集大小: {X_valid.shape}')
print(f'全量训练集大小: {X_train_full.shape}')
print(f'测试集大小: {X_test.shape}')


数据集读取完成
训练集大小: (20000, 68)
验证集大小: (5000, 68)
全量训练集大小: (25000, 68)
测试集大小: (10000, 68)


In [2]:
# 从JSON文件读取列名配置
with open('./data/columns.json', 'r') as f:
    columns_dict = json.load(f)

# 从字典中读取列名
del_cols = columns_dict['del_cols']
text_cols = columns_dict['text_cols'] 
date_cols = columns_dict['date_cols']
numeric_cols = columns_dict['numeric_cols']
log_cols = columns_dict['log_cols']
root_cols = columns_dict['root_cols']
categorical_cols = columns_dict['categorical_cols']

# 定义变换列
cat_nu_cols = ["manufactured", "curb_weight", "power", "engine_cap", "no_of_owners", "depreciation", "coe", "road_tax", "dereg_value", "mileage", "omv", "arf", "make_target_encoded", "-", "almost new car", "coe car", "consignment car", "direct owner sale", "electric cars", "hybrid cars", "imported used vehicle", "low mileage car", "opc car", "parf car", "premium ad car", "rare & exotic", "sgcarmart warranty cars", "sta evaluated car", "vintage cars", "type_of_vehicle_bus/mini bus", "type_of_vehicle_hatchback", "type_of_vehicle_luxury sedan", "type_of_vehicle_mid-sized sedan", "type_of_vehicle_mpv", "type_of_vehicle_others", "type_of_vehicle_sports car", "type_of_vehicle_stationwagon", "type_of_vehicle_suv", "type_of_vehicle_truck", "type_of_vehicle_van", "fuel_type_diesel", "fuel_type_diesel-electric", "fuel_type_electric", "fuel_type_petrol", "fuel_type_petrol-electric", "fuel_type_nan", "transmission_manual", "year", "month"]
cat_log_cols = ["manufactured", "curb_weight", "power_log", "engine_cap_log", "depreciation_log", "coe", "road_tax_log", "dereg_value_log", "mileage_log", "omv_log", "arf_log", "make_target_encoded", "-", "almost new car", "coe car", "consignment car", "direct owner sale", "electric cars", "hybrid cars", "imported used vehicle", "low mileage car", "opc car", "parf car", "premium ad car", "rare & exotic", "sgcarmart warranty cars", "sta evaluated car", "vintage cars", "type_of_vehicle_bus/mini bus", "type_of_vehicle_hatchback", "type_of_vehicle_luxury sedan", "type_of_vehicle_mid-sized sedan", "type_of_vehicle_mpv", "type_of_vehicle_others", "type_of_vehicle_sports car", "type_of_vehicle_stationwagon", "type_of_vehicle_suv", "type_of_vehicle_truck", "type_of_vehicle_van", "fuel_type_diesel", "fuel_type_diesel-electric", "fuel_type_electric", "fuel_type_petrol", "fuel_type_petrol-electric", "fuel_type_nan", "transmission_manual", "year", "month"]
cat_root_cols = ["manufactured", "curb_weight", "power_root", "engine_cap_root", "depreciation_root", "coe", "road_tax_root", "dereg_value_root", "mileage_root", "omv_root", "arf_root", "make_target_encoded", "-", "almost new car", "coe car", "consignment car", "direct owner sale", "electric cars", "hybrid cars", "imported used vehicle", "low mileage car", "opc car", "parf car", "premium ad car", "rare & exotic", "sgcarmart warranty cars", "sta evaluated car", "vintage cars", "type_of_vehicle_bus/mini bus", "type_of_vehicle_hatchback", "type_of_vehicle_luxury sedan", "type_of_vehicle_mid-sized sedan", "type_of_vehicle_mpv", "type_of_vehicle_others", "type_of_vehicle_sports car", "type_of_vehicle_stationwagon", "type_of_vehicle_suv", "type_of_vehicle_truck", "type_of_vehicle_van", "fuel_type_diesel", "fuel_type_diesel-electric", "fuel_type_electric", "fuel_type_petrol", "fuel_type_petrol-electric", "fuel_type_nan", "transmission_manual", "year", "month"]

## Baseline

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset




class MLPModel(nn.Module):
    def __init__(self, input_dim):
        super(MLPModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, 128)
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 32)
        self.output_layer = nn.Linear(32, 1)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.output_layer(x)
        return x

In [4]:
def train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, lr=0.001, wd=0):
    # 损失函数和优化器
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

    # 数据转换为torch.Tensor
    X_train_tensor = torch.tensor(X_train[cat_nu_cols].values, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    X_valid_tensor = torch.tensor(X_valid[cat_nu_cols].values, dtype=torch.float32)
    y_valid_tensor = torch.tensor(y_valid.values, dtype=torch.float32).view(-1, 1)

    epochs = 50
    for epoch in range(epochs):
        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

        model.eval()
    
        # 关闭梯度计算
        with torch.no_grad():
            y_test_pred_mlp = model(X_valid_tensor)
    
        mse_test_mlp = mean_squared_error(y_valid_tensor.numpy(), y_test_pred_mlp.numpy())
        rmse_test_mlp = np.sqrt(mse_test_mlp)
        r2_test_mlp = r2_score(y_valid_tensor.numpy(), y_test_pred_mlp.numpy())
    
        #print(f'Network Valid Mean Squared Error: {mse_test_mlp}')
        print(f'Network Valid Root Mean Squared Error: {rmse_test_mlp}')
        #print(f'Network Valid R² Score: {r2_test_mlp}')

input_dim = len(cat_nu_cols)
print(input_dim)
        


49


In [5]:

model = MLPModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid)

Epoch 1/50, Loss: 909138880.0
Network Valid Root Mean Squared Error: 55713.1640625
Epoch 2/50, Loss: 4657540096.0
Network Valid Root Mean Squared Error: 45509.52734375
Epoch 3/50, Loss: 373871488.0
Network Valid Root Mean Squared Error: 44428.22265625
Epoch 4/50, Loss: 1775585152.0
Network Valid Root Mean Squared Error: 41552.1484375
Epoch 5/50, Loss: 866988800.0
Network Valid Root Mean Squared Error: 39328.64453125
Epoch 6/50, Loss: 1073672064.0
Network Valid Root Mean Squared Error: 37358.9453125
Epoch 7/50, Loss: 477785664.0
Network Valid Root Mean Squared Error: 35961.21875
Epoch 8/50, Loss: 1193552256.0
Network Valid Root Mean Squared Error: 42860.1328125
Epoch 9/50, Loss: 1671794816.0
Network Valid Root Mean Squared Error: 34626.18359375
Epoch 10/50, Loss: 175820560.0
Network Valid Root Mean Squared Error: 34614.51953125
Epoch 11/50, Loss: 774986048.0
Network Valid Root Mean Squared Error: 37556.7109375
Epoch 12/50, Loss: 367050624.0
Network Valid Root Mean Squared Error: 34562.9

## Other NN Models

In [6]:
model = MLPModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001)
model = MLPModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.001, 0.01)
model = MLPModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001, 0.01)

Epoch 1/50, Loss: 2750364416.0
Network Valid Root Mean Squared Error: 62412.30078125
Epoch 2/50, Loss: 1067208256.0
Network Valid Root Mean Squared Error: 52535.37109375
Epoch 3/50, Loss: 1058689152.0
Network Valid Root Mean Squared Error: 50259.57421875
Epoch 4/50, Loss: 910470784.0
Network Valid Root Mean Squared Error: 50249.546875
Epoch 5/50, Loss: 3934881024.0
Network Valid Root Mean Squared Error: 48449.75390625
Epoch 6/50, Loss: 1239331456.0
Network Valid Root Mean Squared Error: 47809.33203125
Epoch 7/50, Loss: 4001431296.0
Network Valid Root Mean Squared Error: 47625.86328125
Epoch 8/50, Loss: 882428864.0
Network Valid Root Mean Squared Error: 47210.1796875
Epoch 9/50, Loss: 4758539264.0
Network Valid Root Mean Squared Error: 46626.703125
Epoch 10/50, Loss: 676722368.0
Network Valid Root Mean Squared Error: 45546.1953125
Epoch 11/50, Loss: 549311616.0
Network Valid Root Mean Squared Error: 44828.9296875
Epoch 12/50, Loss: 730121536.0
Network Valid Root Mean Squared Error: 4534

In [7]:
class MLPDropoutModel(nn.Module):
    def __init__(self, input_dim, dropout_rate=0.5):
        super(MLPDropoutModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, 128)
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 32)
        self.output_layer = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
    
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.dropout(x)
        x = self.relu(self.layer2(x))
        x = self.dropout(x)
        x = self.relu(self.layer3(x))
        x = self.dropout(x)
        x = self.output_layer(x)
        return x

In [8]:
model = MLPDropoutModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid)
model = MLPDropoutModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001)
model = MLPDropoutModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.001, 0.01)
model = MLPDropoutModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001, 0.01)

Epoch 1/50, Loss: 5330982912.0
Network Valid Root Mean Squared Error: 52101.5
Epoch 2/50, Loss: 2981260800.0
Network Valid Root Mean Squared Error: 65911.4453125
Epoch 3/50, Loss: 2814116864.0
Network Valid Root Mean Squared Error: 48959.2421875
Epoch 4/50, Loss: 33106421760.0
Network Valid Root Mean Squared Error: 47234.9140625
Epoch 5/50, Loss: 15704512512.0
Network Valid Root Mean Squared Error: 48352.0234375
Epoch 6/50, Loss: 2907944704.0
Network Valid Root Mean Squared Error: 46438.25390625
Epoch 7/50, Loss: 2626076672.0
Network Valid Root Mean Squared Error: 46977.06640625
Epoch 8/50, Loss: 2495718912.0
Network Valid Root Mean Squared Error: 47155.19921875
Epoch 9/50, Loss: 16203672576.0
Network Valid Root Mean Squared Error: 45966.7265625
Epoch 10/50, Loss: 2584260352.0
Network Valid Root Mean Squared Error: 55140.8125
Epoch 11/50, Loss: 3831356672.0
Network Valid Root Mean Squared Error: 46818.7734375
Epoch 12/50, Loss: 1933256576.0
Network Valid Root Mean Squared Error: 52737.

In [9]:
model = MLPDropoutModel(input_dim, 0.1)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid)
model = MLPDropoutModel(input_dim, 0.1)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001)
model = MLPDropoutModel(input_dim, 0.1)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.001, 0.01)
model = MLPDropoutModel(input_dim, 0.1)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001, 0.01)

Epoch 1/50, Loss: 3519416832.0
Network Valid Root Mean Squared Error: 51651.75390625
Epoch 2/50, Loss: 1512231808.0
Network Valid Root Mean Squared Error: 57229.30078125
Epoch 3/50, Loss: 889876224.0
Network Valid Root Mean Squared Error: 45154.57421875
Epoch 4/50, Loss: 1039006720.0
Network Valid Root Mean Squared Error: 43056.609375
Epoch 5/50, Loss: 1125871872.0
Network Valid Root Mean Squared Error: 43140.22265625
Epoch 6/50, Loss: 423361728.0
Network Valid Root Mean Squared Error: 44120.52734375
Epoch 7/50, Loss: 451778176.0
Network Valid Root Mean Squared Error: 40029.546875
Epoch 8/50, Loss: 597036032.0
Network Valid Root Mean Squared Error: 38774.36328125
Epoch 9/50, Loss: 505592672.0
Network Valid Root Mean Squared Error: 38355.08203125
Epoch 10/50, Loss: 622230080.0
Network Valid Root Mean Squared Error: 41679.21875
Epoch 11/50, Loss: 1046806784.0
Network Valid Root Mean Squared Error: 35750.41015625
Epoch 12/50, Loss: 445744704.0
Network Valid Root Mean Squared Error: 36490.

In [10]:
class DeeperMLPModel(nn.Module):
    def __init__(self, input_dim):
        super(DeeperMLPModel, self).__init__()
        # Increasing the depth with more layers
        self.layer1 = nn.Linear(input_dim, 256)
        self.layer2 = nn.Linear(256, 128)
        self.layer3 = nn.Linear(128, 128)
        self.layer4 = nn.Linear(128, 64)
        self.layer5 = nn.Linear(64, 64)
        self.layer6 = nn.Linear(64, 32)
        self.layer7 = nn.Linear(32, 16)
        self.output_layer = nn.Linear(16, 1)
        
        # ReLU activation
        self.relu = nn.ReLU()
        
        # Initializing weights with Kaiming initialization suitable for ReLU
        self._initialize_weights()

    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.relu(self.layer4(x))
        x = self.relu(self.layer5(x))
        x = self.relu(self.layer6(x))
        x = self.relu(self.layer7(x))
        x = self.output_layer(x)
        return x
    
    def _initialize_weights(self):
        # Apply Kaiming He initialization to all linear layers in the model
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    m.bias.data.fill_(0.01)



In [11]:
model = DeeperMLPModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid)
model = DeeperMLPModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001)
model = DeeperMLPModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.001, 0.01)
model = DeeperMLPModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001, 0.01)

Epoch 1/50, Loss: 5383701504.0
Network Valid Root Mean Squared Error: 56601.2109375
Epoch 2/50, Loss: 542772928.0
Network Valid Root Mean Squared Error: 41310.0234375
Epoch 3/50, Loss: 1512777856.0
Network Valid Root Mean Squared Error: 51131.48828125
Epoch 4/50, Loss: 527186880.0
Network Valid Root Mean Squared Error: 37230.71875
Epoch 5/50, Loss: 223329344.0
Network Valid Root Mean Squared Error: 42457.11328125
Epoch 6/50, Loss: 554405696.0
Network Valid Root Mean Squared Error: 39597.265625
Epoch 7/50, Loss: 603160512.0
Network Valid Root Mean Squared Error: 33219.62890625
Epoch 8/50, Loss: 971457920.0
Network Valid Root Mean Squared Error: 33218.41796875
Epoch 9/50, Loss: 90288624.0
Network Valid Root Mean Squared Error: 33135.875
Epoch 10/50, Loss: 702115328.0
Network Valid Root Mean Squared Error: 31137.04296875
Epoch 11/50, Loss: 103468536.0
Network Valid Root Mean Squared Error: 30015.416015625
Epoch 12/50, Loss: 615935616.0
Network Valid Root Mean Squared Error: 29997.65234375

In [12]:
class DeepResidualMLP(nn.Module):
    def __init__(self, input_dim):
        super(DeepResidualMLP, self).__init__()
        self.layer1 = nn.Linear(input_dim, 256)
        self.layer2 = nn.Linear(256, 256)
        self.layer3 = nn.Linear(256, 256)
        self.layer4 = nn.Linear(256, 256)
        self.layer5 = nn.Linear(256, 256)
        self.output_layer = nn.Linear(256, 1)
        
        # ReLU activation
        self.relu = nn.ReLU()
        
        # Initializing weights with He initialization suitable for ReLU
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        identity = x
        x = self.relu(self.layer1(x))
        
        # Residual block 1
        out = self.relu(self.layer2(x))
        x = out + x  # Changed from 'out += x' to 'x = out + x'
    
        # Residual block 2
        out = self.relu(self.layer3(x))
        x = out + x  # Changed from 'x += out' to 'x = out + x'
    
        # Residual block 3
        out = self.relu(self.layer4(x))
        x = out + x  # Changed from 'out += x' to 'x = out + x'
    
        # Residual block 4
        out = self.relu(self.layer5(x))
        x = out + x  # Changed from 'x += out' to 'x = out + x'
    
        x = self.output_layer(x)
        return x




In [13]:

model = DeepResidualMLP(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid)
model = DeepResidualMLP(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001)
model = DeepResidualMLP(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.001, 0.1)
model = DeepResidualMLP(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001, 0.1)

Epoch 1/50, Loss: 690575168.0
Network Valid Root Mean Squared Error: 49012.01953125
Epoch 2/50, Loss: 491682240.0
Network Valid Root Mean Squared Error: 53006.0078125
Epoch 3/50, Loss: 10586249216.0
Network Valid Root Mean Squared Error: 42337.30078125
Epoch 4/50, Loss: 2004536832.0
Network Valid Root Mean Squared Error: 51288.37109375
Epoch 5/50, Loss: 455833632.0
Network Valid Root Mean Squared Error: 43786.57421875
Epoch 6/50, Loss: 2152384000.0
Network Valid Root Mean Squared Error: 43097.71875
Epoch 7/50, Loss: 468655424.0
Network Valid Root Mean Squared Error: 33898.625
Epoch 8/50, Loss: 623676480.0
Network Valid Root Mean Squared Error: 40539.4375
Epoch 9/50, Loss: 782007808.0
Network Valid Root Mean Squared Error: 42344.78515625
Epoch 10/50, Loss: 3657537536.0
Network Valid Root Mean Squared Error: 33686.3203125
Epoch 11/50, Loss: 222788064.0
Network Valid Root Mean Squared Error: 33225.90234375
Epoch 12/50, Loss: 186454944.0
Network Valid Root Mean Squared Error: 30410.1738281

In [14]:
class DeepResidualMLP(nn.Module):
    def __init__(self, input_dim):
        super(DeepResidualMLP, self).__init__()
        self.layer1 = nn.Linear(input_dim, 256)
        self.layer2 = nn.Linear(256, 256)
        self.layer3 = nn.Linear(256, 256)
        self.layer4 = nn.Linear(256, 256)
        self.layer5 = nn.Linear(256, 256)
        self.output_layer = nn.Linear(256, 1)
        
        # ReLU activation
        self.relu = nn.ReLU()
        
        # Initializing weights with He initialization suitable for ReLU
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        # Input pass
        identity = x
        x = self.relu(self.layer1(x))
        
        # Residual block 1
        out = self.relu(self.layer2(x))
        out += x  # Adding input after the block
        
        # Residual block 2
        x = self.relu(self.layer3(out))
        x += out  # Adding input from the previous block
        
        # Residual block 3
        out = self.relu(self.layer4(x))
        out += x  # Adding input from the previous block
        
        # Residual block 4
        x = self.relu(self.layer5(out))
        x += out  # Adding input from the previous block

        # Output pass
        x = self.output_layer(x)
        return x

model = DeepResidualMLP(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid)

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [32, 256]], which is output 0 of ReluBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

In [None]:
model = DeepResidualMLP(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid)
model = DeepResidualMLP(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001)
model = DeepResidualMLP(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.001, 0.01)
model = DeepResidualMLP(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001, 0.01)

In [None]:
class Attention(nn.Module):
    def __init__(self, feature_dim):
        super(Attention, self).__init__()
        self.feature_dim = feature_dim
        
        self.key_layer = nn.Linear(feature_dim, feature_dim, bias=False)
        self.query_layer = nn.Linear(feature_dim, feature_dim, bias=False)

    def forward(self, x):
        query = self.query_layer(x).unsqueeze(1)  # Adding batch dimension
        key = self.key_layer(x).unsqueeze(-1)  # Adding an extra dimension for bmm
        
        # Compute attention scores and apply softmax
        scores = torch.bmm(query, key)  # Should work as both are 3D now
        weights = F.softmax(scores, dim=-1)
        
        # Apply weights to the original input features, using batch matrix multiplication
        attended = torch.bmm(weights, x.unsqueeze(1))  # x also needs to be 3D
        return attended.squeeze(1)  # Remove the extra dimension to match expected output shape





class DeepResidualMLPWithAttention(nn.Module):
    def __init__(self, input_dim):
        super(DeepResidualMLPWithAttention, self).__init__()
        self.layer1 = nn.Linear(input_dim, 256)
        self.layer2 = nn.Linear(256, 256)
        self.layer3 = nn.Linear(256, 256)
        self.layer4 = nn.Linear(256, 256)
        self.layer5 = nn.Linear(256, 256)
        self.attention = Attention(256)  # Attention layer after layer5
        self.output_layer = nn.Linear(256, 1)
        
        self.relu = nn.ReLU()
        
        # Initializing weights
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        identity = x
        x = self.relu(self.layer1(x))
        
        out = self.relu(self.layer2(x))
        out = out + x  # Use out-of-place operation

        x = self.relu(self.layer3(out))
        x = x + out  # Use out-of-place operation

        out = self.relu(self.layer4(x))
        out = out + x  # Use out-of-place operation

        x = self.relu(self.layer5(out))
        x = x + out  # Use out-of-place operation

        # Apply attention
        x = self.attention(x)
        x = self.output_layer(x)
        return x



In [None]:
model = DeepResidualMLPWithAttention(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid)
model = DeepResidualMLPWithAttention(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001)
model = DeepResidualMLPWithAttention(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.001, 0.01)
model = DeepResidualMLPWithAttention(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001, 0.01)

## Prediction on test set

In [None]:
# 选择表现最好的模型进行最终训练和预测
best_model = DeeperMLPModel(input_dim)

print("使用全量数据训练最终模型...")
# 转换数据为tensor
X_train_full_tensor = torch.tensor(X_train_full[cat_nu_cols].values, dtype=torch.float32)
y_train_full_tensor = torch.tensor(y_train_full.values, dtype=torch.float32).view(-1, 1)
train_dataset = TensorDataset(X_train_full_tensor, y_train_full_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# 训练模型
criterion = nn.MSELoss()
optimizer = optim.Adam(best_model.parameters(), lr=0.001)

epochs = 50
for epoch in range(epochs):
    best_model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = best_model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

print("生成测试集预测结果...")
best_model.eval()
X_test_tensor = torch.tensor(X_test[cat_nu_cols].values, dtype=torch.float32)
with torch.no_grad():
    test_predictions = best_model(X_test_tensor).numpy()

# 创建预测结果DataFrame
predictions_df = pd.DataFrame({
    'Id': range(len(test_predictions)),
    'Predicted': test_predictions.flatten()
})

# 保存预测结果
predictions_df.to_csv('data/predictions.csv', index=False)
print("预测结果已保存到 data/predictions.csv")