In [1]:
import pandas as pd
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import json
import math

# 读取处理后的数据集
X_train = pd.read_csv('data/processed/X_train.csv')
y_train = pd.read_csv('data/processed/y_train.csv')
X_valid = pd.read_csv('data/processed/X_valid.csv')
y_valid = pd.read_csv('data/processed/y_valid.csv') 
X_train_full = pd.read_csv('data/processed/X_train_full.csv') 
y_train_full = pd.read_csv('data/processed/y_train_full.csv') 
X_test = pd.read_csv('data/processed/X_test.csv')

print('数据集读取完成')
print(f'训练集大小: {X_train.shape}')
print(f'验证集大小: {X_valid.shape}')
print(f'全量训练集大小: {X_train_full.shape}')
print(f'测试集大小: {X_test.shape}')


数据集读取完成
训练集大小: (20000, 68)
验证集大小: (5000, 68)
全量训练集大小: (25000, 68)
测试集大小: (10000, 68)


In [2]:
# 从JSON文件读取列名配置
with open('./data/columns.json', 'r') as f:
    columns_dict = json.load(f)

# 从字典中读取列名
del_cols = columns_dict['del_cols']
text_cols = columns_dict['text_cols'] 
date_cols = columns_dict['date_cols']
numeric_cols = columns_dict['numeric_cols']
log_cols = columns_dict['log_cols']
root_cols = columns_dict['root_cols']
categorical_cols = columns_dict['categorical_cols']

# 定义变换列
cat_nu_cols = ["manufactured", "curb_weight", "power", "engine_cap", "no_of_owners", "depreciation", "coe", "road_tax", "dereg_value", "mileage", "omv", "arf", "make_target_encoded", "-", "almost new car", "coe car", "consignment car", "direct owner sale", "electric cars", "hybrid cars", "imported used vehicle", "low mileage car", "opc car", "parf car", "premium ad car", "rare & exotic", "sgcarmart warranty cars", "sta evaluated car", "vintage cars", "type_of_vehicle_bus/mini bus", "type_of_vehicle_hatchback", "type_of_vehicle_luxury sedan", "type_of_vehicle_mid-sized sedan", "type_of_vehicle_mpv", "type_of_vehicle_others", "type_of_vehicle_sports car", "type_of_vehicle_stationwagon", "type_of_vehicle_suv", "type_of_vehicle_truck", "type_of_vehicle_van", "fuel_type_diesel", "fuel_type_diesel-electric", "fuel_type_electric", "fuel_type_petrol", "fuel_type_petrol-electric", "fuel_type_nan", "transmission_manual", "year", "month"]
cat_log_cols = ["manufactured", "curb_weight", "power_log", "engine_cap_log", "depreciation_log", "coe", "road_tax_log", "dereg_value_log", "mileage_log", "omv_log", "arf_log", "make_target_encoded", "-", "almost new car", "coe car", "consignment car", "direct owner sale", "electric cars", "hybrid cars", "imported used vehicle", "low mileage car", "opc car", "parf car", "premium ad car", "rare & exotic", "sgcarmart warranty cars", "sta evaluated car", "vintage cars", "type_of_vehicle_bus/mini bus", "type_of_vehicle_hatchback", "type_of_vehicle_luxury sedan", "type_of_vehicle_mid-sized sedan", "type_of_vehicle_mpv", "type_of_vehicle_others", "type_of_vehicle_sports car", "type_of_vehicle_stationwagon", "type_of_vehicle_suv", "type_of_vehicle_truck", "type_of_vehicle_van", "fuel_type_diesel", "fuel_type_diesel-electric", "fuel_type_electric", "fuel_type_petrol", "fuel_type_petrol-electric", "fuel_type_nan", "transmission_manual", "year", "month"]
cat_root_cols = ["manufactured", "curb_weight", "power_root", "engine_cap_root", "depreciation_root", "coe", "road_tax_root", "dereg_value_root", "mileage_root", "omv_root", "arf_root", "make_target_encoded", "-", "almost new car", "coe car", "consignment car", "direct owner sale", "electric cars", "hybrid cars", "imported used vehicle", "low mileage car", "opc car", "parf car", "premium ad car", "rare & exotic", "sgcarmart warranty cars", "sta evaluated car", "vintage cars", "type_of_vehicle_bus/mini bus", "type_of_vehicle_hatchback", "type_of_vehicle_luxury sedan", "type_of_vehicle_mid-sized sedan", "type_of_vehicle_mpv", "type_of_vehicle_others", "type_of_vehicle_sports car", "type_of_vehicle_stationwagon", "type_of_vehicle_suv", "type_of_vehicle_truck", "type_of_vehicle_van", "fuel_type_diesel", "fuel_type_diesel-electric", "fuel_type_electric", "fuel_type_petrol", "fuel_type_petrol-electric", "fuel_type_nan", "transmission_manual", "year", "month"]

## Baseline

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

class MLPModel(nn.Module):
    def __init__(self, input_dim):
        super(MLPModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, 128)
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 32)
        self.output_layer = nn.Linear(32, 1)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.output_layer(x)
        return x

In [19]:
def train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, lr=0.001):
    # 损失函数和优化器
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # 数据转换为torch.Tensor
    X_train_tensor = torch.tensor(X_train[cat_nu_cols].values, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    X_valid_tensor = torch.tensor(X_valid[cat_nu_cols].values, dtype=torch.float32)
    y_valid_tensor = torch.tensor(y_valid.values, dtype=torch.float32).view(-1, 1)

    epochs = 50
    for epoch in range(epochs):
        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

        model.eval()
    
        # 关闭梯度计算
        with torch.no_grad():
            y_test_pred_mlp = model(X_valid_tensor)
    
        mse_test_mlp = mean_squared_error(y_valid_tensor.numpy(), y_test_pred_mlp.numpy())
        rmse_test_mlp = np.sqrt(mse_test_mlp)
        r2_test_mlp = r2_score(y_valid_tensor.numpy(), y_test_pred_mlp.numpy())
    
        #print(f'Network Valid Mean Squared Error: {mse_test_mlp}')
        print(f'Network Valid Root Mean Squared Error: {rmse_test_mlp}')
        #print(f'Network Valid R² Score: {r2_test_mlp}')
        
input_dim = len(cat_nu_cols)
print(input_dim)
model = MLPModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid)

49
Epoch 1/50, Loss: 607748864.0
Network Valid Root Mean Squared Error: 48295.9453125
Epoch 2/50, Loss: 741855424.0
Network Valid Root Mean Squared Error: 46138.24609375
Epoch 3/50, Loss: 463487040.0
Network Valid Root Mean Squared Error: 45090.375
Epoch 4/50, Loss: 850344320.0
Network Valid Root Mean Squared Error: 42757.08984375
Epoch 5/50, Loss: 890156224.0
Network Valid Root Mean Squared Error: 39826.515625
Epoch 6/50, Loss: 344895392.0
Network Valid Root Mean Squared Error: 38284.546875
Epoch 7/50, Loss: 15711396864.0
Network Valid Root Mean Squared Error: 36710.96875
Epoch 8/50, Loss: 2077256192.0
Network Valid Root Mean Squared Error: 38485.1484375
Epoch 9/50, Loss: 446072128.0
Network Valid Root Mean Squared Error: 34490.77734375
Epoch 10/50, Loss: 623417728.0
Network Valid Root Mean Squared Error: 34954.97265625
Epoch 11/50, Loss: 1538889472.0
Network Valid Root Mean Squared Error: 32575.796875
Epoch 12/50, Loss: 311087712.0
Network Valid Root Mean Squared Error: 33021.4609375

## Other NN Models

In [14]:
input_dim = len(cat_nu_cols)
print(input_dim)
model = MLPModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0005)

49
Epoch 1/50, Loss: 1244009984.0
Network Valid Root Mean Squared Error: 50034.6640625
Epoch 2/50, Loss: 695280128.0
Network Valid Root Mean Squared Error: 48275.3125
Epoch 3/50, Loss: 709139904.0
Network Valid Root Mean Squared Error: 45900.26171875
Epoch 4/50, Loss: 342665216.0
Network Valid Root Mean Squared Error: 47031.33203125
Epoch 5/50, Loss: 2849889280.0
Network Valid Root Mean Squared Error: 44549.375
Epoch 6/50, Loss: 1426520448.0
Network Valid Root Mean Squared Error: 45747.2734375
Epoch 7/50, Loss: 401597376.0
Network Valid Root Mean Squared Error: 41804.140625
Epoch 8/50, Loss: 377034368.0
Network Valid Root Mean Squared Error: 41713.91796875
Epoch 9/50, Loss: 263914176.0
Network Valid Root Mean Squared Error: 39354.09765625
Epoch 10/50, Loss: 267312448.0
Network Valid Root Mean Squared Error: 38761.703125
Epoch 11/50, Loss: 115367080.0
Network Valid Root Mean Squared Error: 39302.4453125
Epoch 12/50, Loss: 1177873920.0
Network Valid Root Mean Squared Error: 37432.4648437

In [8]:
class DeeperMLPModel(nn.Module):
    def __init__(self, input_dim):
        super(DeeperMLPModel, self).__init__()
        # Increasing the depth with more layers
        self.layer1 = nn.Linear(input_dim, 256)
        self.layer2 = nn.Linear(256, 128)
        self.layer3 = nn.Linear(128, 128)
        self.layer4 = nn.Linear(128, 64)
        self.layer5 = nn.Linear(64, 64)
        self.layer6 = nn.Linear(64, 32)
        self.layer7 = nn.Linear(32, 16)
        self.output_layer = nn.Linear(16, 1)
        
        # ReLU activation
        self.relu = nn.ReLU()
        
        # Initializing weights with Kaiming initialization suitable for ReLU
        self._initialize_weights()

    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.relu(self.layer4(x))
        x = self.relu(self.layer5(x))
        x = self.relu(self.layer6(x))
        x = self.relu(self.layer7(x))
        x = self.output_layer(x)
        return x
    
    def _initialize_weights(self):
        # Apply Kaiming He initialization to all linear layers in the model
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    m.bias.data.fill_(0.01)

input_dim = len(cat_nu_cols)
print(input_dim)
model = DeeperMLPModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid)

49
Epoch 1/50, Loss: 238669056.0
Network Valid Root Mean Squared Error: 46417.55859375
Epoch 2/50, Loss: 8387710464.0
Network Valid Root Mean Squared Error: 45679.55078125
Epoch 3/50, Loss: 3386553856.0
Network Valid Root Mean Squared Error: 41511.38671875
Epoch 4/50, Loss: 605278016.0
Network Valid Root Mean Squared Error: 36053.1796875
Epoch 5/50, Loss: 648258112.0
Network Valid Root Mean Squared Error: 35183.30078125
Epoch 6/50, Loss: 464052448.0
Network Valid Root Mean Squared Error: 34837.453125
Epoch 7/50, Loss: 494918464.0
Network Valid Root Mean Squared Error: 35201.1015625
Epoch 8/50, Loss: 918542464.0
Network Valid Root Mean Squared Error: 35367.4765625
Epoch 9/50, Loss: 726468224.0
Network Valid Root Mean Squared Error: 31921.751953125
Epoch 10/50, Loss: 749561856.0
Network Valid Root Mean Squared Error: 32754.83984375
Epoch 11/50, Loss: 436575136.0
Network Valid Root Mean Squared Error: 34722.421875
Epoch 12/50, Loss: 1458785152.0
Network Valid Root Mean Squared Error: 4341

In [15]:
input_dim = len(cat_nu_cols)
print(input_dim)
model = DeeperMLPModel(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001)

49
Epoch 1/50, Loss: 660415808.0
Network Valid Root Mean Squared Error: 47059.23046875
Epoch 2/50, Loss: 405663488.0
Network Valid Root Mean Squared Error: 42398.02734375
Epoch 3/50, Loss: 709602240.0
Network Valid Root Mean Squared Error: 41479.63671875
Epoch 4/50, Loss: 73629872.0
Network Valid Root Mean Squared Error: 40655.3984375
Epoch 5/50, Loss: 934678592.0
Network Valid Root Mean Squared Error: 34870.49609375
Epoch 6/50, Loss: 263358976.0
Network Valid Root Mean Squared Error: 34588.28125
Epoch 7/50, Loss: 713655104.0
Network Valid Root Mean Squared Error: 33697.5546875
Epoch 8/50, Loss: 279689536.0
Network Valid Root Mean Squared Error: 37912.296875
Epoch 9/50, Loss: 274044672.0
Network Valid Root Mean Squared Error: 31179.0234375
Epoch 10/50, Loss: 193827856.0
Network Valid Root Mean Squared Error: 31457.990234375
Epoch 11/50, Loss: 1256726656.0
Network Valid Root Mean Squared Error: 30391.701171875
Epoch 12/50, Loss: 488639616.0
Network Valid Root Mean Squared Error: 31082.9

In [7]:
class DeepResidualMLP(nn.Module):
    def __init__(self, input_dim):
        super(DeepResidualMLP, self).__init__()
        self.layer1 = nn.Linear(input_dim, 256)
        self.layer2 = nn.Linear(256, 256)
        self.layer3 = nn.Linear(256, 256)
        self.layer4 = nn.Linear(256, 256)
        self.layer5 = nn.Linear(256, 256)
        self.output_layer = nn.Linear(256, 1)
        
        # ReLU activation
        self.relu = nn.ReLU()
        
        # Initializing weights with He initialization suitable for ReLU
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        identity = x
        x = self.relu(self.layer1(x))
        
        # Residual block 1
        out = self.relu(self.layer2(x))
        x = out + x  # Changed from 'out += x' to 'x = out + x'
    
        # Residual block 2
        out = self.relu(self.layer3(x))
        x = out + x  # Changed from 'x += out' to 'x = out + x'
    
        # Residual block 3
        out = self.relu(self.layer4(x))
        x = out + x  # Changed from 'out += x' to 'x = out + x'
    
        # Residual block 4
        out = self.relu(self.layer5(x))
        x = out + x  # Changed from 'x += out' to 'x = out + x'
    
        x = self.output_layer(x)
        return x


input_dim = len(cat_nu_cols)
print(input_dim)
model = DeepResidualMLP(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid)

49
Epoch 1/50, Loss: 8230856704.0
Network Valid Root Mean Squared Error: 152490.828125
Epoch 2/50, Loss: 448840832.0
Network Valid Root Mean Squared Error: 41769.66015625
Epoch 3/50, Loss: 872214592.0
Network Valid Root Mean Squared Error: 37724.640625
Epoch 4/50, Loss: 654452416.0
Network Valid Root Mean Squared Error: 43114.01953125
Epoch 5/50, Loss: 1007332608.0
Network Valid Root Mean Squared Error: 36842.6171875
Epoch 6/50, Loss: 824398272.0
Network Valid Root Mean Squared Error: 38666.828125
Epoch 7/50, Loss: 431981120.0
Network Valid Root Mean Squared Error: 35677.109375
Epoch 8/50, Loss: 1682814720.0
Network Valid Root Mean Squared Error: 32295.369140625
Epoch 9/50, Loss: 294976576.0
Network Valid Root Mean Squared Error: 32825.45703125
Epoch 10/50, Loss: 347056000.0
Network Valid Root Mean Squared Error: 31785.64453125
Epoch 11/50, Loss: 241114240.0
Network Valid Root Mean Squared Error: 35083.66015625
Epoch 12/50, Loss: 132194160.0
Network Valid Root Mean Squared Error: 35768

In [16]:
input_dim = len(cat_nu_cols)
print(input_dim)
model = DeepResidualMLP(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001)

49
Epoch 1/50, Loss: 6451342848.0
Network Valid Root Mean Squared Error: 48297.8515625
Epoch 2/50, Loss: 61067698176.0
Network Valid Root Mean Squared Error: 74393.5703125
Epoch 3/50, Loss: 3496197376.0
Network Valid Root Mean Squared Error: 40046.98828125
Epoch 4/50, Loss: 184934688.0
Network Valid Root Mean Squared Error: 40290.625
Epoch 5/50, Loss: 2120498688.0
Network Valid Root Mean Squared Error: 39415.9375
Epoch 6/50, Loss: 398072256.0
Network Valid Root Mean Squared Error: 37204.62890625
Epoch 7/50, Loss: 5739028992.0
Network Valid Root Mean Squared Error: 83277.796875
Epoch 8/50, Loss: 957508224.0
Network Valid Root Mean Squared Error: 43278.57421875
Epoch 9/50, Loss: 1395768320.0
Network Valid Root Mean Squared Error: 33590.7734375
Epoch 10/50, Loss: 2252508672.0
Network Valid Root Mean Squared Error: 34651.39453125
Epoch 11/50, Loss: 7398542848.0
Network Valid Root Mean Squared Error: 87909.53125
Epoch 12/50, Loss: 1189833984.0
Network Valid Root Mean Squared Error: 34843.48

In [None]:
class DeepResidualMLP(nn.Module):
    def __init__(self, input_dim):
        super(DeepResidualMLP, self).__init__()
        self.layer1 = nn.Linear(input_dim, 256)
        self.layer2 = nn.Linear(256, 256)
        self.layer3 = nn.Linear(256, 256)
        self.layer4 = nn.Linear(256, 256)
        self.layer5 = nn.Linear(256, 256)
        self.output_layer = nn.Linear(256, 1)
        
        # ReLU activation
        self.relu = nn.ReLU()
        
        # Initializing weights with He initialization suitable for ReLU
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        # Input pass
        identity = x
        x = self.relu(self.layer1(x))
        
        # Residual block 1
        out = self.relu(self.layer2(x))
        out += x  # Adding input after the block
        
        # Residual block 2
        x = self.relu(self.layer3(out))
        x += out  # Adding input from the previous block
        
        # Residual block 3
        out = self.relu(self.layer4(x))
        out += x  # Adding input from the previous block
        
        # Residual block 4
        x = self.relu(self.layer5(out))
        x += out  # Adding input from the previous block

        # Output pass
        x = self.output_layer(x)
        return x

input_dim = len(cat_nu_cols)
print(input_dim)
model = DeepResidualMLP(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid)

In [17]:
input_dim = len(cat_nu_cols)
print(input_dim)
model = DeepResidualMLP(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001)

49
Epoch 1/50, Loss: 11512886272.0
Network Valid Root Mean Squared Error: 58903.44140625
Epoch 2/50, Loss: 4610254336.0
Network Valid Root Mean Squared Error: 59210.67578125
Epoch 3/50, Loss: 1613288704.0
Network Valid Root Mean Squared Error: 41686.02734375
Epoch 4/50, Loss: 1370993024.0
Network Valid Root Mean Squared Error: 36505.40625
Epoch 5/50, Loss: 782782016.0
Network Valid Root Mean Squared Error: 35099.46875
Epoch 6/50, Loss: 1310430592.0
Network Valid Root Mean Squared Error: 36917.71875
Epoch 7/50, Loss: 1279900288.0
Network Valid Root Mean Squared Error: 36712.07421875
Epoch 8/50, Loss: 553649024.0
Network Valid Root Mean Squared Error: 36838.90625
Epoch 9/50, Loss: 1977183104.0
Network Valid Root Mean Squared Error: 35165.53515625
Epoch 10/50, Loss: 2122180992.0
Network Valid Root Mean Squared Error: 35312.19140625
Epoch 11/50, Loss: 5941832704.0
Network Valid Root Mean Squared Error: 45682.4375
Epoch 12/50, Loss: 782708736.0
Network Valid Root Mean Squared Error: 33787.5

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Attention(nn.Module):
    def __init__(self, feature_dim):
        super(Attention, self).__init__()
        self.feature_dim = feature_dim
        
        self.key_layer = nn.Linear(feature_dim, feature_dim, bias=False)
        self.query_layer = nn.Linear(feature_dim, feature_dim, bias=False)

    def forward(self, x):
        query = self.query_layer(x).unsqueeze(1)  # Adding batch dimension
        key = self.key_layer(x).unsqueeze(-1)  # Adding an extra dimension for bmm
        
        # Compute attention scores and apply softmax
        scores = torch.bmm(query, key)  # Should work as both are 3D now
        weights = F.softmax(scores, dim=-1)
        
        # Apply weights to the original input features, using batch matrix multiplication
        attended = torch.bmm(weights, x.unsqueeze(1))  # x also needs to be 3D
        return attended.squeeze(1)  # Remove the extra dimension to match expected output shape





class DeepResidualMLPWithAttention(nn.Module):
    def __init__(self, input_dim):
        super(DeepResidualMLPWithAttention, self).__init__()
        self.layer1 = nn.Linear(input_dim, 256)
        self.layer2 = nn.Linear(256, 256)
        self.layer3 = nn.Linear(256, 256)
        self.layer4 = nn.Linear(256, 256)
        self.layer5 = nn.Linear(256, 256)
        self.attention = Attention(256)  # Attention layer after layer5
        self.output_layer = nn.Linear(256, 1)
        
        self.relu = nn.ReLU()
        
        # Initializing weights
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        identity = x
        x = self.relu(self.layer1(x))
        
        out = self.relu(self.layer2(x))
        out = out + x  # Use out-of-place operation

        x = self.relu(self.layer3(out))
        x = x + out  # Use out-of-place operation

        out = self.relu(self.layer4(x))
        out = out + x  # Use out-of-place operation

        x = self.relu(self.layer5(out))
        x = x + out  # Use out-of-place operation

        # Apply attention
        x = self.attention(x)
        x = self.output_layer(x)
        return x


input_dim = len(cat_nu_cols)
print(input_dim)
model = DeepResidualMLPWithAttention(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid)

49
Epoch 1/50, Loss: 1887785856.0
Network Valid Root Mean Squared Error: 71526.5859375
Epoch 2/50, Loss: 948286592.0
Network Valid Root Mean Squared Error: 46926.1875
Epoch 3/50, Loss: 32050630656.0
Network Valid Root Mean Squared Error: 43911.2890625
Epoch 4/50, Loss: 862836224.0
Network Valid Root Mean Squared Error: 35629.88671875
Epoch 5/50, Loss: 108751912.0
Network Valid Root Mean Squared Error: 35285.72265625
Epoch 6/50, Loss: 295243456.0
Network Valid Root Mean Squared Error: 39063.46484375
Epoch 7/50, Loss: 822696768.0
Network Valid Root Mean Squared Error: 33228.5703125
Epoch 8/50, Loss: 201165728.0
Network Valid Root Mean Squared Error: 34531.9375
Epoch 9/50, Loss: 183127696.0
Network Valid Root Mean Squared Error: 31048.7734375
Epoch 10/50, Loss: 1224703360.0
Network Valid Root Mean Squared Error: 36200.25
Epoch 11/50, Loss: 1976968576.0
Network Valid Root Mean Squared Error: 31735.328125
Epoch 12/50, Loss: 86958320.0
Network Valid Root Mean Squared Error: 30018.66015625
Ep

In [18]:
input_dim = len(cat_nu_cols)
print(input_dim)
model = DeepResidualMLPWithAttention(input_dim)
train_network(model, cat_nu_cols, X_train, y_train, X_valid, y_valid, 0.0001)

49
Epoch 1/50, Loss: 691887872.0
Network Valid Root Mean Squared Error: 45991.30078125
Epoch 2/50, Loss: 1080788992.0
Network Valid Root Mean Squared Error: 43969.65625
Epoch 3/50, Loss: 861395584.0
Network Valid Root Mean Squared Error: 45759.296875
Epoch 4/50, Loss: 3304551680.0
Network Valid Root Mean Squared Error: 38121.75
Epoch 5/50, Loss: 1366723968.0
Network Valid Root Mean Squared Error: 43957.43359375
Epoch 6/50, Loss: 640371520.0
Network Valid Root Mean Squared Error: 37139.93359375
Epoch 7/50, Loss: 785327872.0
Network Valid Root Mean Squared Error: 34948.69140625
Epoch 8/50, Loss: 7401106944.0
Network Valid Root Mean Squared Error: 59052.0
Epoch 9/50, Loss: 2596687360.0
Network Valid Root Mean Squared Error: 35894.109375
Epoch 10/50, Loss: 316112736.0
Network Valid Root Mean Squared Error: 32746.7734375
Epoch 11/50, Loss: 1351611648.0
Network Valid Root Mean Squared Error: 33017.90234375
Epoch 12/50, Loss: 617819456.0
Network Valid Root Mean Squared Error: 34227.5078125
Ep

## Prediction on test set

In [20]:
# 选择表现最好的模型进行最终训练和预测
best_model = DeeperMLPModel(input_dim)

print("使用全量数据训练最终模型...")
# 转换数据为tensor
X_train_full_tensor = torch.tensor(X_train_full[cat_nu_cols].values, dtype=torch.float32)
y_train_full_tensor = torch.tensor(y_train_full.values, dtype=torch.float32).view(-1, 1)
train_dataset = TensorDataset(X_train_full_tensor, y_train_full_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# 训练模型
criterion = nn.MSELoss()
optimizer = optim.Adam(best_model.parameters(), lr=0.001)

epochs = 50
for epoch in range(epochs):
    best_model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = best_model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

print("生成测试集预测结果...")
best_model.eval()
X_test_tensor = torch.tensor(X_test[cat_nu_cols].values, dtype=torch.float32)
with torch.no_grad():
    test_predictions = best_model(X_test_tensor).numpy()

# 创建预测结果DataFrame
predictions_df = pd.DataFrame({
    'Id': range(len(test_predictions)),
    'Predicted': test_predictions.flatten()
})

# 保存预测结果
predictions_df.to_csv('data/predictions.csv', index=False)
print("预测结果已保存到 data/predictions.csv")

使用全量数据训练最终模型...
Epoch 1/50, Loss: 601384704.0
Epoch 2/50, Loss: 344548352.0
Epoch 3/50, Loss: 228334368.0
Epoch 4/50, Loss: 363200256.0
Epoch 5/50, Loss: 80958416.0
Epoch 6/50, Loss: 84301576.0
Epoch 7/50, Loss: 130191664.0
Epoch 8/50, Loss: 75877600.0
Epoch 9/50, Loss: 149877904.0
Epoch 10/50, Loss: 21615413248.0
Epoch 11/50, Loss: 247241296.0
Epoch 12/50, Loss: 24582738.0
Epoch 13/50, Loss: 117100784.0
Epoch 14/50, Loss: 1098592384.0
Epoch 15/50, Loss: 281298624.0
Epoch 16/50, Loss: 109165384.0
Epoch 17/50, Loss: 76281576.0
Epoch 18/50, Loss: 153431168.0
Epoch 19/50, Loss: 257027136.0
Epoch 20/50, Loss: 112057608.0
Epoch 21/50, Loss: 499631744.0
Epoch 22/50, Loss: 120539304.0
Epoch 23/50, Loss: 456576672.0
Epoch 24/50, Loss: 703028352.0
Epoch 25/50, Loss: 26557404.0
Epoch 26/50, Loss: 173423232.0
Epoch 27/50, Loss: 1163090304.0
Epoch 28/50, Loss: 152474288.0
Epoch 29/50, Loss: 327823680.0
Epoch 30/50, Loss: 12463853.0
Epoch 31/50, Loss: 217957232.0
Epoch 32/50, Loss: 121490160.0
Epoc