In [1]:
import pandas as pd
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import json
import math

# 读取处理后的数据集
X_train = pd.read_csv('data/processed/X_train.csv')
y_train = pd.read_csv('data/processed/y_train.csv')
X_valid = pd.read_csv('data/processed/X_valid.csv')
y_valid = pd.read_csv('data/processed/y_valid.csv') 
X_train_full = pd.read_csv('data/processed/X_train_full.csv') 
y_train_full = pd.read_csv('data/processed/y_train_full.csv') 
X_test = pd.read_csv('data/processed/X_test.csv')

print('数据集读取完成')
print(f'训练集大小: {X_train.shape}')
print(f'验证集大小: {X_valid.shape}')
print(f'全量训练集大小: {X_train_full.shape}')
print(f'测试集大小: {X_test.shape}')


数据集读取完成
训练集大小: (20000, 74)
验证集大小: (5000, 74)
全量训练集大小: (25000, 74)
测试集大小: (10000, 74)


In [2]:
import torch
print("PyTorch Version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA Version:", torch.version.cuda)
print(torch.cuda.is_available())

PyTorch Version: 2.5.1+cu121
CUDA available: True
CUDA Version: 12.1
True


In [3]:
# 直接定义列名配置
del_cols = ['listing_id', 'original_reg_date', 'opc_scheme', 'lifespan', 'eco_category', 'indicative_price']
text_cols = ['title', 'description', 'features', 'accessories']
date_cols = ['reg_date']
numeric_cols = ['manufactured', 'curb_weight', 'power', 'engine_cap', 'depreciation', 'coe', 'road_tax', 
                'dereg_value', 'mileage', 'omv', 'arf', 'year', 'month',
                'text_brand_popularity_score', 'text_model_value_score', 'text_condition_score',
                'text_feature_rarity_score', 'text_performance_score', 'text_sentiment_score']
log_cols = ['manufactured', 'curb_weight', 'power_log', 'engine_cap_log', 'depreciation_log', 'coe', 
            'road_tax_log', 'dereg_value_log', 'mileage_log', 'omv_log', 'arf_log', 'year', 'month']
root_cols = ['manufactured', 'curb_weight', 'power_root', 'engine_cap_root', 'depreciation_root', 'coe', 
             'road_tax_root', 'dereg_value_root', 'mileage_root', 'omv_root', 'arf_root', 'year', 'month']
categorical_cols = ['make', 'model', 'type_of_vehicle', 'category', 'transmission', 'fuel_type', 'no_of_owners']

# 更新变换列，添加GPT特征
cat_nu_cols = [
    "manufactured", "curb_weight", "power", "engine_cap", "no_of_owners", "depreciation", 
    "coe", "road_tax", "dereg_value", "mileage", "omv", "arf", "make_target_encoded",
    "text_brand_popularity_score", "text_model_value_score", "text_condition_score",
    "text_feature_rarity_score", "text_performance_score", "text_sentiment_score",
    "-", "almost new car", "coe car", "consignment car", "direct owner sale", 
    "electric cars", "hybrid cars", "imported used vehicle", "low mileage car", 
    "opc car", "parf car", "premium ad car", "rare & exotic", "sgcarmart warranty cars", 
    "sta evaluated car", "vintage cars", "type_of_vehicle_bus/mini bus", 
    "type_of_vehicle_hatchback", "type_of_vehicle_luxury sedan", 
    "type_of_vehicle_mid-sized sedan", "type_of_vehicle_mpv", "type_of_vehicle_others", 
    "type_of_vehicle_sports car", "type_of_vehicle_stationwagon", "type_of_vehicle_suv", 
    "type_of_vehicle_truck", "type_of_vehicle_van", "fuel_type_diesel", 
    "fuel_type_diesel-electric", "fuel_type_electric", "fuel_type_petrol", 
    "fuel_type_petrol-electric", "fuel_type_nan", "transmission_manual", "year", "month"
]

cat_log_cols = [
    "manufactured", "curb_weight", "power_log", "engine_cap_log", "depreciation_log", 
    "coe", "road_tax_log", "dereg_value_log", "mileage_log", "omv_log", "arf_log", 
    "make_target_encoded", "text_brand_popularity_score", "text_model_value_score", 
    "text_condition_score", "text_feature_rarity_score", "text_performance_score", 
    "text_sentiment_score", "-", "almost new car", "coe car", "consignment car", 
    "direct owner sale", "electric cars", "hybrid cars", "imported used vehicle", 
    "low mileage car", "opc car", "parf car", "premium ad car", "rare & exotic", 
    "sgcarmart warranty cars", "sta evaluated car", "vintage cars", 
    "type_of_vehicle_bus/mini bus", "type_of_vehicle_hatchback", 
    "type_of_vehicle_luxury sedan", "type_of_vehicle_mid-sized sedan", 
    "type_of_vehicle_mpv", "type_of_vehicle_others", "type_of_vehicle_sports car", 
    "type_of_vehicle_stationwagon", "type_of_vehicle_suv", "type_of_vehicle_truck", 
    "type_of_vehicle_van", "fuel_type_diesel", "fuel_type_diesel-electric", 
    "fuel_type_electric", "fuel_type_petrol", "fuel_type_petrol-electric", 
    "fuel_type_nan", "transmission_manual", "year", "month"
]

cat_root_cols = [
    "manufactured", "curb_weight", "power_root", "engine_cap_root", "depreciation_root", 
    "coe", "road_tax_root", "dereg_value_root", "mileage_root", "omv_root", "arf_root", 
    "make_target_encoded", "text_brand_popularity_score", "text_model_value_score", 
    "text_condition_score", "text_feature_rarity_score", "text_performance_score", 
    "text_sentiment_score", "-", "almost new car", "coe car", "consignment car", 
    "direct owner sale", "electric cars", "hybrid cars", "imported used vehicle", 
    "low mileage car", "opc car", "parf car", "premium ad car", "rare & exotic", 
    "sgcarmart warranty cars", "sta evaluated car", "vintage cars", 
    "type_of_vehicle_bus/mini bus", "type_of_vehicle_hatchback", 
    "type_of_vehicle_luxury sedan", "type_of_vehicle_mid-sized sedan", 
    "type_of_vehicle_mpv", "type_of_vehicle_others", "type_of_vehicle_sports car", 
    "type_of_vehicle_stationwagon", "type_of_vehicle_suv", "type_of_vehicle_truck", 
    "type_of_vehicle_van", "fuel_type_diesel", "fuel_type_diesel-electric", 
    "fuel_type_electric", "fuel_type_petrol", "fuel_type_petrol-electric", 
    "fuel_type_nan", "transmission_manual", "year", "month"
]

# 丢弃log和root变换的结果
X_train = X_train[cat_nu_cols]
X_valid = X_valid[cat_nu_cols]
X_test = X_test[cat_nu_cols]
X_train_full = X_train_full[cat_nu_cols]

print(f'训练集: {X_train.shape}')
print(f'全量集: {X_train_full.shape}')
print(f'测试集: {X_test.shape}')

训练集: (20000, 55)
全量集: (25000, 55)
测试集: (10000, 55)


In [4]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from umap.umap_ import UMAP  # 正确的导入方式

# 加载BERT向量
bert_train_vectors = np.load('data/processed/train_vectors.npy')
bert_valid_vectors = np.load('data/processed/valid_vectors.npy')
bert_train_full_vectors = np.load('data/processed/train_full_vectors.npy')
bert_test_vectors = np.load('data/processed/test_vectors.npy')

# BERT降维
scaler = StandardScaler()

# UMAP参数设置
umap = UMAP(
    n_components=8,
    n_neighbors=20,
    min_dist=0.1,
    metric='cosine',
    random_state=42
)

# umap = UMAP(
#     n_components=16,
#     n_neighbors=30,
#     min_dist=0.3,
#     metric='cosine',
#     random_state=42
# )

# umap = UMAP(
#     n_components=24,
#     n_neighbors=50,
#     min_dist=0.5,
#     metric='cosine',
#     random_state=42
# )

# 对BERT向量进行UMAP降维
bert_train_scaled = scaler.fit_transform(bert_train_vectors)
bert_train_umap = umap.fit_transform(bert_train_scaled)

# 对验证集和测试集应用相同的转换
bert_valid_scaled = scaler.transform(bert_valid_vectors)
bert_valid_umap = umap.transform(bert_valid_scaled)

bert_test_scaled = scaler.transform(bert_test_vectors)
bert_test_umap = umap.transform(bert_test_scaled)

# 对完整训练集进行转换
bert_train_full_scaled = scaler.transform(bert_train_full_vectors)
bert_train_full_umap = umap.transform(bert_train_full_scaled)

# 拼接特征
X_train_combined = np.hstack((X_train[cat_nu_cols].values, bert_train_umap))
X_valid_combined = np.hstack((X_valid[cat_nu_cols].values, bert_valid_umap))
X_test_combined = np.hstack((X_test[cat_nu_cols].values, bert_test_umap))
X_train_full_combined = np.hstack((X_train_full[cat_nu_cols].values, bert_train_full_umap))


# 打印维度信息
print("特征维度:")
print(f"原始特征: {X_train[cat_nu_cols].shape[1]}")
print(f"UMAP特征: {bert_train_umap.shape[1]}")
print(f"组合特征: {X_train_combined.shape[1]}")

  warn(


特征维度:
原始特征: 55
UMAP特征: 8
组合特征: 63


In [5]:
print(X_train_full_combined.shape)
print(X_test_combined.shape)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_train = torch.tensor(X_train_combined, dtype=torch.float32).to(device)
X_valid = torch.tensor(X_valid_combined, dtype=torch.float32).to(device)
X_test = torch.tensor(X_test_combined, dtype=torch.float32).to(device)
X_train_full = torch.tensor(X_train_full_combined, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1).to(device)
y_valid = torch.tensor(y_valid.values, dtype=torch.float32).view(-1, 1).to(device)
y_train_full = torch.tensor(y_train_full.values, dtype=torch.float32).view(-1, 1).to(device)

(25000, 63)
(10000, 63)


## Baseline

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

class MLPModel(nn.Module):
    def __init__(self, input_dim):
        super(MLPModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, 128)
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 32)
        self.output_layer = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        
        # Initializing weights
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')

    
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.output_layer(x)
        return x

In [7]:
import random

def set_seed(seed_value=42):
    """Set seed for reproducibility."""
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)  # if using multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
def train_network(model_class, input_dim, X_train_tensor, y_train_tensor, X_valid_tensor, y_valid_tensor, lr=0.001, wd=0, seed=42):
    set_seed(seed)  # Set the random seed for reproducibility
    
    # Initialize the model with the provided model class and input dimension
    model = model_class(input_dim)
    model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

    # Convert data to tensors and create data loaders
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0)
    y_valid_n = y_valid_tensor.cpu().numpy()

    epochs = 100
    best_rmse = float('inf')
    patience = 10
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            y_valid_pred = model(X_valid_tensor).cpu().numpy().flatten()
            mse_valid = mean_squared_error(y_valid_n, y_valid_pred)
            rmse_valid = np.sqrt(mse_valid)
        if rmse_valid < best_rmse:
            best_rmse = rmse_valid
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

    print(f'Best Valid RMSE: {best_rmse:.4f}')
    return best_rmse

input_dim = X_train_combined.shape[1]
print(input_dim)
        
# input_dim = len(cat_nu_cols)
# print(input_dim)
        


63


In [8]:
def run_with_multiple_seeds_and_average(model_class, input_dim, X_train, y_train, X_valid, y_valid, lr, wd):
    total_rmse = 0
    num_seeds = 10  # 种子数量从 40 到 49，共 10 个
    for seed in range(40, 50):  # 从 40 到 49
        rmse = train_network(model_class, input_dim, X_train, y_train, X_valid, y_valid, lr, wd, seed)
        total_rmse += rmse
    average_rmse = total_rmse / num_seeds  # 计算平均 RMSE
    print(f"Average RMSE for seeds 40 to 49: {average_rmse:.4f}")


In [9]:
run_with_multiple_seeds_and_average(MLPModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-2, 0)
run_with_multiple_seeds_and_average(MLPModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 0)
run_with_multiple_seeds_and_average(MLPModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-4, 0)

Early stopping triggered.
Best Valid RMSE: 29282.6602
Early stopping triggered.
Best Valid RMSE: 29965.7734
Early stopping triggered.
Best Valid RMSE: 28235.6035
Early stopping triggered.
Best Valid RMSE: 29435.5547
Early stopping triggered.
Best Valid RMSE: 29096.4844
Early stopping triggered.
Best Valid RMSE: 28660.7207
Early stopping triggered.
Best Valid RMSE: 28739.4512
Early stopping triggered.
Best Valid RMSE: 29053.4004
Early stopping triggered.
Best Valid RMSE: 30842.9785
Early stopping triggered.
Best Valid RMSE: 29083.4219
Average RMSE for seeds 40 to 49: 29239.6049
Early stopping triggered.
Best Valid RMSE: 24435.5801
Early stopping triggered.
Best Valid RMSE: 25168.1211
Early stopping triggered.
Best Valid RMSE: 24917.9355
Early stopping triggered.
Best Valid RMSE: 26982.2344
Early stopping triggered.
Best Valid RMSE: 24666.9141
Early stopping triggered.
Best Valid RMSE: 24109.6855
Early stopping triggered.
Best Valid RMSE: 23795.8066
Early stopping triggered.
Best Valid R

In [10]:
run_with_multiple_seeds_and_average(MLPModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 1e-1)
run_with_multiple_seeds_and_average(MLPModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 1e-2)
run_with_multiple_seeds_and_average(MLPModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 1e-3)
run_with_multiple_seeds_and_average(MLPModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 0)

Early stopping triggered.
Best Valid RMSE: 26457.8633
Early stopping triggered.
Best Valid RMSE: 26193.6738
Early stopping triggered.
Best Valid RMSE: 26207.1133
Early stopping triggered.
Best Valid RMSE: 26997.6719
Early stopping triggered.
Best Valid RMSE: 24965.5000
Early stopping triggered.
Best Valid RMSE: 25879.0449
Early stopping triggered.
Best Valid RMSE: 26302.8281
Early stopping triggered.
Best Valid RMSE: 26020.3203
Early stopping triggered.
Best Valid RMSE: 27951.2715
Early stopping triggered.
Best Valid RMSE: 25890.0137
Average RMSE for seeds 40 to 49: 26286.5301
Early stopping triggered.
Best Valid RMSE: 24933.8926
Early stopping triggered.
Best Valid RMSE: 26187.6973
Early stopping triggered.
Best Valid RMSE: 25018.0215
Early stopping triggered.
Best Valid RMSE: 24824.9004
Early stopping triggered.
Best Valid RMSE: 24928.3555
Early stopping triggered.
Best Valid RMSE: 26618.4590
Early stopping triggered.
Best Valid RMSE: 26352.5039
Early stopping triggered.
Best Valid R

In [11]:
run_with_multiple_seeds_and_average(MLPModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 1e-4)
run_with_multiple_seeds_and_average(MLPModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 1e-5)
run_with_multiple_seeds_and_average(MLPModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 1e-6)

Early stopping triggered.
Best Valid RMSE: 26125.9062
Best Valid RMSE: 24921.6641
Early stopping triggered.
Best Valid RMSE: 26581.1191
Early stopping triggered.
Best Valid RMSE: 27020.0059
Early stopping triggered.
Best Valid RMSE: 24582.0215
Early stopping triggered.
Best Valid RMSE: 24528.7734
Early stopping triggered.
Best Valid RMSE: 26733.9551
Early stopping triggered.
Best Valid RMSE: 25791.9004
Early stopping triggered.
Best Valid RMSE: 27375.8320
Early stopping triggered.
Best Valid RMSE: 26803.4141
Average RMSE for seeds 40 to 49: 26046.4592
Early stopping triggered.
Best Valid RMSE: 25483.8105
Early stopping triggered.
Best Valid RMSE: 26133.1992
Early stopping triggered.
Best Valid RMSE: 24843.3105
Early stopping triggered.
Best Valid RMSE: 25046.2070
Early stopping triggered.
Best Valid RMSE: 25085.9023
Early stopping triggered.
Best Valid RMSE: 24571.6328
Early stopping triggered.
Best Valid RMSE: 24858.9414
Early stopping triggered.
Best Valid RMSE: 25072.3008
Early stop

## Other NN Models

In [12]:
class MLPWithBertModel(nn.Module):
    def __init__(self, input_dim, bert_dim=8, reduced_dim=16):
        super(MLPWithBertModel, self).__init__()
        # Assuming the BERT output is at the last part of the input
        self.bert_processor = nn.Sequential(
            nn.Linear(bert_dim, reduced_dim),
            nn.ReLU()
        )
        
        # New input dimension after concatenating reduced BERT output
        new_input_dim = input_dim - bert_dim + reduced_dim
        
        self.layer1 = nn.Linear(new_input_dim, 128)
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 32)
        self.output_layer = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        
        # Initializing weights
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
    
    def forward(self, x):
        # Assuming x is the input where the last 8 elements are the BERT vector
        bert_vector = x[:, -8:]  # Extract the last 8 dimensions
        other_features = x[:, :-8]  # Extract all other features
        
        # Process the BERT vector
        processed_bert = self.bert_processor(bert_vector)
        
        # Concatenate the processed BERT output with other features
        x = torch.cat((other_features, processed_bert), dim=1)
        
        # Feed through the subsequent layers
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.output_layer(x)
        return x


In [13]:
run_with_multiple_seeds_and_average(MLPWithBertModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 0)

Early stopping triggered.
Best Valid RMSE: 26703.6641
Early stopping triggered.
Best Valid RMSE: 24819.3340
Early stopping triggered.
Best Valid RMSE: 27647.3672
Early stopping triggered.
Best Valid RMSE: 28471.7637
Early stopping triggered.
Best Valid RMSE: 25371.4707
Early stopping triggered.
Best Valid RMSE: 24761.3027
Early stopping triggered.
Best Valid RMSE: 27227.7070
Early stopping triggered.
Best Valid RMSE: 26635.1699
Early stopping triggered.
Best Valid RMSE: 23765.0293
Early stopping triggered.
Best Valid RMSE: 27358.3848
Average RMSE for seeds 40 to 49: 26276.1193


In [14]:
class MLPDropoutModel(nn.Module):
    def __init__(self, input_dim, dropout_rate=0.5):
        super(MLPDropoutModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, 128)
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 32)
        self.output_layer = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
    
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.dropout(x)
        x = self.relu(self.layer2(x))
        x = self.dropout(x)
        x = self.relu(self.layer3(x))
        x = self.dropout(x)
        x = self.output_layer(x)
        return x

In [15]:
run_with_multiple_seeds_and_average(MLPDropoutModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 0)

Early stopping triggered.
Best Valid RMSE: 39446.8633
Early stopping triggered.
Best Valid RMSE: 39724.5430
Early stopping triggered.
Best Valid RMSE: 38784.5352
Early stopping triggered.
Best Valid RMSE: 38807.5625
Early stopping triggered.
Best Valid RMSE: 41661.5234
Early stopping triggered.
Best Valid RMSE: 39772.0430
Early stopping triggered.
Best Valid RMSE: 40497.1992
Early stopping triggered.
Best Valid RMSE: 44484.5977
Early stopping triggered.
Best Valid RMSE: 42918.1680
Early stopping triggered.
Best Valid RMSE: 38590.3125
Average RMSE for seeds 40 to 49: 40468.7348


In [16]:
class MLPDropoutModel(nn.Module):
    def __init__(self, input_dim, dropout_rate=0.1):
        super(MLPDropoutModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, 128)
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 32)
        self.output_layer = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
    
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.dropout(x)
        x = self.relu(self.layer2(x))
        x = self.dropout(x)
        x = self.relu(self.layer3(x))
        x = self.dropout(x)
        x = self.output_layer(x)
        return x

In [17]:
run_with_multiple_seeds_and_average(MLPDropoutModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 0)

Early stopping triggered.
Best Valid RMSE: 30420.2422
Early stopping triggered.
Best Valid RMSE: 29679.4355
Early stopping triggered.
Best Valid RMSE: 31220.8086
Early stopping triggered.
Best Valid RMSE: 29977.5039
Early stopping triggered.
Best Valid RMSE: 29877.0801
Early stopping triggered.
Best Valid RMSE: 31880.7109
Early stopping triggered.
Best Valid RMSE: 30816.5488
Early stopping triggered.
Best Valid RMSE: 29205.3633
Early stopping triggered.
Best Valid RMSE: 31278.4980
Early stopping triggered.
Best Valid RMSE: 31060.9258
Average RMSE for seeds 40 to 49: 30541.7117


In [18]:
class MLPBNModel(nn.Module):
    def __init__(self, input_dim):
        super(MLPBNModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)  # Batch normalization layer for the first layer output
        self.layer2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)   # Batch normalization layer for the second layer output
        self.layer3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)   # Batch normalization layer for the third layer output
        self.output_layer = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        
        # Initializing weights
        self._initialize_weights()

    def forward(self, x):
        x = self.relu(self.bn1(self.layer1(x)))  # Apply BatchNorm after linear transformation, before ReLU
        x = self.relu(self.bn2(self.layer2(x)))
        x = self.relu(self.bn3(self.layer3(x)))
        x = self.output_layer(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    m.bias.data.fill_(0.01)  # Initialize biases to a small constant



In [19]:
run_with_multiple_seeds_and_average(MLPBNModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 0)

Best Valid RMSE: 142330.3281
Best Valid RMSE: 138713.0625
Best Valid RMSE: 142498.4688
Best Valid RMSE: 140954.1875
Best Valid RMSE: 143754.3125
Best Valid RMSE: 147133.5312
Best Valid RMSE: 141508.2031
Best Valid RMSE: 146971.8594
Best Valid RMSE: 139035.9375
Best Valid RMSE: 142847.6562
Average RMSE for seeds 40 to 49: 142574.7547


In [20]:
class DeeperMLPModel(nn.Module):
    def __init__(self, input_dim):
        super(DeeperMLPModel, self).__init__()
        # Increasing the depth with more layers
        self.layer1 = nn.Linear(input_dim, 256)
        self.layer2 = nn.Linear(256, 128)
        self.layer3 = nn.Linear(128, 128)
        self.layer4 = nn.Linear(128, 64)
        self.layer5 = nn.Linear(64, 64)
        self.layer6 = nn.Linear(64, 32)
        self.layer7 = nn.Linear(32, 16)
        self.output_layer = nn.Linear(16, 1)
        
        # ReLU activation
        self.relu = nn.ReLU()
        
        # Initializing weights with Kaiming initialization suitable for ReLU
        self._initialize_weights()

    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.relu(self.layer4(x))
        x = self.relu(self.layer5(x))
        x = self.relu(self.layer6(x))
        x = self.relu(self.layer7(x))
        x = self.output_layer(x)
        return x
    
    def _initialize_weights(self):
        # Apply Kaiming He initialization to all linear layers in the model
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    m.bias.data.fill_(0.01)



In [22]:
run_with_multiple_seeds_and_average(DeeperMLPModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 0)

Early stopping triggered.
Best Valid RMSE: 27056.4609
Early stopping triggered.
Best Valid RMSE: 24468.6035
Early stopping triggered.
Best Valid RMSE: 23973.1699
Early stopping triggered.
Best Valid RMSE: 27135.1504
Early stopping triggered.
Best Valid RMSE: 23657.3457
Early stopping triggered.
Best Valid RMSE: 23696.1895
Early stopping triggered.
Best Valid RMSE: 26114.7480
Early stopping triggered.
Best Valid RMSE: 25216.7930
Early stopping triggered.
Best Valid RMSE: 24818.8945
Early stopping triggered.
Best Valid RMSE: 25326.0664
Average RMSE for seeds 40 to 49: 25146.3422


In [23]:
class DeeperMLPWithBertModel(nn.Module):
    def __init__(self, input_dim, bert_dim=8, reduced_bert_dim=128):
        super(DeeperMLPWithBertModel, self).__init__()
        
        # Processing the BERT vector
        self.bert_processor = nn.Sequential(
            nn.Linear(bert_dim, reduced_bert_dim),
            nn.ReLU()
        )
        
        # New input dimension after reducing the BERT vector and concatenating it back
        new_input_dim = input_dim - bert_dim + reduced_bert_dim

        # Increasing the depth with more layers
        self.layer1 = nn.Linear(new_input_dim, 256)
        self.layer2 = nn.Linear(256, 128)
        self.layer3 = nn.Linear(128, 128)
        self.layer4 = nn.Linear(128, 64)
        self.layer5 = nn.Linear(64, 64)
        self.layer6 = nn.Linear(64, 32)
        self.layer7 = nn.Linear(32, 16)
        self.output_layer = nn.Linear(16, 1)
        
        # ReLU activation
        self.relu = nn.ReLU()
        
        # Initializing weights with Kaiming initialization suitable for ReLU
        self._initialize_weights()

    def forward(self, x):
        # Split the input into BERT vector and other features
        bert_vector = x[:, -8:]  # Assuming BERT vector is the last 8 elements
        other_features = x[:, :-8]  # The rest of the features

        # Process the BERT vector
        processed_bert = self.bert_processor(bert_vector)

        # Concatenate the processed BERT vector with other features
        x = torch.cat((other_features, processed_bert), dim=1)

        # Sequentially process through all layers
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.relu(self.layer4(x))
        x = self.relu(self.layer5(x))
        x = self.relu(self.layer6(x))
        x = self.relu(self.layer7(x))
        x = self.output_layer(x)
        return x
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    m.bias.data.fill_(0.01)

In [24]:
run_with_multiple_seeds_and_average(DeeperMLPWithBertModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 0)

Early stopping triggered.
Best Valid RMSE: 26850.2734
Early stopping triggered.
Best Valid RMSE: 28228.9844
Early stopping triggered.
Best Valid RMSE: 26440.4453
Early stopping triggered.
Best Valid RMSE: 23957.5000
Early stopping triggered.
Best Valid RMSE: 25107.8770
Early stopping triggered.
Best Valid RMSE: 23834.5000
Early stopping triggered.
Best Valid RMSE: 24558.5254
Early stopping triggered.
Best Valid RMSE: 29403.0195
Early stopping triggered.
Best Valid RMSE: 23973.0527
Early stopping triggered.
Best Valid RMSE: 24926.8789
Average RMSE for seeds 40 to 49: 25728.1057


In [71]:
class DeepResidualMLP(nn.Module):
    def __init__(self, input_dim):
        super(DeepResidualMLP, self).__init__()
        self.layer1 = nn.Linear(input_dim, 256)
        self.layer2 = nn.Linear(256, 256)
        self.layer3 = nn.Linear(256, 256)
        self.layer4 = nn.Linear(256, 256)
        self.layer5 = nn.Linear(256, 256)
        self.output_layer = nn.Linear(256, 1)
        
        # ReLU activation
        self.relu = nn.ReLU()
        
        # Initializing weights with He initialization suitable for ReLU
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        identity = self.relu(self.layer1(x))
        
        # Residual block 1
        out = self.relu(self.layer2(identity))
        x = out + identity  # Ensure no inplace operation
        
        # Residual block 2
        identity = x  # Update identity to current x before the next block
        out = self.relu(self.layer3(x))
        x = out + identity
        
        # Residual block 3
        identity = x
        out = self.relu(self.layer4(x))
        x = out + identity
        
        # Residual block 4
        identity = x
        out = self.relu(self.layer5(x))
        x = out + identity
        
        x = self.output_layer(x)
        return x





In [72]:
run_with_multiple_seeds_and_average(DeepResidualMLP, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 0)

Early stopping triggered.
Best Valid RMSE: 24050.7891
Early stopping triggered.
Best Valid RMSE: 27886.6406
Early stopping triggered.
Best Valid RMSE: 24212.5938
Early stopping triggered.
Best Valid RMSE: 24250.9727
Early stopping triggered.
Best Valid RMSE: 24007.9141
Early stopping triggered.
Best Valid RMSE: 23162.4062
Early stopping triggered.
Best Valid RMSE: 24812.3711
Early stopping triggered.
Best Valid RMSE: 24609.7969
Early stopping triggered.
Best Valid RMSE: 23728.8730
Early stopping triggered.
Best Valid RMSE: 23343.9824
Average RMSE for seeds 40 to 49: 24406.6340


In [31]:
class DeeperResidualMLP(nn.Module):
    def __init__(self, input_dim):
        super(DeeperResidualMLP, self).__init__()
        self.layer1 = nn.Linear(input_dim, 256)
        self.layer2 = nn.Linear(256, 256)
        self.layer3 = nn.Linear(256, 256)
        self.layer4 = nn.Linear(256, 256)
        self.layer5 = nn.Linear(256, 256)
        # Adding intermediate layers
        self.layer6 = nn.Linear(256, 128)
        self.layer7 = nn.Linear(128, 64)
        self.layer8 = nn.Linear(64, 32)
        self.layer9 = nn.Linear(32, 16)
        self.output_layer = nn.Linear(16, 1)
        
        # ReLU activation
        self.relu = nn.ReLU()
        
        # Initializing weights with He initialization suitable for ReLU
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        # Input pass
        identity = x
        x = self.relu(self.layer1(x))
        
        # Residual block 1
        out = self.relu(self.layer2(x))
        out = out + x  # Replacing inplace operation with out-of-place operation
        
        # Residual block 2
        x = self.relu(self.layer3(out))
        x = x + out  # Replacing inplace operation with out-of-place operation
        
        # Residual block 3
        out = self.relu(self.layer4(x))
        out = out + x  # Replacing inplace operation with out-of-place operation
        
        # Residual block 4
        x = self.relu(self.layer5(out))
        x = x + out  # Replacing inplace operation with out-of-place operation
    
        # Gradual reduction layers
        x = self.relu(self.layer6(x))
        x = self.relu(self.layer7(x))
        x = self.relu(self.layer8(x))
        x = self.relu(self.layer9(x))
    
        # Output pass
        x = self.output_layer(x)
        return x

In [32]:
run_with_multiple_seeds_and_average(DeeperResidualMLP, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 0)

Early stopping triggered.
Best Valid RMSE: 23998.5527
Early stopping triggered.
Best Valid RMSE: 23995.7695
Early stopping triggered.
Best Valid RMSE: 24411.0527
Early stopping triggered.
Best Valid RMSE: 29195.5312
Early stopping triggered.
Best Valid RMSE: 24530.7578
Early stopping triggered.
Best Valid RMSE: 26299.5176
Early stopping triggered.
Best Valid RMSE: 24565.1309
Early stopping triggered.
Best Valid RMSE: 23506.8828
Early stopping triggered.
Best Valid RMSE: 25207.9688
Early stopping triggered.
Best Valid RMSE: 26825.5410
Average RMSE for seeds 40 to 49: 25253.6705


In [33]:
class Attention(nn.Module):
    def __init__(self, feature_dim):
        super(Attention, self).__init__()
        self.feature_dim = feature_dim
        
        self.key_layer = nn.Linear(feature_dim, feature_dim, bias=False)
        self.query_layer = nn.Linear(feature_dim, feature_dim, bias=False)

    def forward(self, x):
        query = self.query_layer(x).unsqueeze(1)  # Adding batch dimension
        key = self.key_layer(x).unsqueeze(-1)  # Adding an extra dimension for bmm
        
        # Compute attention scores and apply softmax
        scores = torch.bmm(query, key)  # Should work as both are 3D now
        weights = F.softmax(scores, dim=-1)
        
        # Apply weights to the original input features, using batch matrix multiplication
        attended = torch.bmm(weights, x.unsqueeze(1))  # x also needs to be 3D
        return attended.squeeze(1)  # Remove the extra dimension to match expected output shape





class DeepResidualMLPWithAttention(nn.Module):
    def __init__(self, input_dim):
        super(DeepResidualMLPWithAttention, self).__init__()
        self.layer1 = nn.Linear(input_dim, 256)
        self.layer2 = nn.Linear(256, 256)
        self.layer3 = nn.Linear(256, 256)
        self.layer4 = nn.Linear(256, 256)
        self.layer5 = nn.Linear(256, 256)
        self.attention = Attention(256)  # Attention layer after layer5
        self.output_layer = nn.Linear(256, 1)
        
        self.relu = nn.ReLU()
        
        # Initializing weights
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        identity = x
        x = self.relu(self.layer1(x))
        
        out = self.relu(self.layer2(x))
        out = out + x  # Use out-of-place operation

        x = self.relu(self.layer3(out))
        x = x + out  # Use out-of-place operation

        out = self.relu(self.layer4(x))
        out = out + x  # Use out-of-place operation

        x = self.relu(self.layer5(out))
        x = x + out  # Use out-of-place operation

        # Apply attention
        x = self.attention(x)
        x = self.output_layer(x)
        return x



In [34]:
run_with_multiple_seeds_and_average(DeepResidualMLPWithAttention, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 0)

Early stopping triggered.
Best Valid RMSE: 23995.5645
Early stopping triggered.
Best Valid RMSE: 27950.9746
Early stopping triggered.
Best Valid RMSE: 26850.6328
Early stopping triggered.
Best Valid RMSE: 24806.5977
Early stopping triggered.
Best Valid RMSE: 24531.3066
Early stopping triggered.
Best Valid RMSE: 24396.6992
Early stopping triggered.
Best Valid RMSE: 23207.5859
Early stopping triggered.
Best Valid RMSE: 27993.7988
Early stopping triggered.
Best Valid RMSE: 24122.9551
Early stopping triggered.
Best Valid RMSE: 28209.8926
Average RMSE for seeds 40 to 49: 25606.6008


In [35]:
class BroaderMLPModel(nn.Module):
    def __init__(self, input_dim):
        super(BroaderMLPModel, self).__init__()
        # Adjusting the layer sizes to match the diagram
        self.layer1 = nn.Linear(input_dim, 512)
        self.layer2 = nn.Linear(512, 256)
        self.layer3 = nn.Linear(256, 128)
        self.layer4 = nn.Linear(128, 64)
        self.output_layer = nn.Linear(64, 1)
        
        # ReLU activation
        self.relu = nn.ReLU()
        
        # Initializing weights with Kaiming initialization suitable for ReLU
        self._initialize_weights()

    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.relu(self.layer4(x))
        x = self.output_layer(x)
        return x
    
    def _initialize_weights(self):
        # Apply Kaiming He initialization to all linear layers in the model
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    m.bias.data.fill_(0.01)


In [36]:
run_with_multiple_seeds_and_average(BroaderMLPModel, input_dim, X_train, y_train, X_valid, y_valid, 1e-3, 0)

Early stopping triggered.
Best Valid RMSE: 25440.8809
Early stopping triggered.
Best Valid RMSE: 23883.6855
Early stopping triggered.
Best Valid RMSE: 26357.7812
Early stopping triggered.
Best Valid RMSE: 27368.0566
Early stopping triggered.
Best Valid RMSE: 26315.0820
Early stopping triggered.
Best Valid RMSE: 24414.5234
Early stopping triggered.
Best Valid RMSE: 23893.6387
Early stopping triggered.
Best Valid RMSE: 25038.7246
Early stopping triggered.
Best Valid RMSE: 23528.3613
Early stopping triggered.
Best Valid RMSE: 24964.4375
Average RMSE for seeds 40 to 49: 25120.5172


## Prediction on test set

In [73]:
import copy

def train_network_and_get_result(pre_best_rmse, model_class, input_dim, X_train_tensor, y_train_tensor, X_valid_tensor, y_valid_tensor, X_test_tensor, lr=0.001, wd=0, seed=42):
    set_seed(seed)  # Set the random seed for reproducibility
    
    # Initialize the model with the provided model class and input dimension
    model = model_class(input_dim)
    model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

    # Convert data to tensors and create data loaders
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0)
    y_valid_n = y_valid_tensor.cpu().numpy()

    epochs = 100
    best_rmse = float('inf')
    patience = 10
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            y_valid_pred = model(X_valid_tensor).cpu().numpy().flatten()
            mse_valid = mean_squared_error(y_valid_n, y_valid_pred)
            rmse_valid = np.sqrt(mse_valid)

        if rmse_valid < best_rmse:
            best_rmse = rmse_valid
            best_model = copy.deepcopy(model)
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break
            
    if pre_best_rmse > best_rmse:
        pre_best_rmse = best_rmse
        torch.save(best_model.state_dict(), 'best_model.pth')  # Save the best model
        
        best_model.eval()
        with torch.no_grad():
            y_valid_pred = best_model(X_valid_tensor).cpu().numpy().flatten()
            # mse_valid = mean_squared_error(y_valid_n, y_valid_pred)
            # rmse_valid = np.sqrt(mse_valid)
            # print(rmse_valid)
        valid_predictions_df = pd.DataFrame({
            'Id': range(len(y_valid_pred)),
            'Predicted': y_valid_pred
        })
        valid_predictions_df.to_csv('data/nn_valid.csv', index=False)
        print("Validation results saved to data/nn_valid.csv")

        
        best_model.eval()
        with torch.no_grad():
            y_test_pred = best_model(X_test_tensor).cpu().numpy().flatten()
        test_predictions_df = pd.DataFrame({
            'Id': range(len(y_test_pred)),
            'Predicted': y_test_pred
        })
        test_predictions_df.to_csv('data/nn_test.csv', index=False)
        print("Test results saved to data/nn_test.csv")
            
    return pre_best_rmse



In [74]:
def run_with_multiple_seeds_and_get_best(model_class, input_dim, X_train, y_train, X_valid, y_valid, X_test, lr, wd):
    pre_best_rmse = 1e6
    for seed in range(100):
        best_rmse = train_network_and_get_result(pre_best_rmse, model_class, input_dim, X_train, y_train, X_valid, y_valid, X_test, lr, wd, seed)
        pre_best_rmse = best_rmse
        print(f"Best RMSE till round {seed+1} : {pre_best_rmse:.4f}")
        

In [75]:
run_with_multiple_seeds_and_get_best(DeepResidualMLP, input_dim, X_train, y_train, X_valid, y_valid, X_test, 1e-3, 0)

Validation results saved to data/nn_valid.csv
Test results saved to data/nn_test.csv
Best RMSE till round 1 : 24816.1582
Validation results saved to data/nn_valid.csv
Test results saved to data/nn_test.csv
Best RMSE till round 2 : 24116.5840
Best RMSE till round 3 : 24116.5840
Best RMSE till round 4 : 24116.5840
Validation results saved to data/nn_valid.csv
Test results saved to data/nn_test.csv
Best RMSE till round 5 : 23776.6270
Best RMSE till round 6 : 23776.6270
Best RMSE till round 7 : 23776.6270
Best RMSE till round 8 : 23776.6270
Best RMSE till round 9 : 23776.6270
Validation results saved to data/nn_valid.csv
Test results saved to data/nn_test.csv
Best RMSE till round 10 : 23739.2695
Best RMSE till round 11 : 23739.2695
Best RMSE till round 12 : 23739.2695
Best RMSE till round 13 : 23739.2695
Best RMSE till round 14 : 23739.2695
Best RMSE till round 15 : 23739.2695
Best RMSE till round 16 : 23739.2695
Best RMSE till round 17 : 23739.2695
Best RMSE till round 18 : 23739.2695
Bes

In [None]:
# from sklearn.model_selection import train_test_split

# def full_train_network_and_get_result(model, cat_nu_cols, X_train_full, y_train_full, X_test, lr=0.001, wd=0, seed=42):
#     set_seed(seed)  # Set the random seed for reproducibility    
#     train_indices, valid_indices = train_test_split(np.arange(len(X_train_full)), test_size=0.2, random_state=42)
#     X_train = X_train_full[train_indices]
#     y_train = y_train_full.iloc[train_indices]
#     X_valid = X_train_full[valid_indices]
#     y_valid = y_train_full.iloc[valid_indices]
    
#     criterion = nn.MSELoss()
#     optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

#     X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
#     y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
#     train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
#     train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0)
#     X_valid_tensor = torch.tensor(X_valid, dtype=torch.float32)
#     y_valid_tensor = torch.tensor(y_valid.values, dtype=torch.float32).view(-1, 1)
#     X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

#     epochs = 100
#     best_rmse = float('inf')
#     best_model = None
#     patience = 10
#     patience_counter = 0

#     for epoch in range(epochs):
#         model.train()
#         for inputs, targets in train_loader:
#             optimizer.zero_grad()
#             outputs = model(inputs)
#             loss = criterion(outputs, targets)
#             loss.backward()
#             optimizer.step()

#         model.eval()
#         with torch.no_grad():
#             y_valid_pred = model(X_valid_tensor).numpy().flatten()
#             mse_valid = mean_squared_error(y_valid_tensor.numpy(), y_valid_pred)
#             rmse_valid = np.sqrt(mse_valid)

#         if rmse_valid < best_rmse:
#             best_rmse = rmse_valid
#             best_model = model
#             torch.save(best_model.state_dict(), 'best_model.pth')  # Save the best model
#             patience_counter = 0
#         else:
#             patience_counter += 1
#             if patience_counter >= patience:
#                 print("Early stopping triggered.")
#                 break

#     print(f'Best Valid RMSE: {best_rmse:.4f}')

#     # Saving validation predictions to a CSV file
#     valid_predictions_df = pd.DataFrame({
#         'Id': range(len(y_valid_pred)),
#         'Predicted': y_valid_pred
#     })
#     valid_predictions_df.to_csv('data/full_nn_valid.csv', index=False)
#     print("Validation results saved to data/full_nn_valid.csv")

#     # Saving test predictions to a CSV file if the best model was found
#     if best_model:
#         best_model.eval()
#         with torch.no_grad():
#             y_test_pred = best_model(X_test_tensor).numpy().flatten()

#         test_predictions_df = pd.DataFrame({
#             'Id': range(len(y_test_pred)),
#             'Predicted': y_test_pred
#         })
#         test_predictions_df.to_csv('data/full_nn_test.csv', index=False)
#         print("Test results saved to data/full_nn_test.csv")
        
# model = DeeperMLPModel(input_dim)
# full_train_network_and_get_result(model, cat_nu_cols, X_train_full, y_train_full , X_test,0.001, 0.01)



In [None]:
# # 选择表现最好的模型进行最终训练和预测
# best_model = DeeperMLPModel(input_dim)

# print("使用全量数据训练最终模型...")
# # 转换数据为tensor
# X_train_full_tensor = torch.tensor(X_train_full[cat_nu_cols].values, dtype=torch.float32)
# y_train_full_tensor = torch.tensor(y_train_full.values, dtype=torch.float32).view(-1, 1)
# train_dataset = TensorDataset(X_train_full_tensor, y_train_full_tensor)
# train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# # 训练模型
# criterion = nn.MSELoss()
# optimizer = optim.Adam(best_model.parameters(), lr=0.001)

# epochs = 50
# for epoch in range(epochs):
#     best_model.train()
#     for inputs, targets in train_loader:
#         optimizer.zero_grad()
#         outputs = best_model(inputs)
#         loss = criterion(outputs, targets)
#         loss.backward()
#         optimizer.step()
#     print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

# print("生成测试集预测结果...")
# best_model.eval()
# X_test_tensor = torch.tensor(X_test[cat_nu_cols].values, dtype=torch.float32)
# with torch.no_grad():
#     test_predictions = best_model(X_test_tensor).numpy()

# # 创建预测结果DataFrame
# predictions_df = pd.DataFrame({
#     'Id': range(len(test_predictions)),
#     'Predicted': test_predictions.flatten()
# })

# # 保存预测结果
# predictions_df.to_csv('data/nn_test.csv', index=False)
# print("预测结果已保存到 data/nn_test.csv")