In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
import torch
import os
from torch.utils.tensorboard import SummaryWriter
from torch.nn.functional import l1_loss

In [2]:
# Report the visible CUDA devices so the device choice below can be checked.
if torch.cuda.is_available():
    print("Available CUDA devices:", torch.cuda.device_count())
    for i in range(torch.cuda.device_count()):
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")
else:
    print("CUDA is not available.")

# Prefer the second GPU, but fall back to the first GPU or the CPU so the
# notebook still runs on machines that do not have two CUDA devices.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    device = torch.device('cuda:1')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
inputdir = '../data/constructed/'
resultdir = '../data/result/'
BatchSize = 2030
# Feature columns and file path configuration.
feature_names = [
    'hour_sin', 'hour_cos', 
    'day_of_week_sin', 'day_of_week_cos', 
    'month_sin', 'month_cos',
    'etat_barre_0', 'etat_barre_1', 'etat_barre_2', 'etat_barre_3',
    'constructed'
]

# exist_ok avoids the check-then-create race and makes re-running the cell safe.
os.makedirs(resultdir, exist_ok=True)

Available CUDA devices: 2
Device 0: NVIDIA GeForce RTX 4090
Device 1: NVIDIA GeForce RTX 4090


In [4]:
def load_dataset(filepath, feature_names, target_name='target', batch_size=BatchSize, shuffle=True, train=True):
    """Read a CSV file and wrap its feature (and target) columns in a DataLoader.

    Every row becomes a sequence of length 1: the feature tensor has shape
    (n_rows, 1, len(feature_names)) and, when present, the target tensor has
    shape (n_rows, 1). Both are float32.

    Args:
        filepath: Path of the CSV file to read.
        feature_names: Column names used as model inputs.
        target_name: Column holding the regression target (read only when
            ``train`` is True).
        batch_size: Number of rows per batch.
        shuffle: Whether to shuffle labelled data. Ignored when ``train`` is
            False: unlabelled data is always served in file order so that
            predictions can be matched back to rows by position.
        train: True if the file contains the target column; False for
            feature-only data, in which case each batch is a 1-tuple.

    Returns:
        A ``torch.utils.data.DataLoader`` over the resulting TensorDataset.
    """
    data = pd.read_csv(filepath)
    features = data[feature_names].values
    # unsqueeze(1) inserts the sequence dimension expected by a batch_first LSTM.
    features_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(1)
    print(features_tensor.shape)

    if train:
        targets = data[target_name].values
        targets_tensor = torch.tensor(targets, dtype=torch.float32).unsqueeze(1)
        dataset = TensorDataset(features_tensor, targets_tensor)
    else:
        dataset = TensorDataset(features_tensor)
        # Without targets the only way to align outputs with rows is by
        # position, so shuffling here would silently scramble predictions.
        shuffle = False

    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


In [5]:
# All three loaders are built here because later cells use train_loader and
# test_loader; leaving them commented out breaks a fresh Restart & Run All.
train_loader = load_dataset(f'{inputdir}train_dataset_constructed.csv', feature_names)
eval_loader = load_dataset(f'{inputdir}eval_dataset_constructed.csv', feature_names)
# The test set has no targets and its prediction ids are assigned by row
# position, so it must be read in file order (shuffle disabled explicitly).
test_loader = load_dataset(f'{inputdir}test_dataset_constructed_x.csv', feature_names, train=False, shuffle=False)

torch.Size([1422638, 1, 11])


In [4]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per input sequence.
        dropout: Dropout probability passed to ``nn.LSTM``; replaced by 0 when
            ``num_layers`` is 1.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of sequences to one output vector per sequence.

        Args:
            x: Input tensor indexed as (batch, time, feature), matching
                ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_size) computed from the LSTM output
            at the last time step.
        """
        # new_zeros creates the initial states directly on x's device and in
        # x's dtype, so the model also works after .double()/.half() and no
        # CPU allocation plus host-to-device copy happens on every call.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)
        out, _ = self.lstm(x, (h0, c0))
        # Keep only the last time step.
        return self.fc(out[:, -1, :])


In [5]:
# Three-layer LSTM regressor over the configured feature columns, optimised
# with Adam against the mean absolute error.
model = LSTMModel(
    input_size=len(feature_names),
    hidden_size=50,
    num_layers=3,
    output_size=1,
    dropout=0.5,
)
model = model.to(device)
criterion = nn.L1Loss()
optimizer = Adam(params=model.parameters(), lr=0.001)

def train_model(model, train_loader, criterion, optimizer, num_epochs, log_interval=10, checkpoint_dir='./logs/LSTM_v2/',TBlog_dir='./runs/LSTM_v2/', device=None):
    """Train ``model`` for ``num_epochs`` further epochs with checkpointing.

    If ``checkpoint_dir`` already holds ``checkpoint.pth``, model and optimizer
    state are restored from it and epoch numbering continues from the stored
    epoch + 1. A checkpoint is written every 100 batches and once more at the
    end of every epoch, so the weights on disk match the weights in memory
    when the function returns.

    Args:
        model: Module to train; it is moved to ``device``.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable applied as ``criterion(outputs, targets)``.
        optimizer: Optimizer updating ``model``'s parameters.
        num_epochs: Number of epochs to run in this call.
        log_interval: The loss is written to TensorBoard every this many batches.
        checkpoint_dir: Directory that holds ``checkpoint.pth``.
        TBlog_dir: TensorBoard log directory.
        device: Device to train on. Defaults to the device the model's
            parameters already live on (CPU for a parameter-free model).

    Returns:
        None. Results are the updated model/optimizer, the checkpoint file and
        the TensorBoard logs.
    """
    if device is None:
        # Train where the caller placed the model instead of assuming that a
        # second GPU exists.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')
    else:
        device = torch.device(device)
    model.to(device)

    checkpoint_path = os.path.join(checkpoint_dir, 'checkpoint.pth')
    # Make sure the checkpoint directory exists.
    os.makedirs(checkpoint_dir, exist_ok=True)

    start_epoch = 1
    # Resume from an existing checkpoint if there is one.
    if os.path.exists(checkpoint_path):
        print("Loading checkpoint...")
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1  # continue with the next epoch

    def save_checkpoint(epoch, loss_value):
        # Persist everything needed to resume training.
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss_value,
        }, checkpoint_path)

    # Created after the checkpoint is loaded so a failed load leaks no writer;
    # try/finally closes it even if training is interrupted.
    writer = SummaryWriter(TBlog_dir)
    devices_reported = False
    try:
        model.train()
        for epoch in range(start_epoch, num_epochs + start_epoch):
            loss_value = None
            last_batch_saved = False
            for batch_idx, (inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(device), targets.to(device)
                if not devices_reported:
                    # One-off sanity check: are parameters and inputs on a GPU?
                    print(next(model.parameters()).is_cuda)
                    print(inputs.is_cuda)
                    devices_reported = True
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                # Read the scalar once; each .item() call synchronises with the device.
                loss_value = loss.item()
                # Record the loss in TensorBoard.
                if batch_idx % log_interval == 0:
                    writer.add_scalar('Loss/train', loss_value, epoch * len(train_loader) + batch_idx)

                print(f'Epoch {epoch}, Batch {batch_idx+1}, Loss: {loss_value}')
                # Periodic checkpoint.
                last_batch_saved = batch_idx % 100 == 0
                if last_batch_saved:
                    save_checkpoint(epoch, loss_value)
            # Without this, updates made after the last multiple-of-100 batch
            # would never reach the checkpoint file.
            if loss_value is not None and not last_batch_saved:
                save_checkpoint(epoch, loss_value)
    finally:
        # Close the TensorBoard writer.
        writer.close()


# Run one more epoch; an existing checkpoint is picked up automatically.
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=1,
)

Loading checkpoint...
True
True
Epoch 62, Batch 1, Loss: 169.9705047607422
Epoch 62, Batch 2, Loss: 169.2726287841797
Epoch 62, Batch 3, Loss: 189.20648193359375
Epoch 62, Batch 4, Loss: 190.93142700195312
Epoch 62, Batch 5, Loss: 159.04183959960938
Epoch 62, Batch 6, Loss: 185.5626220703125
Epoch 62, Batch 7, Loss: 157.0928955078125
Epoch 62, Batch 8, Loss: 185.16195678710938
Epoch 62, Batch 9, Loss: 167.30276489257812
Epoch 62, Batch 10, Loss: 180.39268493652344
Epoch 62, Batch 11, Loss: 178.6160125732422
Epoch 62, Batch 12, Loss: 169.9308624267578
Epoch 62, Batch 13, Loss: 176.42127990722656
Epoch 62, Batch 14, Loss: 180.1195526123047
Epoch 62, Batch 15, Loss: 177.68218994140625
Epoch 62, Batch 16, Loss: 174.66014099121094
Epoch 62, Batch 17, Loss: 179.386962890625
Epoch 62, Batch 18, Loss: 176.37574768066406
Epoch 62, Batch 19, Loss: 177.43934631347656
Epoch 62, Batch 20, Loss: 180.46414184570312
Epoch 62, Batch 21, Loss: 193.04234313964844
Epoch 62, Batch 22, Loss: 175.88258361816

In [6]:
def calculate_mae(model, data_loader, device):
    """Compute the mean absolute error of ``model`` over ``data_loader``.

    The model is switched to evaluation mode and gradients are disabled.
    Absolute errors are summed over every target element and divided by the
    number of target elements, so the result is also correct when each sample
    has more than one target value.

    Args:
        model: Module called as ``model(inputs)``.
        data_loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The mean absolute error as a Python float.

    Raises:
        ValueError: If ``data_loader`` yields no target elements.
    """
    model.eval()
    total_abs_error = 0.0
    total_count = 0

    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            total_abs_error += l1_loss(outputs, targets, reduction='sum').item()
            # reduction='sum' adds up every element, so count elements rather
            # than rows to keep numerator and denominator consistent.
            total_count += targets.numel()

    if total_count == 0:
        raise ValueError("calculate_mae received no samples: data_loader is empty")
    return total_abs_error / total_count


# Score the current model weights on the evaluation loader.
average_mae = calculate_mae(
    model=model,
    data_loader=eval_loader,
    device=device,
)
print(f'Average MAE on evaluation data: {average_mae:.4f}')

Average MAE on evaluation data: 313.5852


In [7]:
def evaluate_model(model, loader, device=None):
    """Run ``model`` over feature-only batches and collect rounded predictions.

    Args:
        model: Module called as ``model(features)``.
        loader: Iterable whose items are sequences with the feature tensor at
            index 0.
        device: Device the features are moved to. Defaults to the device of
            the model's parameters (CPU for a parameter-free model), so the
            function does not depend on a notebook-level global.

    Returns:
        A list with one NumPy array per input row, holding that row's model
        output rounded to the nearest integer value.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    predictions = []
    with torch.no_grad():
        for batch in loader:
            features = batch[0].to(device)
            outputs = model(features)
            # extend() iterates over the first axis, adding one array per row.
            predictions.extend(outputs.round().cpu().numpy())
    return predictions

# Predict on the test loader and export the result as an id-indexed CSV.
predictions = evaluate_model(model, test_loader)

predictions_df = pd.DataFrame(predictions, columns=['estimate_q'])
predictions_df.index = predictions_df.index + 1  # Adjust index to start from 1
print(len(predictions_df))
# Save the predictions to a CSV file. The file name is held in one variable so
# the confirmation message below cannot drift from the file actually written.
predictions_filename = 'LSTM_v2_new_predictions.csv'
predictions_df.to_csv(f'{resultdir}{predictions_filename}', index_label='id')

print(f"Predictions saved to '{predictions_filename}', with IDs starting from 1.")

439298
Predictions saved to 'LSTM_v2_predictions.csv', with IDs starting from 1.
