In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
import torch
import os
from torch.utils.tensorboard import SummaryWriter
from torch.nn.functional import l1_loss

In [10]:
# Report the visible GPUs and pick the compute device for the whole notebook.
if torch.cuda.is_available():
    gpu_count = torch.cuda.device_count()
    print("Available CUDA devices:", gpu_count)
    for i in range(gpu_count):
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")
    # Keep using the second GPU (cuda:1) when it exists, but fall back to
    # cuda:0 on single-GPU machines instead of failing at the first .to(device).
    device = torch.device(f'cuda:{min(1, gpu_count - 1)}')
else:
    print("CUDA is not available.")
    # No GPU at all: run on the CPU rather than pointing at a missing device.
    device = torch.device('cpu')

inputdir = '../data/processed/'
resultdir = '../data/result/'
BatchSize = 2048

# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(resultdir, exist_ok=True)

Available CUDA devices: 2
Device 0: NVIDIA GeForce RTX 4090
Device 1: NVIDIA GeForce RTX 4090


In [3]:
def load_dataset(filepath, feature_names, target_name='target', batch_size=BatchSize, shuffle=True, train=True):
    """Read a CSV file and wrap the selected columns in a DataLoader.

    Args:
        filepath: path of the CSV file, passed to ``pd.read_csv``.
        feature_names: list of column names used as model inputs.
        target_name: column holding the label; only read when ``train`` is True.
        batch_size: number of rows per batch.
        shuffle: forwarded to ``DataLoader``. Note that the default (True) also
            applies when ``train`` is False; pass ``shuffle=False`` whenever the
            order of the produced batches must match the order of the CSV rows.
        train: when True each batch is ``(features, targets)``; when False each
            batch is the one-element tuple ``(features,)``.

    Returns:
        A ``DataLoader`` over a ``TensorDataset`` of float32 tensors.
    """
    frame = pd.read_csv(filepath)

    def _to_float_tensor(values):
        # float32 tensor with a singleton axis inserted at position 1:
        # features become (rows, 1, n_features), targets become (rows, 1).
        return torch.tensor(values, dtype=torch.float32).unsqueeze(1)

    tensors = [_to_float_tensor(frame[feature_names].values)]
    if train:
        tensors.append(_to_float_tensor(frame[target_name].values))

    return DataLoader(TensorDataset(*tensors), batch_size=batch_size, shuffle=shuffle)

# Feature columns and dataset file paths
feature_names = [
    'hour_sin', 'hour_cos', 
    'day_of_week_sin', 'day_of_week_cos', 
    'month_sin', 'month_cos',
    'etat_barre_0', 'etat_barre_1', 'etat_barre_2', 'etat_barre_3'
]

# Only the training loader is shuffled (load_dataset's default).
train_loader = load_dataset(f'{inputdir}train_dataset.csv', feature_names)
# Evaluation does not depend on batch order, so keep it deterministic.
eval_loader = load_dataset(f'{inputdir}eval_dataset.csv', feature_names, shuffle=False)
# The test loader MUST NOT shuffle: predictions are later numbered 1..N in the
# order they are produced, so a shuffled loader would attach each prediction
# to the wrong test row.
test_loader = load_dataset(f'{inputdir}test_dataset_x.csv', feature_names, train=False, shuffle=False)


In [4]:
class LSTMModel(nn.Module):
    """Stacked LSTM whose last time-step output feeds a linear layer.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the hidden state of every LSTM layer.
        num_layers: number of stacked LSTM layers.
        output_size: number of values produced for each input sequence.
        dropout: dropout probability handed to ``nn.LSTM``; forced to 0 when
            ``num_layers`` is 1.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Pass dropout only for multi-layer stacks; a single layer gets 0.
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_size) to (batch, output_size)."""
        # Allocate the zero initial states directly with x's device and dtype:
        # no CPU allocation + transfer on every call, and the model keeps
        # working after .double()/.half() instead of hitting a dtype mismatch.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)
        out, _ = self.lstm(x, (h0, c0))
        # Only the last time step is used for the prediction.
        return self.fc(out[:, -1, :])


In [9]:
# Three-layer LSTM regressor with one output value, placed on the notebook device.
model = LSTMModel(
    input_size=len(feature_names),
    hidden_size=50,
    num_layers=3,
    output_size=1,
    dropout=0.5,
).to(device)
# Mean absolute error is the training objective.
criterion = nn.L1Loss()
optimizer = Adam(model.parameters(), lr=0.001)

def _save_checkpoint(path, epoch, model, optimizer, loss_value):
    """Write a resumable snapshot; ``epoch`` is the last fully completed epoch."""
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss_value,
    }, path)


def train_model(model, train_loader, criterion, optimizer, num_epochs, log_interval=10, checkpoint_dir='./logs/LSTM_v2/', TBlog_dir='./runs/LSTM_v2/', device=None):
    """Train ``model`` for ``num_epochs`` more epochs, resuming from a checkpoint if one exists.

    Args:
        model: module to optimise; its weights are updated in place.
        train_loader: iterable with ``len()`` that yields ``(inputs, targets)``.
        criterion: loss callable taking ``(outputs, targets)``.
        optimizer: optimizer bound to ``model``'s parameters.
        num_epochs: number of epochs to run in this call (added on top of the
            epoch stored in the checkpoint, if any).
        log_interval: write the loss to TensorBoard every this many batches.
        checkpoint_dir: directory holding ``checkpoint.pth``; created if missing.
        TBlog_dir: TensorBoard log directory.
        device: device to train on. Defaults to the device the model's
            parameters already live on (CPU for a parameter-less model), so the
            function no longer assumes that a second GPU exists.

    Side effects:
        Prints the loss of every batch, writes TensorBoard scalars and
        overwrites ``checkpoint.pth`` every 100 batches and after every epoch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')
    model.to(device)

    # Make sure the checkpoint directory exists.
    os.makedirs(checkpoint_dir, exist_ok=True)
    checkpoint_path = os.path.join(checkpoint_dir, 'checkpoint.pth')
    checkpoint_interval = 100  # batches between mid-epoch snapshots

    # Resume from an existing checkpoint, continuing with the next epoch.
    start_epoch = 1
    if os.path.exists(checkpoint_path):
        print("Loading checkpoint...")
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1

    # TensorBoard writer; closed in ``finally`` so an interrupted run (e.g. a
    # KeyboardInterrupt in the notebook) still flushes its events.
    writer = SummaryWriter(TBlog_dir)
    placement_reported = False
    try:
        model.train()
        for epoch in range(start_epoch, num_epochs + start_epoch):
            loss_value = None
            for batch_idx, (inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(device), targets.to(device)
                if not placement_reported:
                    # One-off sanity check: are parameters and inputs on a GPU?
                    print(next(model.parameters()).is_cuda)
                    print(inputs.is_cuda)
                    placement_reported = True

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                loss_value = loss.item()

                # Record the loss in TensorBoard.
                if batch_idx % log_interval == 0:
                    writer.add_scalar('Loss/train', loss_value, epoch * len(train_loader) + batch_idx)

                print(f'Epoch {epoch}, Batch {batch_idx+1}, Loss: {loss_value}')

                # Periodic mid-epoch snapshot. It is labelled with the last
                # *completed* epoch so that a resume repeats the interrupted
                # epoch instead of silently skipping the rest of it.
                if batch_idx % checkpoint_interval == 0:
                    _save_checkpoint(checkpoint_path, epoch - 1, model, optimizer, loss_value)

            # End-of-epoch snapshot: without it the weights produced after the
            # last multiple-of-100 batch (including the final ones) were never
            # written to disk.
            if loss_value is not None:
                _save_checkpoint(checkpoint_path, epoch, model, optimizer, loss_value)
    finally:
        # Close the TensorBoard writer.
        writer.close()


# Run ten more epochs; train_model resumes from its checkpoint file when one exists.
train_model(
    model,
    train_loader,
    criterion,
    optimizer,
    num_epochs=10,
)

Loading checkpoint...
True
True
Epoch 21, Batch 1, Loss: 326.6923828125
Epoch 21, Batch 2, Loss: 319.3524169921875
Epoch 21, Batch 3, Loss: 320.24639892578125
Epoch 21, Batch 4, Loss: 311.2806396484375
Epoch 21, Batch 5, Loss: 342.4772644042969
Epoch 21, Batch 6, Loss: 323.30877685546875
Epoch 21, Batch 7, Loss: 331.1265869140625
Epoch 21, Batch 8, Loss: 311.82244873046875
Epoch 21, Batch 9, Loss: 344.8785400390625
Epoch 21, Batch 10, Loss: 318.82977294921875
Epoch 21, Batch 11, Loss: 323.7701416015625
Epoch 21, Batch 12, Loss: 335.04095458984375
Epoch 21, Batch 13, Loss: 340.42474365234375
Epoch 21, Batch 14, Loss: 336.9275207519531
Epoch 21, Batch 15, Loss: 319.11602783203125
Epoch 21, Batch 16, Loss: 329.70172119140625
Epoch 21, Batch 17, Loss: 340.95849609375
Epoch 21, Batch 18, Loss: 340.4143981933594
Epoch 21, Batch 19, Loss: 329.3349609375
Epoch 21, Batch 20, Loss: 351.347412109375
Epoch 21, Batch 21, Loss: 340.1726989746094
Epoch 21, Batch 22, Loss: 331.08837890625
Epoch 21, Ba

In [11]:
def calculate_mae(model, data_loader, device):
    """Return the mean absolute error of ``model`` over ``data_loader``.

    The model is switched to eval mode and gradients are disabled. Absolute
    errors are summed over all batches and divided by the number of rows
    (first dimension of the targets), so batches of unequal size are weighted
    correctly.

    Args:
        model: module mapping a batch of inputs to predictions.
        data_loader: iterable yielding ``(inputs, targets)`` pairs.
        device: device the batches are moved to before the forward pass.

    Returns:
        The summed absolute error divided by the sample count, as a float.
    """
    model.eval()
    abs_error_sum, sample_count = 0.0, 0

    with torch.no_grad():
        for batch_inputs, batch_targets in data_loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)
            batch_error = l1_loss(model(batch_inputs), batch_targets, reduction='sum')
            abs_error_sum += batch_error.item()
            sample_count += batch_targets.size(0)

    return abs_error_sum / sample_count


# Mean absolute error of the current model on the held-out evaluation split.
average_mae = calculate_mae(model=model, data_loader=eval_loader, device=device)
print(f'Average MAE on evaluation data: {average_mae:.4f}')

Average MAE on evaluation data: 313.8738


In [17]:
def evaluate_model(model, loader, device=None):
    """Run ``model`` over an unlabeled loader and collect rounded predictions.

    Args:
        model: module mapping a batch of features to predictions; it is
            switched to eval mode.
        loader: iterable whose items are sequences with the feature tensor at
            index 0 (e.g. a DataLoader over a one-tensor TensorDataset).
        device: device the features are moved to. Defaults to the device of the
            model's parameters (CPU for a parameter-less model) instead of
            relying on a notebook-global ``device`` variable.

    Returns:
        A list with one NumPy array per input row, holding that row's model
        output rounded to the nearest integer value. The order follows the
        order in which ``loader`` yields its batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    predictions = []
    with torch.no_grad():
        for batch in loader:
            features = batch[0].to(device)
            outputs = model(features)
            # Round on the tensor, then extend row by row with NumPy arrays.
            predictions.extend(outputs.round().cpu().numpy())
    return predictions

# Predict on the unlabeled test split.
predictions = evaluate_model(model=model, loader=test_loader)

# One row per prediction in a single 'estimate_q' column with 1-based ids.
# NOTE: ids follow the order in which predictions were produced, so they match
# the rows of the test file only if test_loader does not shuffle.
predictions_df = pd.DataFrame(predictions, columns=['estimate_q'])
predictions_df.index = predictions_df.index + 1
row_count = len(predictions_df)
print(row_count)

# Write the predictions to CSV, using the index as the 'id' column.
predictions_df.to_csv(
    f'{resultdir}LSTM_v2_predictions.csv',
    index_label='id',
)

print("Predictions saved to 'LSTM_v2_predictions.csv', with IDs starting from 1.")

439298
Predictions saved to 'LSTM_v2_predictions.csv', with IDs starting from 1.
