In [None]:
import torch
from torch.utils.data import TensorDataset
import numpy as np

# Load the train / test / validation datasets from their serialized .pt files
# (loaded in that order).
train_dataset, test_dataset, valid_dataset = (
    torch.load(dataset_path)
    for dataset_path in (
        'flight_data_dataset_train1.pt',
        'flight_data_dataset_test1.pt',
        'flight_data_dataset_val1.pt',
    )
)


In [None]:
from torch.utils.data import DataLoader

# Build the training and test loaders with one shared batching configuration.
dataloader, test_dataloader = (
    DataLoader(split, batch_size=80000, shuffle=True, num_workers=20)
    for split in (train_dataset, test_dataset)
)
    

In [None]:
# Load the embedding (feature-column) metadata.

import pickle

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load 'new_feature_columns.pkl' from a trusted source.
# The context manager guarantees the handle is closed even if unpickling fails.
with open('new_feature_columns.pkl', 'rb') as f:
    feature_columns = pickle.load(f)
print(feature_columns)

In [None]:
# Show the loaded feature-column specification as this cell's output.
feature_columns

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('使用设备:', device)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# feature_columns is a 2-item sequence: unpack it into the dense part and the
# sparse part (the sparse entries carry 'feat_num' / 'embed_dim', as used by
# FlightDelayRNN below).
dense_feature_cols, sparse_feature_cols = feature_columns

In [None]:
import torch
import torch.nn as nn

class FlightDelayRNN(nn.Module):
    """Recurrent per-time-step classifier over dense and embedded sparse features.

    Every sparse (categorical) column gets its own ``nn.Embedding``. At each
    time step the embeddings are concatenated with the dense features, fed
    through a stacked GRU, and projected to ``output_size`` values.

    Args:
        feature_columns: Pair ``(dense_feature_cols, sparse_feature_cols)``.
            Only the length of ``dense_feature_cols`` is used. Each entry of
            ``sparse_feature_cols`` must be a mapping providing ``'feat_num'``
            (passed as ``num_embeddings``) and ``'embed_dim'`` (embedding width).
        hidden_size: Hidden size of the GRU.
        num_layers: Number of stacked GRU layers; inter-layer dropout of 0.2 is
            enabled only when this is greater than 1.
        output_size: Width of the output produced for every time step.
    """

    def __init__(self, feature_columns, hidden_size, num_layers, output_size):
        super(FlightDelayRNN, self).__init__()

        self.dense_feature_cols, self.sparse_feature_cols = feature_columns

        # One embedding table per sparse column, keyed 'embed_<column index>'.
        self.embed_layers = nn.ModuleDict({
            'embed_' + str(i): nn.Embedding(num_embeddings=feat['feat_num'],
                                            embedding_dim=feat['embed_dim'])
            for i, feat in enumerate(self.sparse_feature_cols)
        })

        # Recurrent input width = dense features + the *actual* width of every
        # embedding. Summing per-column widths (instead of multiplying the
        # first column's width by the column count) stays correct when the
        # sparse columns use different embed_dim values, and also works when
        # there are no sparse columns at all.
        self.input_dim = len(self.dense_feature_cols) + sum(
            feat['embed_dim'] for feat in self.sparse_feature_cols
        )

        # NOTE: the attribute is called `lstm` but holds a GRU. The name is kept
        # so that state_dict keys of previously saved checkpoints still match.
        self.lstm = nn.GRU(
            input_size=self.input_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=0.2 if num_layers > 1 else 0  # dropout only applies between stacked layers
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the network on a batch of sequences.

        Args:
            x: 3-D tensor ``(batch, seq_len, n_dense + n_sparse)``. The first
                ``n_dense`` features along the last axis are the dense inputs;
                the remaining ones are sparse category indices (cast to long
                here before the embedding lookup).

        Returns:
            Tensor ``(batch, seq_len, output_size)`` with one output vector per
            time step.
        """
        n_dense = len(self.dense_feature_cols)
        dense_inputs, sparse_inputs = x[:, :, :n_dense], x[:, :, n_dense:]
        sparse_inputs = sparse_inputs.long()

        # Look up each sparse column in its own table, then join along features.
        sparse_embeds = [
            self.embed_layers['embed_' + str(i)](sparse_inputs[:, :, i])
            for i in range(sparse_inputs.shape[-1])
        ]
        sparse_embeds = torch.cat(sparse_embeds, dim=-1)
        x = torch.cat([sparse_embeds, dense_inputs], dim=-1)

        x, _ = self.lstm(x)
        x = self.fc(x)

        return x


# Model hyperparameters (num_layers = number of stacked recurrent layers).
hidden_size, num_layers, output_size = 32, 2, 2

# Build the network and place it on the selected device.
model = FlightDelayRNN(feature_columns, hidden_size, num_layers, output_size).to(device)

In [None]:
hidden_size = 32
num_layers = 2
output_size = 2

# `FlightDelayCNN_LSTM` is not defined anywhere in this notebook, so calling it
# raises NameError on a fresh kernel. FlightDelayRNN is the only model class
# defined here and is also the architecture load_flight_model() rebuilds when
# it reloads the saved weights, so training must use the same class.
model = FlightDelayRNN(feature_columns, hidden_size, num_layers, output_size)
model = model.to(device)

In [None]:
# Show the module structure of the current model as this cell's output.
model

In [None]:
def sequence_mask(X, valid_len, value=0):
    """Overwrite the entries past each row's valid length, in place.

    Args:
        X: Tensor whose second axis is the sequence axis; modified in place.
        valid_len: 1-D tensor with one valid length per row of ``X``.
        value: Fill value written to every position ``>= valid_len[row]``.

    Returns:
        The same tensor object ``X`` after masking.
    """
    positions = torch.arange(X.size(1), dtype=torch.float32, device=X.device)
    # keep[r, c] is True while column c lies inside row r's valid prefix.
    keep = positions.unsqueeze(0) < valid_len.unsqueeze(1)
    X[~keep] = value
    return X

# Quick sanity check: keep the first 1 entry of row 0 and the first 3 entries
# of row 1; every other position is overwritten with the default value 0.
X = torch.tensor([[1, 2, 3, 4], [4, 5, 6, 9]])
sequence_mask(X, torch.tensor([1, 3]))

In [None]:
class MaskedSoftmaxCELoss(nn.CrossEntropyLoss):
    """Cross-entropy over padded sequences with masking and positive-class weighting.

    The per-position loss (``reduction='none'``) is multiplied by a weight that
    is ``pos_weight`` where the label equals 1 and 1 elsewhere, and 0 for every
    position at or beyond the sequence's valid length. The weighted losses are
    summed and divided by the total valid length.

    Args:
        pos_weight: Multiplier applied to the loss of positions labelled 1.
    """

    def __init__(self, pos_weight=4.5):
        super(MaskedSoftmaxCELoss, self).__init__(reduction='none')
        self.pos_weight = pos_weight

    def forward(self, pred, label, valid_len):
        """Compute the masked, weighted mean loss.

        Args:
            pred: Logits ``(batch, seq_len, num_classes)``; permuted to
                ``(batch, num_classes, seq_len)`` for ``nn.CrossEntropyLoss``.
            label: Integer class indices ``(batch, seq_len)``.
            valid_len: 1-D tensor with the number of valid steps per sequence.

        Returns:
            Scalar tensor: weighted loss sum divided by ``valid_len.sum()``.
            When every valid length is 0 the result is 0 but still attached to
            the autograd graph, so ``backward()`` remains safe.
        """
        # Weight tensor, initialised to 1 everywhere.
        weights = torch.ones_like(label, dtype=torch.float)

        # Up-weight the positive class.
        weights[label == 1] *= self.pos_weight

        # Zero the weights of padded positions so they do not contribute.
        weights = sequence_mask(weights, valid_len)

        unweighted_loss = super().forward(pred.permute(0, 2, 1), label)
        weighted_loss = unweighted_loss * weights

        total_loss = weighted_loss.sum()
        total_valid_len = valid_len.sum()

        # Clamp the denominator instead of branching: with at least one valid
        # step the value is unchanged; with none, total_loss is already 0 and
        # the result keeps its grad_fn (a freshly created constant tensor
        # would make loss.backward() raise).
        return total_loss / total_valid_len.clamp(min=1)

In [None]:
import torch
import os
import numpy as np
from tqdm import tqdm
from torch.utils.data import DataLoader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Adam with L2 weight decay.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.001,
    weight_decay=1e-4
)

criterion = MaskedSoftmaxCELoss()

CHECKPOINT_PATH = "mlstm_model_checkpoint.pth"
BEST_MODEL_PATH = "mlstm_best_model.pth"
FINAL_MODEL_PATH = "rnn0_final_model.pth"
# Upper bound on the number of validation batches scored per epoch.
MAX_VAL_BATCHES = 10000

num_epochs = 50
best_val_loss = float('inf')
loss_history = []  # one entry per training batch
lr_history = []    # one entry per epoch, recorded after scheduler.step()

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=num_epochs,
    eta_min=1e-6
)

# Mixed precision only applies on CUDA. fp16 autocast must be paired with a
# GradScaler, otherwise small gradients can underflow to zero.
use_amp = device.type == "cuda"
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

# Model selection uses the validation split. Scoring the test split here would
# leak it into the choice of the "best" model and bias the final test metrics.
val_dataloader = DataLoader(valid_dataset, batch_size=80000, shuffle=False, num_workers=20)


def _build_checkpoint(epoch):
    """Snapshot everything needed to resume training after `epoch`."""
    return {
        'epoch': epoch,
        'model_state': model.state_dict(),
        'optimizer_state': optimizer.state_dict(),
        'scheduler_state': scheduler.state_dict(),
        'scaler_state': scaler.state_dict(),
        'best_val_loss': best_val_loss,
        'train_loss_history': loss_history,
        'lr_history': lr_history
    }


# Resume from an existing checkpoint when one is present.
if os.path.exists(CHECKPOINT_PATH):
    checkpoint = torch.load(CHECKPOINT_PATH, map_location=device)
    model.load_state_dict(checkpoint['model_state'])
    optimizer.load_state_dict(checkpoint['optimizer_state'])
    scheduler.load_state_dict(checkpoint['scheduler_state'])
    # Older checkpoints have no scaler state, and a disabled scaler saves an
    # empty dict; skip both cases.
    if checkpoint.get('scaler_state'):
        scaler.load_state_dict(checkpoint['scaler_state'])
    start_epoch = checkpoint['epoch'] + 1
    best_val_loss = checkpoint['best_val_loss']
    loss_history = checkpoint['train_loss_history']
    lr_history = checkpoint['lr_history']
    print(f"恢复训练：从第{start_epoch}轮开始，最佳验证损失：{best_val_loss:.4f}")
else:
    start_epoch = 0

for epoch in range(start_epoch, num_epochs):
    # ---- training ----
    model.train()
    epoch_loss = 0.0
    progress_bar = tqdm(dataloader, desc=f'Epoch {epoch+1}/{num_epochs}')

    # Count batches explicitly: tqdm only refreshes its `.n` attribute when it
    # redraws, so it can lag behind the true number of processed batches.
    for step, batch in enumerate(progress_bar, start=1):
        dense_feat, sparse_feat, two_labels, valid_lens = [x.to(device) for x in batch]
        optimizer.zero_grad()
        labels = two_labels[:, :, 0]  # first label channel is the training target

        with torch.cuda.amp.autocast(enabled=use_amp):
            features = torch.cat([dense_feat, sparse_feat], dim=-1)
            outputs = model(features)
            loss = criterion(outputs, labels.long(), valid_lens)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_loss = loss.item()
        loss_history.append(batch_loss)
        epoch_loss += batch_loss
        avg_loss = epoch_loss / step
        progress_bar.set_postfix({
            'train_loss': f"{avg_loss:.4f}",
            'lr': f"{optimizer.param_groups[0]['lr']:.2e}"
        })

    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']
    lr_history.append(current_lr)
    print(f"\n当前学习率：{current_lr:.2e}")

    # ---- validation ----
    model.eval()
    val_loss = 0.0
    val_batches = 0
    with torch.no_grad():
        for i, val_batch in enumerate(val_dataloader):
            if i >= MAX_VAL_BATCHES:
                break
            v_dense, v_sparse, two_v_labels, v_lens = [x.to(device) for x in val_batch]
            v_labels = two_v_labels[:, :, 0]
            v_features = torch.cat([v_dense, v_sparse], dim=-1)
            v_outputs = model(v_features)
            val_loss += criterion(v_outputs, v_labels.long(), v_lens).item()
            val_batches += 1

    # Average over the batches actually scored; an empty loader must never be
    # mistaken for a perfect (zero) loss.
    avg_val_loss = val_loss / val_batches if val_batches else float('inf')
    print(f"验证损失：{avg_val_loss:.4f} | 历史最佳：{best_val_loss:.4f}")

    # Update the best score *before* checkpointing so that a resumed run sees
    # the up-to-date best_val_loss.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), BEST_MODEL_PATH)
        print(f"★ 发现新最佳模型，验证损失：{best_val_loss:.4f}")

    checkpoint = _build_checkpoint(epoch)
    torch.save(checkpoint, CHECKPOINT_PATH)

# Final snapshot of the last trained state. `final_checkpoint` was previously
# never defined, which made this cell end in a NameError.
final_checkpoint = _build_checkpoint(max(start_epoch, num_epochs) - 1)
torch.save(final_checkpoint, FINAL_MODEL_PATH)
print(f"训练完成！最佳模型已保存至 {BEST_MODEL_PATH}")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Publication-style defaults for the learning-rate figure.
plt.rcParams.update({
    'font.family': 'Times New Roman',
    'font.size': 12,
    'axes.labelsize': 14,
    'axes.linewidth': 1.2,
    'lines.linewidth': 2,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'mathtext.fontset': 'stix',
    'savefig.dpi': 300,
    'figure.figsize': (8, 5)
})

# lr_history holds one learning rate per completed epoch.
epochs = np.arange(len(lr_history))
max_lr = max(lr_history)
min_lr = min(lr_history)


fig, ax = plt.subplots()


ax.plot(epochs, lr_history,
        color='#1f77b4',
        marker='o',
        markersize=5,
        # Roughly ten markers along the curve; at least 1 so that runs shorter
        # than 10 epochs do not yield markevery=0 (a zero slice step).
        markevery=max(1, len(epochs) // 10),
        label='Learning Rate')


ax.annotate(f'Initial LR: {max_lr:.1e}',
           xy=(0, max_lr),
           xytext=(5, max_lr*1.2),
           arrowprops=dict(arrowstyle="->", lw=1.5))

ax.annotate(f'Min LR: {min_lr:.1e}',
           xy=(epochs[-1], min_lr),
           # Keep the label inside the plot for short runs as well.
           xytext=(max(epochs[-1] - 20, 0), min_lr*1.5),
           arrowprops=dict(arrowstyle="->", lw=1.5))


ax.set_xlabel('Training Epochs', fontweight='bold')
ax.set_ylabel('Learning Rate', fontweight='bold')
# Report the T_max the scheduler was actually configured with (num_epochs)
# rather than a hard-coded number.
ax.set_title(f'Cosine Annealing Learning Rate Schedule\n(T_max={num_epochs} epochs)',
            fontsize=14, pad=15)
ax.grid(True, linestyle='--', alpha=0.6)
ax.set_yscale('log')


# Secondary top axis showing training progress as a percentage.
secax = ax.secondary_xaxis('top')
secax.set_xticks(np.linspace(0, len(epochs), 5))
secax.set_xticklabels(['0%', '25%', '50%', '75%', '100%'])
secax.set_xlabel('Training Progress', fontweight='bold')


ax.legend(frameon=True,
         loc='upper right',
         facecolor='white',
         framealpha=0.8)


plt.tight_layout()
plt.savefig('cosine_lr_schedule.pdf', format='pdf', bbox_inches='tight')
#plt.close()

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt


# Publication-style defaults for the training-loss figure.
mpl.rcParams.update({
    'font.family': 'serif',
    'font.size': 12,
    'axes.labelsize': 14,
    'axes.linewidth': 1.5,
    'xtick.direction': 'in',
    'ytick.direction': 'in',
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'grid.linestyle': '--',
    'grid.alpha': 0.6,
    'mathtext.fontset': 'stix',
    'savefig.dpi': 300,
    'savefig.bbox': 'tight'
})


fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(
    range(1, len(loss_history)+1),
    loss_history,
    marker='o',
    markersize=6,
    linewidth=2,
    color='#1f77b4',
    markeredgecolor='k',
    markeredgewidth=0.5
)


# loss_history receives one value per training batch (not per epoch), so the
# x-axis counts optimisation steps.
ax.set_xlabel('Training Step (batch)', fontweight='bold')
ax.set_ylabel('Loss', fontweight='bold')
ax.set_title('Training Loss Progression', fontsize=16, pad=15)
ax.grid(True, which='both', alpha=0.6)


# One minor tick between each pair of major ticks on both axes.
ax.xaxis.set_minor_locator(mpl.ticker.AutoMinorLocator(2))
ax.yaxis.set_minor_locator(mpl.ticker.AutoMinorLocator(2))
ax.tick_params(which='minor', length=3, width=1)


plt.savefig('training_loss.pdf', dpi=300)
plt.show()

In [None]:
def load_flight_model(device,
                     feature_columns,
                     hidden_size,
                     num_layers,
                     output_size,
                     model_path=BEST_MODEL_PATH):
    """Rebuild a FlightDelayRNN and restore its weights from disk.

    Args:
        device: Device the network is moved to and the weights are mapped onto.
        feature_columns: Feature-column specification passed to FlightDelayRNN.
        hidden_size: Hidden size passed to FlightDelayRNN.
        num_layers: Number of recurrent layers passed to FlightDelayRNN.
        output_size: Output width passed to FlightDelayRNN.
        model_path: File holding the saved state_dict; defaults to the value
            BEST_MODEL_PATH had when this function was defined.

    Returns:
        The network with restored weights, switched to eval mode.
    """
    network = FlightDelayRNN(
        feature_columns=feature_columns,
        hidden_size=hidden_size,
        num_layers=num_layers,
        output_size=output_size,
    )
    network = network.to(device)

    saved_weights = torch.load(model_path, map_location=device)
    network.load_state_dict(saved_weights)
    network.eval()
    return network

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.metrics import (accuracy_score, recall_score, precision_score, 
                           f1_score, roc_auc_score, roc_curve, confusion_matrix)
from torch.utils.data import DataLoader

def evaluate_model(model, dataloader, device, threshold=None):
    """Score a model on every valid (non-padded) time step of a dataset.

    Args:
        model: Network returning logits ``(batch, seq_len, num_classes)``; the
            softmax probability of class index 1 is the positive-class score.
        dataloader: Yields ``(dense_feat, sparse_feat, two_labels, valid_lens)``
            batches; ``two_labels[:, :, 0]`` is used as the target.
        device: Device the batches are moved to before the forward pass.
        threshold: Optional decision threshold on the positive-class
            probability. When ``None`` (default) the threshold maximising
            Youden's J (TPR - FPR) on *this* data is used. Pass a threshold
            chosen on a validation split to avoid tuning it on the split being
            reported.

    Returns:
        Tuple ``(metrics, final_probs, final_labels)``: ``metrics`` maps
        "Accuracy", "Precision", "Recall", "F1" and "AUC" to floats; the two
        arrays hold the flattened probabilities and labels of valid positions.
    """
    model.eval()
    all_logits, all_labels, all_valid_lens = [], [], []

    with torch.no_grad():
        for batch in dataloader:
            dense_feat, sparse_feat, two_labels, valid_lens = [x.to(device) for x in batch]
            labels = two_labels[:, :, 0]
            feature = torch.cat([dense_feat, sparse_feat], dim=-1)
            logits = model(feature)

            all_logits.append(logits.cpu())
            all_labels.append(labels.cpu())
            all_valid_lens.append(valid_lens.cpu())

    logits = torch.cat(all_logits, dim=0)
    labels = torch.cat(all_labels, dim=0).numpy()
    valid_lens = torch.cat(all_valid_lens, dim=0).numpy()

    # True for positions inside each sequence's valid prefix.
    position_mask = (np.arange(logits.shape[1]) < valid_lens[:, None])

    # Softmax while still a torch tensor (no numpy -> torch round trip).
    positive_probs = torch.softmax(logits, dim=-1)[..., 1].numpy()
    final_probs = positive_probs[position_mask].flatten()
    final_labels = labels[position_mask].flatten()

    # ROC-based quantities are undefined unless both classes are present.
    has_both_classes = np.unique(final_labels).size >= 2

    if threshold is not None:
        optimal_threshold = threshold
    elif has_both_classes:
        fpr, tpr, thresholds = roc_curve(final_labels, final_probs)
        J = tpr - fpr  # Youden's J statistic
        optimal_threshold = thresholds[np.argmax(J)]
    else:
        # No ROC curve can be built from a single class (roc_curve would give
        # NaN rates and an arbitrary threshold); fall back to 0.5.
        optimal_threshold = 0.5

    auc = 0.5
    if has_both_classes:
        try:
            auc = roc_auc_score(final_labels, final_probs)
        except ValueError:
            auc = 0.5

    pred_labels = (final_probs >= optimal_threshold).astype(int)

    return {
        "Accuracy": accuracy_score(final_labels, pred_labels),
        "Precision": precision_score(final_labels, pred_labels, zero_division=0),
        "Recall": recall_score(final_labels, pred_labels, zero_division=0),
        # zero_division=0 matches precision/recall above and silences the
        # undefined-metric warning when nothing is predicted positive.
        "F1": f1_score(final_labels, pred_labels, zero_division=0),
        "AUC": auc
    }, final_probs, final_labels

In [None]:
# Show the path the best model's weights are saved to / reloaded from.
BEST_MODEL_PATH

In [None]:
# Reload the best saved weights into a fresh network for evaluation.
model = load_flight_model(
    device=device,
    feature_columns=feature_columns,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
)
device = next(model.parameters()).device

# Deterministic (unshuffled) loaders for the three splits.
train_dataloader, val_dataloader, test_dataloader = (
    DataLoader(split, batch_size=80000, shuffle=False, num_workers=20)
    for split in (train_dataset, valid_dataset, test_dataset)
)

train_metrics, _, _ = evaluate_model(model, train_dataloader, device)
val_metrics, _, _ = evaluate_model(model, val_dataloader, device)
# Keep the test-split probabilities and labels for later inspection.
test_metrics, final_probs, final_labels = evaluate_model(model, test_dataloader, device)


# Side-by-side metric table: one row per metric, one column per split.
header_fmt = "\n{:<15} {:<10} {:<10} {:<10}"
row_fmt = "{:<15} {:<10.4f} {:<10.4f} {:<10.4f}"
print(header_fmt.format("Metric", "Train", "Val", "Test"))
for key in train_metrics:
    row_values = (train_metrics[key], val_metrics[key], test_metrics[key])
    print(row_fmt.format(key + ":", *row_values))