In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Sklearn
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# 随机种子
def set_seed(seed=42):
    """Seed every random number generator this notebook can draw from.

    Seeds Python's built-in ``random`` module, NumPy's legacy global RNG and
    PyTorch (the CPU generator, plus every CUDA device when CUDA is
    available), so that repeated runs start from the same RNG state.

    Args:
        seed: Integer seed applied to all generators. Defaults to 42.
    """
    # Imported locally so the function stays self-contained: the notebook's
    # import cell does not import ``random``.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Seed the RNGs before anything stochastic (weight init, DataLoader shuffling) runs.
set_seed(42)

# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"PyTorch: {torch.__version__}")
print(f"Device: {device}")
if torch.cuda.is_available():
    # Report the name of CUDA device 0.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 加载数据

In [None]:
# Read each split, parse `Timestamp` into datetimes and order the rows
# chronologically with a fresh 0..n-1 index.
train_df = (
    pd.read_csv('../data/train.csv')
    .assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']))
    .sort_values('Timestamp')
    .reset_index(drop=True)
)
test_df = (
    pd.read_csv('../data/test.csv')
    .assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']))
    .sort_values('Timestamp')
    .reset_index(drop=True)
)

# Quick sanity report: split sizes and the distribution of the training target.
print(f"训练集: {train_df.shape}")
print(f"测试集: {test_df.shape}")
print(f"\nTarget统计:")
print(train_df['Target'].describe())

## 特征工程

In [None]:
def create_features(df):
    """Return a copy of ``df`` extended with the engineered price/volume columns.

    Reads the ``Open``, ``High``, ``Low``, ``Close`` and ``Volume`` columns and
    appends, in this order: simple and log returns, moving averages and
    close/MA ratios (5/10/20/50), rolling return volatility (5/10/20), a
    14-period RSI, MACD and its signal line, high/low and close/open ratios,
    a 10-period volume average and volume ratio, and lagged returns/closes
    (1/2/3/5/10). The input frame is not modified; warm-up rows of the
    rolling and lagged columns are left as NaN.
    """
    out = df.copy()
    close = out['Close']
    volume = out['Volume']

    # Returns
    out['returns'] = close.pct_change()
    out['log_returns'] = np.log(close / close.shift(1))

    # Moving averages and the close-to-average ratio
    for window in (5, 10, 20, 50):
        ma_name = f'ma_{window}'
        out[ma_name] = close.rolling(window).mean()
        out[f'ma_ratio_{window}'] = close / out[ma_name]

    # Rolling volatility of simple returns
    for window in (5, 10, 20):
        out[f'volatility_{window}'] = out['returns'].rolling(window).std()

    # RSI over 14 periods; the epsilon avoids dividing by a zero average loss
    price_change = close.diff()
    avg_gain = price_change.where(price_change > 0, 0).rolling(14).mean()
    avg_loss = (-price_change.where(price_change < 0, 0)).rolling(14).mean()
    relative_strength = avg_gain / (avg_loss + 1e-10)
    out['rsi'] = 100 - 100 / (1 + relative_strength)

    # MACD (12/26 EMA difference) and its 9-span signal line
    fast_ema = close.ewm(span=12).mean()
    slow_ema = close.ewm(span=26).mean()
    out['macd'] = fast_ema - slow_ema
    out['macd_signal'] = out['macd'].ewm(span=9).mean()

    # Price ratios (epsilon guards against zero denominators)
    out['high_low_ratio'] = out['High'] / (out['Low'] + 1e-10)
    out['close_open_ratio'] = close / (out['Open'] + 1e-10)

    # Volume relative to its 10-period average
    out['volume_ma_10'] = volume.rolling(10).mean()
    out['volume_ratio'] = volume / (out['volume_ma_10'] + 1e-10)

    # Lagged returns and closes
    for lag in (1, 2, 3, 5, 10):
        out[f'returns_lag_{lag}'] = out['returns'].shift(lag)
        out[f'close_lag_{lag}'] = close.shift(lag)

    return out

# Engineer features for each split independently (create_features works on a copy).
train_featured = create_features(train_df)
test_featured = create_features(test_df)

# Column count after feature engineering (raw columns plus derived ones).
print(f"特征后列数: {train_featured.shape[1]}")

In [None]:
# Columns fed to the model, assembled group by group: raw OHLCV, returns,
# moving-average ratios, rolling volatility, momentum indicators,
# price/volume ratios and lagged returns.
feature_cols = (
    ['Open', 'High', 'Low', 'Close', 'Volume']
    + ['returns', 'log_returns']
    + [f'ma_ratio_{w}' for w in (5, 10, 20, 50)]
    + [f'volatility_{w}' for w in (5, 10, 20)]
    + ['rsi', 'macd', 'macd_signal']
    + ['high_low_ratio', 'close_open_ratio', 'volume_ratio']
    + [f'returns_lag_{k}' for k in (1, 2, 3, 5, 10)]
)

print(f"特征数量: {len(feature_cols)}")

## 数据预处理

In [None]:
# Drop every row that still contains a NaN (rolling/lag warm-up rows and any
# gaps in the raw data).
train_clean = train_featured.dropna().reset_index(drop=True)
print(f"清洗后: {train_clean.shape}")

# Fit both scalers on the chronologically earliest 70% of rows only. The later
# rows become the validation set in the 70:30 split further down, so fitting on
# them would leak validation statistics (mean/std, target min/max) into
# training and make the validation metrics optimistic.
SCALER_FIT_FRACTION = 0.7  # keep in sync with the chronological split ratio
n_scaler_fit = max(1, int(len(train_clean) * SCALER_FIT_FRACTION))
scaler_fit_rows = train_clean.iloc[:n_scaler_fit]

# Standardise the features (zero mean / unit variance w.r.t. the fit rows).
feature_scaler = StandardScaler()
feature_scaler.fit(scaler_fit_rows[feature_cols])
X = feature_scaler.transform(train_clean[feature_cols])

# Target uses a MinMaxScaler to (-1, 1), as mentioned in the paper. Targets
# outside the fit rows' range can now map slightly outside [-1, 1].
target_scaler = MinMaxScaler(feature_range=(-1, 1))
target_scaler.fit(scaler_fit_rows[['Target']])
y = target_scaler.transform(train_clean[['Target']]).flatten()

print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")
print(f"y range: [{y.min():.4f}, {y.max():.4f}]")

In [None]:
# Sliding-window length: how many consecutive rows are flattened into one sample
# (the paper uses window=6).
WINDOW_SIZE = 6

def create_window_data(X, y, window_size):
    """Turn row-aligned features/targets into flattened sliding-window samples for the MLP.

    Sample ``k`` is the ``window_size`` consecutive rows ``X[k : k + window_size]``
    flattened row by row, paired with the target ``y[k + window_size]`` -- the
    target of the row right after the window, whose own features are not part
    of the sample.

    Args:
        X: Array-like with one row per time step, normally ``(n_rows, n_features)``.
        y: Array-like of targets aligned with ``X``; needs at least ``len(X)`` entries.
        window_size: Non-negative number of rows per window.

    Returns:
        ``(X_windowed, y_windowed)``. ``X_windowed`` has shape
        ``(n_samples, window_size * n_features)`` and ``y_windowed`` has
        ``n_samples`` entries, where ``n_samples = max(len(X) - window_size, 0)``.
        With too few rows the arrays are empty but ``X_windowed`` stays 2-D, so
        callers can still read ``X_windowed.shape[1]``.

    Raises:
        ValueError: If ``window_size`` is negative or ``y`` is shorter than ``X``.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if window_size < 0:
        raise ValueError(f"window_size must be non-negative, got {window_size}")
    if len(y) < len(X):
        raise ValueError(f"y has {len(y)} entries but X has {len(X)} rows")

    n_samples = max(len(X) - window_size, 0)
    # Number of values contributed by one row (1 for 1-D input).
    row_width = int(np.prod(X.shape[1:]))
    flat_width = window_size * row_width

    # Broadcasting (n_samples, 1) + (1, window_size) yields, for every sample,
    # the row indices of its window; fancy indexing then gathers all windows at
    # once instead of appending to Python lists in a loop.
    starts = np.arange(n_samples)[:, None]
    offsets = np.arange(window_size)[None, :]
    X_windowed = X[starts + offsets].reshape(n_samples, flat_width)

    # Copy so the result does not alias the caller's target array.
    y_windowed = y[window_size:window_size + n_samples].copy()
    return X_windowed, y_windowed

# Build the flattened window samples from the scaled features and targets.
X_windowed, y_windowed = create_window_data(X, y, WINDOW_SIZE)
print(f"窗口化后 X: {X_windowed.shape}")
print(f"窗口化后 y: {y_windowed.shape}")
print(f"每个样本特征数: {X_windowed.shape[1]}")

In [None]:
# Chronological split (the paper uses 70:30): the earliest 70% of the windows
# train the model, the remaining 30% validate it.
train_size = int(len(X_windowed) * 0.7)

X_train = X_windowed[:train_size]
y_train = y_windowed[:train_size]
X_val = X_windowed[train_size:]
y_val = y_windowed[train_size:]

print(f"训练集: {X_train.shape}")
print(f"验证集: {X_val.shape}")

# Convert to float32 tensors. torch.tensor(..., dtype=torch.float32) replaces
# the legacy torch.FloatTensor constructor; targets get a trailing dimension so
# they match the model's (batch, 1) output.
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_val_t = torch.tensor(X_val, dtype=torch.float32)
y_val_t = torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)

# DataLoader
BATCH_SIZE = 128
train_dataset = TensorDataset(X_train_t, y_train_t)
val_dataset = TensorDataset(X_val_t, y_val_t)

# The model uses BatchNorm1d, which raises in training mode when a batch holds
# a single sample. Drop the trailing batch only in the case where it would
# contain exactly one sample; otherwise every sample is still used.
drop_single_sample_tail = len(train_dataset) % BATCH_SIZE == 1
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=drop_single_sample_tail,
)
# Validation runs in eval mode (running statistics), so order and batch sizes are kept as is.
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"\nBatches - Train: {len(train_loader)}, Val: {len(val_loader)}")

## 定义 MLP 模型

论文架构: Input -> 3 Hidden Layers -> Output

In [None]:
class CryptoMLP(nn.Module):
    """Multi-layer perceptron (DNN) for cryptocurrency prediction.

    Architecture (following the paper):
    - Input: a flat vector of ``input_size`` values (window_size * n_features)
    - One ``Linear -> BatchNorm1d -> ReLU -> Dropout`` stage per entry of
      ``hidden_sizes`` (three stages by default)
    - Output: a single ``Linear`` unit producing the prediction

    Args:
        input_size: Number of input features per sample.
        hidden_sizes: Iterable with the width of each hidden layer. The default
            is an immutable tuple so the default value cannot be shared and
            mutated between instances; lists are still accepted.
        dropout: Dropout probability applied after every hidden activation.
    """
    def __init__(self, input_size, hidden_sizes=(256, 128, 64), dropout=0.2):
        super().__init__()

        layers = []
        prev_size = input_size

        # Hidden stages: each one consumes the previous stage's width.
        for hidden_size in hidden_sizes:
            layers.extend([
                nn.Linear(prev_size, hidden_size),
                nn.BatchNorm1d(hidden_size),
                nn.ReLU(),
                nn.Dropout(dropout)
            ])
            prev_size = hidden_size

        # Output layer: one regression value per sample.
        layers.append(nn.Linear(prev_size, 1))

        self.network = nn.Sequential(*layers)

        # Weight initialisation
        self._init_weights()

    def _init_weights(self):
        """Xavier-uniform initialise every Linear weight and zero its bias."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the ``(batch, 1)`` output."""
        return self.network(x)

# Instantiate the model; the input width is the flattened window length.
input_size = X_train.shape[1]  # window_size * n_features
model = CryptoMLP(
    input_size=input_size,
    hidden_sizes=[256, 128, 64],
    dropout=0.2
).to(device)

# Show the layer stack and the total number of parameters.
print(model)
print(f"\n参数量: {sum(p.numel() for p in model.parameters()):,}")

## 训练模型

In [None]:
# Loss function and optimiser (the paper uses Adam, lr=0.001).
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Multiplies the learning rate by 0.5 when the monitored value stops decreasing
# (patience=5); the training loop steps it with the validation loss.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

# Training parameters
EPOCHS = 100
best_val_loss = float('inf')  # lowest validation loss seen so far
patience = 15                 # epochs without improvement tolerated before early stopping
patience_counter = 0

# Per-epoch loss history, appended to by the training loop and plotted afterwards.
train_losses = []
val_losses = []

print("开始训练...\n")

In [None]:
# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before the first improvement is written.
best_model_path = Path('../models/dnn_best.pth')
best_model_path.parent.mkdir(parents=True, exist_ok=True)

for epoch in range(EPOCHS):
    # --- Training pass ---
    model.train()
    train_loss_sum = 0.0
    train_samples = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so a
        # short final batch does not count as much as a full one.
        batch_n = X_batch.size(0)
        train_loss_sum += loss.item() * batch_n
        train_samples += batch_n
    train_loss = train_loss_sum / max(train_samples, 1)
    train_losses.append(train_loss)

    # --- Validation pass (no gradients, BatchNorm/Dropout in eval mode) ---
    model.eval()
    val_loss_sum = 0.0
    val_samples = 0
    val_preds = []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            output = model(X_batch)
            loss = criterion(output, y_batch)
            batch_n = X_batch.size(0)
            val_loss_sum += loss.item() * batch_n
            val_samples += batch_n
            val_preds.extend(output.cpu().numpy().flatten())
    # Sample-weighted mean, i.e. the MSE over the whole validation set.
    val_loss = val_loss_sum / max(val_samples, 1)
    val_losses.append(val_loss)

    # Monitor the spread of the predictions; a value near zero means the model
    # is emitting an (almost) constant output.
    pred_std = np.std(val_preds)

    scheduler.step(val_loss)

    if (epoch + 1) % 10 == 0 or epoch == 0:
        print(f"Epoch {epoch+1}/{EPOCHS} - Train: {train_loss:.6f} - Val: {val_loss:.6f} - Pred Std: {pred_std:.4f}")

    # Early stopping: checkpoint on every improvement, stop after `patience`
    # consecutive epochs without one.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        torch.save(model.state_dict(), best_model_path)
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"\nEarly stopping at epoch {epoch+1}")
            break

print("\n✅ 训练完成!")

In [None]:
# Training curves: per-epoch loss of both splits, drawn through the
# object-oriented Matplotlib interface.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(train_losses, label='Train')
ax.plot(val_losses, label='Val')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('DNN Training History')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## 评估模型

In [None]:
# Load the best checkpoint. map_location remaps the stored tensors onto the
# current device, so a checkpoint written on a GPU can still be loaded when
# this cell runs on a CPU-only kernel.
model.load_state_dict(torch.load('../models/dnn_best.pth', map_location=device))
model.eval()

# Predict the validation set in one forward pass.
with torch.no_grad():
    val_pred_scaled = model(X_val_t.to(device)).cpu().numpy()

# Undo the target scaling so the metrics are in the original Target units.
val_pred = target_scaler.inverse_transform(val_pred_scaled).flatten()
y_val_orig = target_scaler.inverse_transform(y_val.reshape(-1, 1)).flatten()

# Metrics
rmse = np.sqrt(mean_squared_error(y_val_orig, val_pred))
r2 = r2_score(y_val_orig, val_pred)
corr = np.corrcoef(y_val_orig, val_pred)[0, 1]

print(f"验证集评估:")
print(f"  RMSE: {rmse:.6f}")
print(f"  R²: {r2:.6f}")
print(f"  Correlation: {corr:.6f}")
print(f"\n预测统计:")
print(f"  Mean: {val_pred.mean():.6f}")
print(f"  Std: {val_pred.std():.6f}")

## 测试集预测

In [None]:
# Prepare the test inputs.
# The rolling/lagged features need history, so take the raw tail of the
# training set (window length plus 100 warm-up rows) and compute the features
# on history + test together. Computing them on the test rows alone leaves the
# first rows NaN, and filling those from stale training rows distorts the first
# predictions.
n_test = len(test_df)
train_tail = train_df.tail(WINDOW_SIZE + 100).copy()

# Concatenate the raw rows first, then engineer the features once.
test_with_history = create_features(
    pd.concat([train_tail, test_df], ignore_index=True)
)
# .ffill()/.bfill() replace the deprecated fillna(method=...) form; they only
# patch remaining gaps (warm-up rows of the history, missing raw values).
test_with_history = test_with_history.ffill().bfill()

# Scale with the scaler fitted on the training data.
X_test_scaled = feature_scaler.transform(test_with_history[feature_cols])

# Build the windows with the same helper used for training so both follow the
# same layout; the targets are unknown here, so zeros act as placeholders.
X_test_windowed = create_window_data(
    X_test_scaled, np.zeros(len(X_test_scaled)), WINDOW_SIZE
)[0]

# Keep only the windows whose predicted row belongs to the test set. An
# explicit start index is used because `[-n_test:]` would return everything
# when n_test is 0.
X_test_windowed = X_test_windowed[max(len(X_test_windowed) - n_test, 0):]
assert len(X_test_windowed) == n_test, "not enough history rows to build one window per test row"

print(f"测试数据形状: {X_test_windowed.shape}")

In [None]:
# Predict: move the test windows to the model's device as a float tensor.
X_test_t = torch.FloatTensor(X_test_windowed).to(device)

# Inference only: eval mode and no gradient tracking.
model.eval()
with torch.no_grad():
    test_pred_scaled = model(X_test_t).cpu().numpy()

# Undo the target scaling to get predictions in the original Target units.
test_predictions = target_scaler.inverse_transform(test_pred_scaled).flatten()

# Count, mean and standard deviation of the predictions.
print(f"预测数量: {len(test_predictions)}")
print(f"预测均值: {test_predictions.mean():.6f}")
print(f"预测标准差: {test_predictions.std():.6f}")

## 生成提交文件

In [None]:
# Build the submission: one prediction per test timestamp. The timestamps are
# truncated to the number of predictions.
submission_df = pd.DataFrame({
    'Timestamp': test_df['Timestamp'].values[:len(test_predictions)],
    'Prediction': test_predictions
})

# Save (the submissions directory is created if it does not exist yet).
submission_dir = Path('../submissions')
submission_dir.mkdir(exist_ok=True)

submission_file = submission_dir / 'dnn_submission.csv'
submission_df.to_csv(submission_file, index=False)

# Confirm the path, preview the first rows and summarise the predictions.
print(f"✅ 提交文件已保存: {submission_file}")
print(f"\n预览:")
print(submission_df.head(10))
print(f"\n统计:")
print(submission_df['Prediction'].describe())

In [None]:
# Save the model. torch.save and joblib.dump do not create missing parent
# directories, so make sure the target folder exists first.
model_dir = Path('../models')
model_dir.mkdir(parents=True, exist_ok=True)

# Bundle the weights with the metadata needed to rebuild the inputs.
torch.save({
    'model_state_dict': model.state_dict(),
    'feature_cols': feature_cols,
    'window_size': WINDOW_SIZE,
}, model_dir / 'dnn_model.pth')

# joblib is only needed here, to persist the fitted scalers next to the model.
import joblib
joblib.dump(feature_scaler, model_dir / 'dnn_feature_scaler.pkl')
joblib.dump(target_scaler, model_dir / 'dnn_target_scaler.pkl')

print("✅ 模型已保存")

## 总结

### DNN/MLP 模型架构 (基于论文):
- **Input**: Window_size × N_features (展平的滑动窗口)
- **Hidden Layer 1**: 256 neurons + BatchNorm + ReLU + Dropout
- **Hidden Layer 2**: 128 neurons + BatchNorm + ReLU + Dropout
- **Hidden Layer 3**: 64 neurons + BatchNorm + ReLU + Dropout
- **Output**: 1 (预测值)

### 论文发现:
- MLP 作为基准模型
- LSTM 变体（特别是 BD-LSTM）表现最佳
- 单变量模型通常优于多变量模型