In [1]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import numpy as np
import pickle
import re
import csv
from torch.utils.data import DataLoader, TensorDataset
import ast
from tqdm import tqdm
import pandas as pd
from sklearn.preprocessing import StandardScaler
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt import objective_functions
import os


In [2]:
# =============================================================================
# Model Architecture: iTransformer (Inverted Transformer)
# =============================================================================

class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product attention.

    The query/key/value inputs are each passed through a learned linear map,
    split along the feature axis into ``num_heads`` heads of width
    ``d_model // num_heads``, attended per head with a softmax over the key
    axis, merged back together and passed through an output projection.

    Args:
        d_model: Size of the last (feature) dimension of inputs and output.
        num_heads: Number of attention heads; must divide ``d_model`` evenly.

    Raises:
        ValueError: If ``num_heads`` is not positive or does not divide
            ``d_model``.
    """

    def __init__(self, d_model, num_heads):
        super().__init__()
        # Without this check `d_model // num_heads` silently truncates and the
        # later `.view(...)` either fails with an opaque shape error or
        # regroups features across tokens.
        if num_heads <= 0 or d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by a positive num_heads ({num_heads})"
            )
        self.num_heads = num_heads
        self.depth = d_model // num_heads
        self.W_Q = nn.Linear(d_model, d_model)
        self.W_K = nn.Linear(d_model, d_model)
        self.W_V = nn.Linear(d_model, d_model)
        self.W_O = nn.Linear(d_model, d_model)

    def forward(self, Q, K, V):
        """Attend ``Q`` over ``K``/``V``.

        Each input is viewed as (batch, tokens, d_model) by ``_split_heads``;
        the result has the same layout as ``Q``.
        """
        Q, K, V = self.W_Q(Q), self.W_K(K), self.W_V(V)
        Q, K, V = self._split_heads(Q), self._split_heads(K), self._split_heads(V)

        # Scale by sqrt(head width) before the softmax over the key axis.
        scores = torch.matmul(Q, K.transpose(-1, -2)) / (self.depth ** 0.5)
        attn = torch.softmax(scores, dim=-1)

        return self.W_O(self._combine_heads(torch.matmul(attn, V)))

    def _split_heads(self, x):
        """Reshape (batch, tokens, d_model) -> (batch, heads, tokens, depth)."""
        return x.view(x.size(0), -1, self.num_heads, self.depth).transpose(1, 2)

    def _combine_heads(self, x):
        """Reshape (batch, heads, tokens, depth) -> (batch, tokens, d_model)."""
        x = x.transpose(1, 2).contiguous()
        return x.view(x.size(0), -1, self.num_heads * self.depth)


class EncoderLayer(nn.Module):
    """One post-norm encoder block: self-attention, then a feed-forward net.

    Both sub-layers are wrapped in a residual connection followed by
    ``LayerNorm``. The feed-forward net widens to ``4 * d_model`` with a ReLU
    in between.

    Args:
        d_model: Feature width of the tokens.
        num_heads: Number of heads handed to ``MultiHeadAttention``.
    """

    def __init__(self, d_model, num_heads):
        super().__init__()
        ffn_width = 4 * d_model
        self.attention = MultiHeadAttention(d_model, num_heads)
        self.ffn = nn.Sequential(
            nn.Linear(d_model, ffn_width),
            nn.ReLU(),
            nn.Linear(ffn_width, d_model),
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x):
        """Apply self-attention and the feed-forward net, each with residual + norm."""
        attended = self.attention(x, x, x)
        hidden = self.norm1(x + attended)
        expanded = self.ffn(hidden)
        return self.norm2(hidden + expanded)


class iTransformer(nn.Module):
    """
    iTransformer: inverted Transformer for time-series forecasting.

    Each variable (not each time step) is treated as a token: the time axis of
    a variable is linearly embedded into one ``hidden_dim`` vector, the
    variable tokens pass through the encoder stack, and each token is then
    linearly mapped to ``output_len`` future steps.

    - Input:  (Batch, SeqLen, NumVars)
    - Output: (Batch, PredLen, NumVars)

    Args:
        input_len: Number of time steps in the input window (SeqLen).
        output_len: Number of time steps to predict (PredLen).
        num_features: Number of variables (NumVars).
        hidden_dim: Token embedding width.
        num_heads: Attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, input_len, output_len, num_features, hidden_dim, num_heads, num_layers):
        super().__init__()
        # Time axis -> hidden: one embedding per variable.
        self.input_proj = nn.Linear(input_len, hidden_dim)
        # Learnable per-variable embedding, initialised to zeros.
        self.pos_embed = nn.Parameter(torch.zeros(1, num_features, hidden_dim))
        self.encoder = nn.ModuleList()
        for _ in range(num_layers):
            self.encoder.append(EncoderLayer(hidden_dim, num_heads))
        # Hidden -> prediction horizon.
        self.output_proj = nn.Linear(hidden_dim, output_len)

    def forward(self, x):
        """Map a (B, L, N) window to a (B, P, N) forecast."""
        per_variable = x.permute(0, 2, 1)                       # (B, L, N) -> (B, N, L)
        hidden = self.input_proj(per_variable) + self.pos_embed  # (B, N, D)

        for block in self.encoder:
            hidden = block(hidden)

        forecast = self.output_proj(hidden)                      # (B, N, P)
        return forecast.permute(0, 2, 1)                         # (B, P, N)


In [3]:
# =============================================================================
# Configuration
# =============================================================================
CSV_FILES = [
    'BROADCOM 5년치.csv', 'ALPHABET C 5년치.csv', 'AMAZON 5년치.csv', 
    'APPLE 5년치.csv', 'META 5년치.csv', 'MICROSOFT 5년치.csv', 
    'NETFLIX 5년치.csv', 'NVIDIA 5년치.csv', 'PALANTIR 5년치.csv', 'TESLA 5년치.csv'
]
FEATURE_COLS = ['Close/Last', 'Volume', 'Open', 'High', 'Low']
# Stock name = leading run of capital letters in the file name.
STOCK_NAMES = [re.match(r'^[A-Z]+', f).group(0) for f in CSV_FILES]

INPUT_LEN = 504      # ~2 years of trading days
OUTPUT_LEN = 14      # 14-day prediction horizon
HIDDEN_DIM = 128
NUM_HEADS = 8
NUM_LAYERS = 6
NUM_EPOCHS = 100
BATCH_SIZE = 16
TRAIN_RATIO = 0.8
SEED = 42            # fixed seed so Restart & Run All reproduces the same model

NUM_FEATURES = len(CSV_FILES) * len(FEATURE_COLS)  # 10 stocks * 5 features = 50
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed the RNGs that drive weight initialisation and DataLoader shuffling.
np.random.seed(SEED)
torch.manual_seed(SEED)

print(f"[Config] Device: {DEVICE}, Features: {NUM_FEATURES}, Input: {INPUT_LEN}d, Output: {OUTPUT_LEN}d")

# =============================================================================
# 1. Load and Merge Data
# =============================================================================
print("\n[1/5] Loading data...")
all_data_dfs = []
column_names = []

# Reuse STOCK_NAMES instead of re-running the regex for every file.
for csv_file, stock_name in zip(CSV_FILES, STOCK_NAMES):
    df = pd.read_csv(csv_file, parse_dates=['Date'], index_col='Date').sort_index()
    
    cleaned_cols = {}
    for col in FEATURE_COLS:
        series = df[col]
        # Text columns carry "$" and "," characters; strip them before parsing.
        if series.dtype == 'object':
            series = series.str.replace(r'[$,]', '', regex=True)
        series = pd.to_numeric(series, errors='coerce')
        col_name = f"{stock_name}_{col}"
        cleaned_cols[col_name] = series
        column_names.append(col_name)
    all_data_dfs.append(pd.DataFrame(cleaned_cols))

# Align all stocks on the date index and keep only fully populated days.
price_df = pd.concat(all_data_dfs, axis=1).dropna()
print(f"      Price data: {price_df.shape[0]} days x {price_df.shape[1]} features")

# =============================================================================
# 2. Convert to Returns & Split
# =============================================================================
print("[2/5] Computing returns & splitting data...")
returns_df = price_df.pct_change().dropna()
# pct_change can yield inf (division by zero); replace nan/inf with 0.
returns_np = np.nan_to_num(returns_df.to_numpy(dtype=np.float32), nan=0.0, posinf=0.0, neginf=0.0)

# Chronological split; the scaler is fitted on the training part only.
train_size = int(len(returns_np) * TRAIN_RATIO)
train_returns = returns_np[:train_size]
test_returns = returns_np[train_size:]

scaler = StandardScaler().fit(train_returns)
train_scaled = scaler.transform(train_returns)

print(f"      Train: {len(train_returns)}d, Test: {len(test_returns)}d")

# =============================================================================
# 3. Create Sliding Windows
# =============================================================================
print("[3/5] Creating sliding windows...")
num_windows = len(train_scaled) - INPUT_LEN - OUTPUT_LEN + 1
# Fail early with a clear message; otherwise an empty DataLoader leads to a
# ZeroDivisionError when the average loss is printed.
if num_windows <= 0:
    raise ValueError(
        f"Training split has {len(train_scaled)} rows but at least "
        f"{INPUT_LEN + OUTPUT_LEN} are required for one window."
    )

inputs, outputs = [], []
for i in range(num_windows):
    inputs.append(train_scaled[i : i + INPUT_LEN])
    outputs.append(train_scaled[i + INPUT_LEN : i + INPUT_LEN + OUTPUT_LEN])

inputs = torch.tensor(np.array(inputs), dtype=torch.float32)
outputs = torch.tensor(np.array(outputs), dtype=torch.float32)
data_loader = DataLoader(TensorDataset(inputs, outputs), batch_size=BATCH_SIZE, shuffle=True)

print(f"      Samples: {len(inputs)}, Shape: {list(inputs.shape)}")

# =============================================================================
# 4. Train Model
# =============================================================================
print(f"[4/5] Training model ({NUM_EPOCHS} epochs)...")
model = iTransformer(INPUT_LEN, OUTPUT_LEN, NUM_FEATURES, HIDDEN_DIM, NUM_HEADS, NUM_LAYERS).to(DEVICE)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0.0
    for x_batch, y_batch in data_loader:
        x_batch, y_batch = x_batch.to(DEVICE), y_batch.to(DEVICE)
        optimizer.zero_grad()
        loss = criterion(model(x_batch), y_batch)
        loss.backward()
        # Clip the global gradient norm to 1.0 before the optimizer step.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        total_loss += loss.item()
    
    if (epoch + 1) % 20 == 0:
        print(f"      Epoch {epoch+1:3d}/{NUM_EPOCHS}: Loss = {total_loss/len(data_loader):.6f}")

# =============================================================================
# 5. Save Artifacts
# =============================================================================
print("[5/5] Saving model and data...")
torch.save(model.state_dict(), "unified_model.pt")
with open("unified_scaler.pkl", 'wb') as f:
    pickle.dump(scaler, f)
with open("unified_column_map.pkl", 'wb') as f:
    pickle.dump(column_names, f)
with open("unified_returns.pkl", 'wb') as f:
    pickle.dump(returns_df, f)
with open("unified_prices.pkl", 'wb') as f:
    pickle.dump(price_df, f)
# Persist every hyperparameter needed to rebuild the model, including the
# head and layer counts, so loaders do not have to hard-code them.
with open("unified_config.pkl", 'wb') as f:
    pickle.dump({
        'stock_names': STOCK_NAMES, 'column_names': column_names,
        'input_len': INPUT_LEN, 'output_len': OUTPUT_LEN,
        'hidden_dim': HIDDEN_DIM, 'num_features': NUM_FEATURES,
        'num_heads': NUM_HEADS, 'num_layers': NUM_LAYERS,
    }, f)

print("\n[Done] All artifacts saved.")

[Config] Device: cuda, Features: 50, Input: 504d, Output: 14d

[1/5] Loading data...
      Price data: 1256 days x 50 features
[2/5] Computing returns & splitting data...
      Train: 1004d, Test: 251d
[3/5] Creating sliding windows...
      Samples: 487, Shape: [487, 504, 50]
[4/5] Training model (100 epochs)...
      Epoch  20/100: Loss = 0.626030
      Epoch  40/100: Loss = 0.391078
      Epoch  60/100: Loss = 0.223439
      Epoch  80/100: Loss = 0.137127
      Epoch 100/100: Loss = 0.091201
[5/5] Saving model and data...

[Done] All artifacts saved.


In [None]:
# =============================================================================
# Inference: Load Model & Optimize Portfolio
# =============================================================================

# --- Load Artifacts ---
# NOTE(security): pickle.load can execute arbitrary code. Only load artifact
# files that were produced by a trusted training run.
with open("unified_config.pkl", 'rb') as f:
    cfg = pickle.load(f)
with open("unified_scaler.pkl", 'rb') as f:
    scaler = pickle.load(f)
with open("unified_returns.pkl", 'rb') as f:
    returns_df = pickle.load(f)
with open("unified_prices.pkl", 'rb') as f:
    price_df = pickle.load(f)

DEVICE = torch.device('cpu')
# Take the architecture from the config when it records it; config files that
# lack these keys fall back to the values used for training (8 heads, 6 layers).
model = iTransformer(
    cfg['input_len'], cfg['output_len'], cfg['num_features'], 
    cfg['hidden_dim'],
    num_heads=cfg.get('num_heads', 8),
    num_layers=cfg.get('num_layers', 6),
).to(DEVICE)
model.load_state_dict(torch.load("unified_model.pt", map_location=DEVICE, weights_only=True))
model.eval()

# --- Predict Future Returns ---
returns_np = np.nan_to_num(returns_df.to_numpy(dtype=np.float32), nan=0.0, posinf=0.0, neginf=0.0)
# The model's input projection expects exactly input_len time steps; fail with
# a clear message instead of a shape error inside nn.Linear.
if len(returns_np) < cfg['input_len']:
    raise ValueError(
        f"Need at least {cfg['input_len']} rows of returns for inference, got {len(returns_np)}."
    )
# Most recent input_len days, scaled with the training-time scaler.
scaled_input = scaler.transform(returns_np)[-cfg['input_len']:]
input_tensor = torch.tensor(scaled_input, dtype=torch.float32).unsqueeze(0).to(DEVICE)

with torch.no_grad():
    scaled_pred = model(input_tensor).squeeze(0).cpu().numpy()
# Back to raw daily returns: rows are forecast days, columns follow cfg['column_names'].
predicted_returns = scaler.inverse_transform(scaled_pred)

# --- Compute Expected Returns ---
current_prices = price_df.iloc[-1]
predicted_mus = {}
results = []

for stock in cfg['stock_names']:
    col = f"{stock}_Close/Last"
    idx = cfg['column_names'].index(col)
    # Compound the daily close-price returns over the forecast horizon.
    cum_ret = np.prod(1 + predicted_returns[:, idx]) - 1
    pred_price = current_prices[col] * (1 + cum_ret)
    # NOTE(review): compounding a short-horizon forecast to 252 trading days
    # magnifies forecast error considerably; treat the annual figure as a
    # ranking signal rather than a literal return estimate.
    annual_mu = (1 + cum_ret) ** (252.0 / cfg['output_len']) - 1
    predicted_mus[stock] = annual_mu
    results.append([stock, current_prices[col], pred_price, cum_ret * 100, annual_mu * 100])

# --- Display Price Predictions ---
print("=" * 60)
print(" PRICE PREDICTIONS (14-day horizon)")
print("=" * 60)
print(f"{'Stock':<10} {'Now':>9} {'Pred':>9} {'14d Ret':>9} {'Annual':>10}")
print("-" * 60)
for r in results:
    print(f"{r[0]:<10} ${r[1]:>7.2f}  ${r[2]:>7.2f}  {r[3]:>+7.2f}%  {r[4]:>+8.1f}%")
print("=" * 60)

# --- Portfolio Optimization ---
close_df = price_df[[f"{s}_Close/Last" for s in cfg['stock_names']]]
close_df.columns = cfg['stock_names']
# Annualised sample covariance computed from the historical close prices.
S = risk_models.sample_cov(close_df, frequency=252)
mu_series = pd.Series(predicted_mus)

ef = EfficientFrontier(mu_series, S)
# NOTE(review): PyPortfolioOpt warns that extra objectives may not behave as
# expected with max_sharpe because it transforms the problem - confirm the
# L2 regularisation has the intended effect.
ef.add_objective(objective_functions.L2_reg, gamma=10.0)
ef.max_sharpe(risk_free_rate=0.02)
weights = ef.clean_weights()

# --- Display Portfolio Weights ---
print("\n" + "=" * 40)
print(" OPTIMAL PORTFOLIO ALLOCATION")
print("=" * 40)
sorted_weights = sorted(weights.items(), key=lambda x: -x[1])
for stock, w in sorted_weights:
    # Skip allocations at or below 0.1%.
    if w > 0.001:
        bar = "#" * int(w * 30)
        print(f"{stock:<10} {w*100:>6.2f}% |{bar}")
print("=" * 40)

# --- Performance Metrics ---
perf = ef.portfolio_performance(risk_free_rate=0.02)
print(f"\nExpected Return: {perf[0]*100:.1f}%  |  Volatility: {perf[1]*100:.1f}%  |  Sharpe: {perf[2]:.2f}")


[1/4] Loading saved artifacts...
      Loaded model: input=504d, output=14d
[2/4] Predicting future returns...
[3/4] Computing expected returns...

Stock           Current  Predicted   14d Return       Annual
BROADCOM     $   369.63 $   383.35       +3.71%       +92.7%
ALPHABET     $   281.82 $   260.23       -7.66%       -76.2%
AMAZON       $   244.22 $   242.01       -0.90%       -15.1%
APPLE        $   270.37 $   261.66       -3.22%       -44.5%
META         $   648.35 $   774.89      +19.52%     +2375.7%
MICROSOFT    $   517.81 $   510.51       -1.41%       -22.5%
NETFLIX      $  1118.86 $  1284.15      +14.77%     +1094.4%
NVIDIA       $   202.49 $   213.82       +5.60%      +166.4%
PALANTIR     $   200.47 $   208.61       +4.06%      +104.6%
TESLA        $   456.56 $   484.65       +6.15%      +192.9%

[4/4] Optimizing portfolio...

Optimal Portfolio Weights:
------------------------------
  BROADCOM       1.63%
  META          61.71%
  NETFLIX       27.94%
  NVIDIA         3.20%

