In [1]:
from google.colab import drive
drive.mount('/content/drive')
import pandas as pd

# Ï†ïÌôïÌïú ÌååÏùº Í≤ΩÎ°ú
file_path = '/content/drive/MyDrive/current_df.csv'

# Ïù∏ÏΩîÎî© ÌôïÏù∏ (Í∏∞Î≥∏ÏùÄ utf-8, Ïïà ÎêòÎ©¥ cp949)
df = pd.read_csv(file_path, encoding='utf-8')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

# üß™ Deep Attention Regressor for Missing Value Imputation (ta)

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt

# ‚úÖ Step 2: Define Features and Target
features = ['wd', 'ws', 'rn_day', 'rn_hr1', 'hm', 'ta_chi', 'congestion']
target = 'ta'

# ‚úÖ Step 3: Handle Missing Values
df[features + [target]] = df[features + [target]].replace(-99, np.nan)
train_df = df[df[target].notna()].copy()
test_df = df[df[target].isna()].copy()

X_train = train_df[features].fillna(0)
y_train = train_df[target]
X_test = test_df[features].fillna(0)

# ‚úÖ Step 4: Convert to Tensors
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_tensor = torch.tensor(X_train.values, dtype=torch.float32).to(device)
y_tensor = torch.tensor(y_train.values, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32).to(device)

train_loader = DataLoader(TensorDataset(X_tensor, y_tensor), batch_size=2048, shuffle=True)

# ‚úÖ Step 5: Define Deep Attention Model
class DeepAttentionRegressor(nn.Module):
    def __init__(self, input_dim, d_model=128, num_heads=4, num_layers=3, ff_dim=256, dropout=0.1):
        super().__init__()
        self.input_fc = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=num_heads,
            dim_feedforward=ff_dim,
            dropout=dropout,
            batch_first=True,
            activation='gelu'
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.output_head = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Linear(d_model, d_model // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model // 2, 1)
        )

    def forward(self, x):
        x = self.input_fc(x).unsqueeze(1)
        x = self.transformer(x)
        x = x.squeeze(1)
        return self.output_head(x).squeeze(1)

# ‚úÖ Step 6: Train Model
model = DeepAttentionRegressor(input_dim=len(features)).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)
criterion = nn.MSELoss()
loss_history = []

for epoch in range(20):
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        optimizer.zero_grad()
        pred = model(xb)
        loss = criterion(pred, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    loss_history.append(total_loss)
    print(f"Epoch {epoch+1} Loss: {total_loss:.4f}")

# ‚úÖ Step 7: Predict and Fill Missing Values
model.eval()
with torch.no_grad():
    ta_pred = model(X_test_tensor).cpu().numpy()

df.loc[df['ta'].isna(), 'ta'] = ta_pred

# ‚úÖ Step 8: Visualize Training Loss
plt.plot(loss_history)
plt.title("Training Loss (MSE)")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid(True)
plt.show()


Epoch 1 Loss: 34864.8973
Epoch 2 Loss: 13901.8265
Epoch 3 Loss: 12575.1814


# ÏÉà ÏÑπÏÖò

In [None]:

# ‚úÖ Fast GPU Attention Training (for ta imputation)
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

# ÌÖêÏÑú Î≥ÄÌôò Î∞è GPU Ïù¥Îèô
X_tensor = torch.tensor(X_train.values, dtype=torch.float32).to(device)
y_tensor = torch.tensor(y_train.values, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32).to(device)

# Í≤ΩÎüâ Ïñ¥ÌÖêÏÖò Î™®Îç∏ Ï†ïÏùò
class FastAttentionRegressor(nn.Module):
    def __init__(self, input_dim, d_model=32, num_heads=1, num_layers=1, ff_dim=64, dropout=0.1):
        super().__init__()
        self.input_fc = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=num_heads,
            dim_feedforward=ff_dim,
            dropout=dropout,
            batch_first=True,
            activation="gelu"
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.output_head = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Linear(d_model, d_model // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model // 2, 1)
        )

    def forward(self, x):
        x = self.input_fc(x).unsqueeze(1)
        x = self.transformer(x)
        x = x.squeeze(1)
        return self.output_head(x).squeeze(1)

# Îç∞Ïù¥ÌÑ∞Î°úÎçî Î∞è ÌïôÏäµ Íµ¨ÏÑ±
train_ds = TensorDataset(X_tensor, y_tensor)
train_loader = DataLoader(train_ds, batch_size=512, shuffle=True)

model = FastAttentionRegressor(input_dim=X_tensor.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

loss_history = []
for epoch in range(5):
    model.train()
    total_loss = 0
    loop = tqdm(train_loader, desc=f"[Epoch {epoch+1}/5]", leave=False)
    for xb, yb in loop:
        optimizer.zero_grad()
        pred = model(xb)
        loss = criterion(pred, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        loop.set_postfix(loss=loss.item())
    loss_history.append(total_loss)
    print(f"‚úÖ Epoch {epoch+1} | Total Loss: {total_loss:.4f}", flush=True)

# Í≤∞Ï∏°Ïπò ÏòàÏ∏° Î∞è Î∞òÏòÅ
model.eval()
with torch.no_grad():
    ta_pred = model(X_test_tensor).cpu().numpy()

df.loc[df["ta"].isna(), "ta"] = ta_pred
