In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
# ✅ ライブラリのインストールとインポート
!pip install -U scikit-learn einops

import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from einops import rearrange




In [3]:
from einops import rearrange

class SpectralViT(nn.Module):
    def __init__(self, seq_len=91, patch_size=7, dim=128, depth=4, heads=4, mlp_dim=256):
        super().__init__()
        assert seq_len % patch_size == 0, "patch size must divide seq_len"
        self.patch_size = patch_size
        self.num_patches = seq_len // patch_size
        self.dim = dim

        self.linear_proj = nn.Linear(patch_size, dim)
        self.pos_embedding = nn.Parameter(torch.randn(1, self.num_patches, dim))

        encoder_layer = nn.TransformerEncoderLayer(d_model=dim, nhead=heads, dim_feedforward=mlp_dim, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=depth)

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, 1)
        )

    def forward(self, x):
        # x: (B, 1, 91) → (B, 91)
        x = x.squeeze(1)  # remove channel dim
        x = rearrange(x, 'b (n p) -> b n p', p=self.patch_size)  # (B, num_patches, patch_size)
        x = self.linear_proj(x)  # (B, num_patches, dim)
        x = x + self.pos_embedding  # add positional embedding
        x = self.transformer(x)
        x = x.mean(dim=1)  # global average pooling
        return self.mlp_head(x)

In [4]:
class SpectralDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X).unsqueeze(1)  # (N, 1, 91)
        self.y = torch.tensor(y).unsqueeze(1)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


In [None]:
base_path = "/content/drive/MyDrive/Colab Notebooks/Tea"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for i in range(1, 11):
    round_name = f"Round{str(i).zfill(2)}"
    data_path = os.path.join(base_path, round_name)
    print(f"\n📁 {round_name} 処理中...")

    # データ読み込み
    train_df = pd.read_csv(os.path.join(data_path, "training.csv"))
    test_df = pd.read_csv(os.path.join(data_path, "test.csv"))
    spectral_cols = train_df.columns[:-1]
    target_col = train_df.columns[-1]

    X_train = train_df[spectral_cols].values.astype(np.float32)
    y_train = train_df[target_col].values.astype(np.float32)
    X_test = test_df[spectral_cols].values.astype(np.float32)
    y_test = test_df[target_col].values.astype(np.float32)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    train_ds = SpectralDataset(X_train, y_train)
    test_ds = SpectralDataset(X_test, y_test)
    train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_ds, batch_size=32)

    # モデル構築
    model = SpectralViT(seq_len=91, patch_size=7).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    # 学習
    for epoch in range(100):
        model.train()
        total_loss = 0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            pred = model(xb)
            loss = criterion(pred, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")

    # 評価
    model.eval()
    y_true, y_pred = [], []
    grads_list, inputs_list = [], []

    for xb, yb in test_loader:
        xb = xb.to(device)
        xb.requires_grad = True
        preds = model(xb)
        preds.sum().backward()

        y_true.extend(yb.numpy().flatten())
        y_pred.extend(preds.detach().cpu().numpy().flatten())
        grads_list.append(xb.grad.detach().cpu().numpy())
        inputs_list.append(xb.detach().cpu().numpy())

    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    print(f"✅ {round_name} - RMSE: {rmse:.3f}, R²: {r2:.3f}")

    # 結果保存
    pd.DataFrame({'Observed': y_true, 'Predicted': y_pred}).to_csv(
        os.path.join(data_path, "predictions_vit.csv"), index=False)

    # 重要度評価（Input × Gradient）
    grads = np.concatenate(grads_list, axis=0).squeeze(1)
    inputs = np.concatenate(inputs_list, axis=0).squeeze(1)
    importance = (inputs * grads).mean(axis=0)  # (91,)

    pd.DataFrame({
        'Wavelength': spectral_cols,
        'Importance': importance
    }).to_csv(os.path.join(data_path, "importance_vit.csv"), index=False)



📁 Round01 処理中...
Epoch 10, Loss: 132.0614
Epoch 20, Loss: 48.5916
Epoch 30, Loss: 26.4947
Epoch 40, Loss: 22.9657
Epoch 50, Loss: 18.2114
Epoch 60, Loss: 13.9599
Epoch 70, Loss: 13.1346
Epoch 80, Loss: 13.2364
Epoch 90, Loss: 12.6906
Epoch 100, Loss: 12.9629
✅ Round01 - RMSE: 3.615, R²: 0.890

📁 Round02 処理中...
Epoch 10, Loss: 131.5358
Epoch 20, Loss: 49.7634
Epoch 30, Loss: 30.7927
Epoch 40, Loss: 34.8227
Epoch 50, Loss: 20.0939
Epoch 60, Loss: 17.7418
Epoch 70, Loss: 16.4571
Epoch 80, Loss: 14.4232
Epoch 90, Loss: 26.9344
