# In [7]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, RobustScaler
import random

# ===================== Random seed setup =====================
def set_seed(seed=42):
    """Seed every random number generator used by this script.

    Seeds Python's ``random`` module, NumPy's global generator, and
    PyTorch's CPU and CUDA generators with the same value.

    Args:
        seed: Integer seed applied to all generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


set_seed(42)

# ===================== Hyper-parameters (this is the only place you need to edit) =====================
# Read by the scaler selection, the model constructor, the data loaders and
# the optimizer further down in this script.
params = dict(
    num_layers=4,
    dropout=True,
    use_bn=True,
    batch_size=64,
    lr=0.001,
    weight_decay=1e-4,
    scaler='standard',  # or 'robust'
)

# ===================== Load data =====================
train = pd.read_csv('./winequality-red_train.csv')
test = pd.read_csv('./winequality-red_goal.csv')

# Every training column except the label is a feature. Compute that frame once
# and reuse its column list so the test matrix is built with exactly the same
# columns in exactly the same order as the training matrix.
features = train.drop('quality', axis=1)

X = features.values
y = train['quality'].values
X_test = test[features.columns].values

# ===================== Feature scaling =====================
# Any value other than 'standard' selects RobustScaler.
scaler = StandardScaler() if params['scaler'] == 'standard' else RobustScaler()

# NOTE(review): the scaler is fitted on the full training matrix here, i.e.
# before any cross-validation split is made, so held-out folds contribute to
# the scaling statistics.
X = scaler.fit_transform(X)
X_test = scaler.transform(X_test)

# ===================== Model definition =====================
class WineQualityNN(nn.Module):
    """Fully connected classifier with a configurable number of hidden layers.

    Each hidden layer is ``Linear -> [BatchNorm1d] -> ReLU -> [Dropout]``;
    the hidden widths are taken, in order, from ``HIDDEN_DIMS``. A final
    linear layer maps the last hidden width to ``num_classes`` logits.

    Args:
        input_dim: Number of input features.
        num_layers: Number of hidden layers, from 0 up to
            ``len(HIDDEN_DIMS)``. With 0 the model is a single linear layer.
        dropout: Whether to add a dropout layer after each activation.
        use_bn: Whether to add batch normalisation after each linear layer.
        num_classes: Number of output logits.
        dropout_p: Drop probability used by every dropout layer.

    Raises:
        ValueError: If ``num_layers`` is outside the supported range.
    """

    # Width of each hidden layer, in order; its length caps ``num_layers``.
    HIDDEN_DIMS = (512, 256, 128, 64, 32, 16)

    def __init__(self, input_dim, num_layers=3, dropout=True, use_bn=True,
                 num_classes=11, dropout_p=0.3):
        super().__init__()

        max_layers = len(self.HIDDEN_DIMS)
        # Without this check a too-large value fails with a bare IndexError,
        # and a negative value silently sizes the output layer from the last
        # entry of the width table, which only breaks later in forward().
        if not 0 <= num_layers <= max_layers:
            raise ValueError(
                f"num_layers must be between 0 and {max_layers}, got {num_layers}"
            )

        dims = [input_dim, *self.HIDDEN_DIMS]
        layers = []
        for in_dim, out_dim in zip(dims[:num_layers], dims[1:num_layers + 1]):
            layers.append(nn.Linear(in_dim, out_dim))
            if use_bn:
                layers.append(nn.BatchNorm1d(out_dim))
            layers.append(nn.ReLU())
            if dropout:
                layers.append(nn.Dropout(dropout_p))

        self.hidden = nn.Sequential(*layers)
        self.output = nn.Linear(dims[num_layers], num_classes)

    def forward(self, x):
        """Return the raw class logits for a batch of feature rows."""
        return self.output(self.hidden(x))

# ===================== K-Fold cross-validation =====================
def train_and_evaluate(X, y, params, folds=5):
    """Estimate classification accuracy with shuffled K-fold cross-validation.

    For every fold a fresh ``WineQualityNN`` is trained with Adam and
    cross-entropy loss on the training split and scored on the held-out
    split. The mean accuracy over all folds is printed and returned.

    Args:
        X: 2-D feature array; rows are selected with the fold index arrays.
        y: 1-D array of integer class labels aligned with ``X``.
        params: Hyper-parameter mapping. Reads ``num_layers``, ``dropout``,
            ``use_bn``, ``batch_size``, ``lr`` and ``weight_decay``; an
            optional ``epochs`` entry overrides the default of 30 epochs.
        folds: Number of cross-validation splits.

    Returns:
        Mean validation accuracy across the folds.
    """
    kf = KFold(n_splits=folds, shuffle=True, random_state=42)
    batch_size = params['batch_size']
    epochs = params.get('epochs', 30)
    accuracies = []

    for train_idx, val_idx in kf.split(X):
        train_dataset = TensorDataset(torch.tensor(X[train_idx], dtype=torch.float32),
                                      torch.tensor(y[train_idx], dtype=torch.long))
        val_dataset = TensorDataset(torch.tensor(X[val_idx], dtype=torch.float32),
                                    torch.tensor(y[val_idx], dtype=torch.long))

        # BatchNorm1d raises in training mode when a batch holds one sample.
        # Drop the trailing batch only in that exact situation, so every other
        # configuration sees the same batches as before.
        n_train = len(train_dataset)
        drop_last = (bool(params['use_bn'])
                     and n_train > batch_size
                     and n_train % batch_size == 1)

        train_loader = DataLoader(train_dataset, batch_size=batch_size,
                                  shuffle=True, drop_last=drop_last)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        model = WineQualityNN(input_dim=X.shape[1],
                              num_layers=params['num_layers'],
                              dropout=params['dropout'],
                              use_bn=params['use_bn'])

        optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'],
                                     weight_decay=params['weight_decay'])
        criterion = nn.CrossEntropyLoss()

        for _ in range(epochs):
            model.train()
            for xb, yb in train_loader:
                optimizer.zero_grad()
                loss = criterion(model(xb), yb)
                loss.backward()
                optimizer.step()

        # Score the held-out split with dropout and batch-norm in eval mode.
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for xb, yb in val_loader:
                predicted = model(xb).argmax(1)
                correct += (predicted == yb).sum().item()
                total += yb.size(0)
        accuracies.append(correct / total)

    mean_accuracy = np.mean(accuracies)
    print(f"K-Fold CV Accuracy: {mean_accuracy:.4f}")
    return mean_accuracy

# ===================== Train on all data and predict the test set =====================
def train_full_and_predict(X, y, X_test, params):
    """Train on the full training set and write test-set predictions to CSV.

    A fresh ``WineQualityNN`` is trained with Adam and cross-entropy loss on
    all of ``X``/``y``. The arg-max class for every row of ``X_test`` is then
    written to ``winequality_predictions.csv`` with the columns ``Id``
    (0-based row position) and ``Predicted``.

    Args:
        X: 2-D training feature array.
        y: 1-D array of integer class labels aligned with ``X``.
        X_test: 2-D feature array to predict, with the same columns as ``X``.
        params: Hyper-parameter mapping. Reads ``num_layers``, ``dropout``,
            ``use_bn``, ``batch_size``, ``lr`` and ``weight_decay``; an
            optional ``epochs`` entry overrides the default of 30 epochs.
    """
    batch_size = params['batch_size']
    epochs = params.get('epochs', 30)

    full_dataset = TensorDataset(torch.tensor(X, dtype=torch.float32),
                                 torch.tensor(y, dtype=torch.long))

    # BatchNorm1d raises in training mode when a batch holds one sample.
    # Drop the trailing batch only in that exact situation, so every other
    # configuration sees the same batches as before.
    n_samples = len(full_dataset)
    drop_last = (bool(params['use_bn'])
                 and n_samples > batch_size
                 and n_samples % batch_size == 1)
    loader = DataLoader(full_dataset, batch_size=batch_size,
                        shuffle=True, drop_last=drop_last)

    model = WineQualityNN(input_dim=X.shape[1],
                          num_layers=params['num_layers'],
                          dropout=params['dropout'],
                          use_bn=params['use_bn'])
    optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'],
                                 weight_decay=params['weight_decay'])
    criterion = nn.CrossEntropyLoss()

    for _ in range(epochs):
        model.train()
        for xb, yb in loader:
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()

    # Predict the test set in a single forward pass, in eval mode.
    model.eval()
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    with torch.no_grad():
        predictions = model(X_test_tensor).argmax(1).numpy()

    # Write the CSV (matches the baseline submission format).
    df_submit = pd.DataFrame({'Id': np.arange(len(predictions)), 'Predicted': predictions})
    df_submit.to_csv('winequality_predictions.csv', index=False)
    print("✅ 預測結果已輸出為 winequality_predictions.csv")

# ===================== Run the pipeline =====================
# Report the cross-validated accuracy first, then fit on all of the training
# data and write the test-set predictions.
acc = train_and_evaluate(X=X, y=y, params=params, folds=5)
train_full_and_predict(X=X, y=y, X_test=X_test, params=params)


# Output:
# K-Fold CV Accuracy: 0.6208
# ✅ 預測結果已輸出為 winequality_predictions.csv
