In [737]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, RobustScaler
import random

In [738]:
# Select the compute device: CUDA when a GPU is visible to PyTorch, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device  # bare expression so the notebook echoes the chosen device

device(type='cpu')

In [739]:
# Seed every random number generator this notebook relies on.
def set_seed(seed=42):
    """Seed Python's ``random``, NumPy, and PyTorch (CPU and all CUDA devices).

    Args:
        seed: Integer seed handed unchanged to each generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


set_seed(42)

In [740]:
# Hyperparameters shared by preprocessing, the model, and the optimiser.
params = dict(
    num_layers=4,        # number of hidden stages passed to WineQualityNN
    dropout=True,        # add a dropout layer after each hidden stage
    use_bn=True,         # add batch normalisation after each hidden linear layer
    batch_size=64,       # mini-batch size for the training DataLoader
    lr=0.001,            # Adam learning rate
    weight_decay=1e-4,   # Adam weight decay
    scaler='standard',   # 'standard' or 'robust'
)

In [741]:
#### TODO
# Load the training CSVs and tag every row with a `wine_type` indicator
# column (0 = red, 1 = white) so both wines can share a single model.
red = pd.read_csv('./winequality-red_train.csv')
white = pd.read_csv('./winequality-white_train.csv')
red['wine_type'] = 0
white['wine_type'] = 1

train = pd.concat([red, white], ignore_index=True)

# Load the goal (test) CSVs the same way. Red rows are concatenated before
# white rows; the prediction step builds its IDs from len(test_red), so this
# order must be kept.
test_red = pd.read_csv('./winequality-red_goal.csv')
test_white = pd.read_csv('./winequality-white_goal.csv')
test_red['wine_type'] = 0
test_white['wine_type'] = 1

test_all = pd.concat([test_red, test_white], ignore_index=True)

print(red.head())
print(white.head())
#### TODO

# NOTE: an optional feature-selection step was sketched here but left
# disabled; every column except `quality` is used as a feature.

# Split features and labels. The feature column list is computed once and
# reused so the training and test matrices share the same column order.
feature_cols = train.columns.drop('quality')
X = train[feature_cols].values
y = train['quality'].values
X_test = test_all[feature_cols].values

# Scale the features. The scaler is fitted on the training data only and then
# applied to the test data. An unknown scaler name is rejected explicitly
# instead of silently falling back to RobustScaler.
scaler_classes = {'standard': StandardScaler, 'robust': RobustScaler}
if params['scaler'] not in scaler_classes:
    raise ValueError(
        f"params['scaler'] must be one of {sorted(scaler_classes)}, got {params['scaler']!r}"
    )
scaler = scaler_classes[params['scaler']]()
X = scaler.fit_transform(X)
X_test = scaler.transform(X_test)


   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            6.9             0.670         0.06             2.1      0.080   
1            7.5             0.530         0.06             2.6      0.086   
2           11.1             0.180         0.48             1.5      0.068   
3            8.3             0.705         0.12             2.6      0.092   
4            7.4             0.670         0.12             1.6      0.186   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                    8                    33  0.99845  3.68       0.71   
1                   20                    44  0.99650  3.38       0.59   
2                    7                    15  0.99730  3.22       0.64   
3                   12                    28  0.99940  3.51       0.72   
4                    5                    21  0.99600  3.39       0.54   

   alcohol  quality  wine_type  
0      9.6        5          0  
1     10.7        6 

In [742]:
# # 切分訓練集與測試集
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

# # 轉換為 PyTorch Tensor
# X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
# y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
# X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
# y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

# # 創建 DataLoader
# train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
# test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
# train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [743]:
# # 定義神經網絡模型
# class WineQualityNN(nn.Module):
#     def __init__(self, input_dim):
#         super(WineQualityNN, self).__init__()
#         #### TODO
#         self.fc1 = nn.Linear(input_dim, 64)
#         self.bn1 = nn.BatchNorm1d(64)
#         self.fc2 = nn.Linear(64, 32)
#         self.bn2 = nn.BatchNorm1d(32)
#         self.fc3 = nn.Linear(32, 11)
#         self.dropout = nn.Dropout(0.15)

#     def forward(self, x):
#         x = self.dropout(F.relu(self.bn1(self.fc1(x))))
#         x = self.dropout(F.relu(self.bn2(self.fc2(x))))
#         x = self.fc3(x)
#         return x
class WineQualityNN(nn.Module):
    """Fully connected classifier over tabular wine features.

    The network is a stack of ``num_layers`` hidden stages, each built as
    ``Linear -> [BatchNorm1d] -> ReLU -> [Dropout]``, followed by one linear
    layer that emits a logit per class. Hidden widths are taken in order from
    ``HIDDEN_DIMS``.

    Args:
        input_dim: Number of input features per sample.
        num_layers: Number of hidden stages, between 0 and
            ``len(HIDDEN_DIMS)`` inclusive. With 0 the model reduces to a
            single linear layer on the raw inputs.
        dropout: Whether to append a dropout layer to each hidden stage.
        use_bn: Whether to insert batch normalisation after each hidden
            linear layer.
        dropout_p: Dropout probability used when ``dropout`` is true.
        num_classes: Number of output logits. The default of 11 presumably
            covers integer quality scores 0-10 -- confirm against the labels.

    Raises:
        ValueError: If ``num_layers`` is outside the supported range.
    """

    # Widths of the hidden stages, consumed from the left.
    HIDDEN_DIMS = (512, 256, 128, 64, 32, 16)

    def __init__(self, input_dim, num_layers=3, dropout=True, use_bn=True,
                 dropout_p=0.3, num_classes=11):
        super(WineQualityNN, self).__init__()
        max_layers = len(self.HIDDEN_DIMS)
        # Without this check a value above the maximum fails with an opaque
        # IndexError, and a negative value silently picks a width from the
        # end of the list, producing an output layer of the wrong input size.
        if not 0 <= num_layers <= max_layers:
            raise ValueError(
                f"num_layers must be between 0 and {max_layers}, got {num_layers}"
            )

        dims = [input_dim, *self.HIDDEN_DIMS]
        layers = []
        for in_dim, out_dim in zip(dims[:num_layers], dims[1:num_layers + 1]):
            layers.append(nn.Linear(in_dim, out_dim))
            if use_bn:
                layers.append(nn.BatchNorm1d(out_dim))
            layers.append(nn.ReLU())
            if dropout:
                layers.append(nn.Dropout(dropout_p))
        self.hidden = nn.Sequential(*layers)
        # dims[num_layers] is the width of the last hidden stage, or
        # input_dim when there are no hidden stages.
        self.output = nn.Linear(dims[num_layers], num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)`` for ``x``."""
        return self.output(self.hidden(x))


In [744]:
# # Initialise the model (disabled legacy snippet; keyword names updated to
# # match the current WineQualityNN signature)
# model = WineQualityNN(input_dim=X_train.shape[1],
#                           num_layers=4,
#                           use_bn=False,
#                           dropout=False).to(device)

# criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.002, weight_decay=1e-05)


In [None]:
# Train the model and export the predictions
def train_and_predict(X, y, X_test, params, red_len, device=None,
                      output_path='winequality_predictions.csv'):
    """Train a ``WineQualityNN`` on ``(X, y)``, predict ``X_test``, and write a CSV.

    Args:
        X: 2-D array of training features (converted to float32).
        y: 1-D array of integer class labels (converted to int64).
        X_test: 2-D array of test features with the same column layout as
            ``X``. The first ``red_len`` rows get ``red_<n>`` IDs and the
            remaining rows get ``white_<n>`` IDs.
        params: Dict with the keys ``'batch_size'``, ``'num_layers'``,
            ``'dropout'``, ``'use_bn'``, ``'lr'`` and ``'weight_decay'``.
            An optional ``'epochs'`` key sets the number of training epochs
            (default 30).
        red_len: Number of leading rows of ``X_test`` that are red wines.
        device: Torch device (or device string) to train on. ``None`` picks
            CUDA when available and the CPU otherwise.
        output_path: Destination of the prediction CSV.

    Returns:
        The ``pandas.DataFrame`` written to ``output_path``, with columns
        ``ID`` and ``quality`` (the argmax class index per test row).

    Raises:
        ValueError: If ``red_len`` is not between 0 and ``len(X_test)``.
    """
    n_test = len(X_test)
    # Checked before training so a bad red_len fails fast instead of causing
    # an ID/prediction length mismatch after all epochs have run.
    if not 0 <= red_len <= n_test:
        raise ValueError(f"red_len must be between 0 and {n_test}, got {red_len}")

    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(device)

    dataset = TensorDataset(torch.tensor(X, dtype=torch.float32),
                            torch.tensor(y, dtype=torch.long))
    batch_size = params['batch_size']
    # BatchNorm1d cannot compute batch statistics from a single sample in
    # training mode, so a trailing batch of exactly one sample is dropped.
    drop_last = bool(params['use_bn']
                     and len(dataset) > batch_size
                     and len(dataset) % batch_size == 1)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                        drop_last=drop_last)

    model = WineQualityNN(input_dim=X.shape[1],
                          num_layers=params['num_layers'],
                          dropout=params['dropout'],
                          use_bn=params['use_bn']).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'],
                                 weight_decay=params['weight_decay'])
    criterion = nn.CrossEntropyLoss()

    model.train()
    for _ in range(params.get('epochs', 30)):
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()

    # Predict
    model.eval()
    with torch.no_grad():
        logits = model(torch.tensor(X_test, dtype=torch.float32).to(device))
        # Move back to the CPU first: .numpy() is not available on CUDA tensors.
        predicted = logits.argmax(1).cpu().numpy()

    # Assemble IDs + predictions
    ids = ([f"red_{i+1}" for i in range(red_len)]
           + [f"white_{i+1}" for i in range(n_test - red_len)])
    df_submit = pd.DataFrame({'ID': ids, 'quality': predicted})
    df_submit.to_csv(output_path, index=False)
    print(f"預測已輸出 {output_path}")
    return df_submit

In [746]:
# Run: train on the full training set and export predictions for the goal files.
train_and_predict(
    X,
    y,
    X_test,
    params=params,
    red_len=len(test_red),  # red rows come first in X_test
)

AttributeError: 'bool' object has no attribute 'sum'