In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cpu')

In [3]:
# Load the red-wine training set and preview the first rows.
data = pd.read_csv('./winequality-red_train.csv', delimiter=',')
print(data.head())

# Toggle: True trains on the explicit column list below, False keeps every
# column of the CSV (the last column is then taken as the label).
USE_SELECTED_FEATURES = True

# `selected_features` is always either a list of column names or None, never a
# bare boolean. The prediction cell reuses it so the goal files are reduced to
# the same columns in the same order.
selected_features = None
if USE_SELECTED_FEATURES:
    selected_features = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
                         'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
                         'pH', 'sulphates', 'alcohol']
    # Keep the label as the last column so the positional split below holds.
    data = data[selected_features + ['quality']]

# Split features from the label (wine quality score, last column).
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Standardize every feature to zero mean / unit variance. StandardScaler is
# sensitive to outliers; RobustScaler (imported above) is the outlier-tolerant
# alternative if that becomes a problem.
# NOTE(review): the scaler is fit on all rows before the train/test split in
# the next cell, so held-out rows contribute to the scaling statistics.
scaler = StandardScaler()
X = scaler.fit_transform(X)


   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            6.9             0.670         0.06             2.1      0.080   
1            7.5             0.530         0.06             2.6      0.086   
2           11.1             0.180         0.48             1.5      0.068   
3            8.3             0.705         0.12             2.6      0.092   
4            7.4             0.670         0.12             1.6      0.186   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                    8                    33  0.99845  3.68       0.71   
1                   20                    44  0.99650  3.38       0.59   
2                    7                    15  0.99730  3.22       0.64   
3                   12                    28  0.99940  3.51       0.72   
4                    5                    21  0.99600  3.39       0.54   

   alcohol  quality  
0      9.6        5  
1     10.7        6  
2     10.1        6 

In [4]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert to tensors on `device`: float32 features, int64 class-index labels.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long, device=device)

# Wrap the tensors in datasets and batch them; only the training loader shuffles.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [5]:
# Feed-forward classifier for wine quality.
class WineQualityNN(nn.Module):
    """Four-layer fully connected network that maps features to class logits.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output logits. Defaults to 11, one per quality
            score 0-10, so an integer score can be used directly as the class
            index.
    """

    def __init__(self, input_dim, num_classes=11):
        super().__init__()
        # Attribute names fc1..fc4 are the state_dict keys; keep them stable.
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return raw logits (no softmax applied).

        For input of shape (batch, input_dim) the result is (batch, num_classes).
        """
        # ReLU after each hidden layer; the output layer stays linear.
        for hidden in (self.fc1, self.fc2, self.fc3):
            x = torch.relu(hidden(x))
        return self.fc4(x)

In [6]:
# Seed PyTorch's global RNG so weight initialization, and the DataLoader
# shuffling drawn from the same RNG during training, repeat on every
# Restart & Run All (on the same hardware and library versions).
torch.manual_seed(42)

# Build the model, loss, and optimizer.
input_dim = X.shape[1]  # number of feature columns
model = WineQualityNN(input_dim).to(device)
criterion = nn.CrossEntropyLoss()  # multi-class loss computed on raw logits
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [7]:
# Train the model.
def train_model(model, train_loader, criterion, optimizer, epochs=20):
    """Run mini-batch training and print the mean batch loss after each epoch.

    Args:
        model: ``nn.Module`` to optimize. It is switched to training mode
            before the first epoch and left in that mode.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar
            loss tensor.
        optimizer: Optimizer built over ``model``'s parameters.
        epochs: Number of full passes over ``train_loader``. Defaults to 20.

    Raises:
        ValueError: If ``train_loader`` yields no batches during an epoch.
    """
    # Make sure layers such as dropout / batch-norm behave in training mode,
    # even if an earlier evaluation left the model in eval mode.
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(X_batch), y_batch)
            loss.backward()
            optimizer.step()  # apply the weight update
            total_loss += loss.item()
            num_batches += 1
        if num_batches == 0:
            # Fail with a clear message instead of a ZeroDivisionError below.
            raise ValueError("train_loader produced no batches; cannot compute the epoch loss")
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/num_batches:.4f}")


In [8]:
# 測試模型
def test_model(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    print(f"Test Accuracy: {100 * correct / total:.2f}%")

In [None]:
# Train for 100 epochs, then report accuracy on the held-out split.
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=100,
)
test_model(model=model, test_loader=test_loader)

Epoch 1/100, Loss: 1.1924
Epoch 2/100, Loss: 0.9919
Epoch 3/100, Loss: 0.9441
Epoch 4/100, Loss: 0.9054
Epoch 5/100, Loss: 0.8932
Epoch 6/100, Loss: 0.8951
Epoch 7/100, Loss: 0.8600
Epoch 8/100, Loss: 0.8331
Epoch 9/100, Loss: 0.8280
Epoch 10/100, Loss: 0.7957
Epoch 11/100, Loss: 0.8002
Epoch 12/100, Loss: 0.8041
Epoch 13/100, Loss: 0.7321
Epoch 14/100, Loss: 0.7277
Epoch 15/100, Loss: 0.7103
Epoch 16/100, Loss: 0.6717
Epoch 17/100, Loss: 0.6837
Epoch 18/100, Loss: 0.6763
Epoch 19/100, Loss: 0.6664
Epoch 20/100, Loss: 0.6622
Epoch 21/100, Loss: 0.6154
Epoch 22/100, Loss: 0.5647
Epoch 23/100, Loss: 0.5761
Epoch 24/100, Loss: 0.5644
Epoch 25/100, Loss: 0.5693
Epoch 26/100, Loss: 0.4916
Epoch 27/100, Loss: 0.4172
Epoch 28/100, Loss: 0.4254
Epoch 29/100, Loss: 0.3881
Epoch 30/100, Loss: 0.3933
Epoch 31/100, Loss: 0.3784
Epoch 32/100, Loss: 0.3469
Epoch 33/100, Loss: 0.3532
Epoch 34/100, Loss: 0.3057
Epoch 35/100, Loss: 0.3458
Epoch 36/100, Loss: 0.3118
Epoch 37/100, Loss: 0.2770
Epoch 38/1

In [10]:
# Predict on new data and save every prediction into a single CSV.
def predict_and_save_combined(model, selected_features, files, output_csv):
    """Predict a quality class for every row of each input CSV and save them together.

    Relies on two module-level names defined in earlier cells: ``scaler`` (the
    fitted feature scaler) and ``device`` (where the model lives).

    Args:
        model: Trained ``nn.Module`` returning one row of class logits per sample.
            It is put in evaluation mode for the call; its previous mode is
            restored afterwards.
        selected_features: List of column names to keep from each CSV, or a
            falsy value to use every column as-is.
        files: Iterable of ``(file_path, wine_type)`` pairs. ``wine_type`` is
            used as the prefix of the generated row IDs.
        output_csv: Destination path for the combined predictions.

    The output has two columns: ``ID`` (``"<wine_type>_<1-based row number>"``)
    and ``quality`` (predicted class index). With an empty ``files`` the CSV
    still gets its header row.
    """
    records = []
    was_training = model.training
    model.eval()
    try:
        for file_path, wine_type in files:
            frame = pd.read_csv(file_path, delimiter=',')
            if selected_features:
                frame = frame[selected_features]
            # Apply the same scaling the training features received.
            features = scaler.transform(frame.to_numpy())
            inputs = torch.tensor(features, dtype=torch.float32).to(device)

            with torch.no_grad():
                predicted = model(inputs).argmax(dim=1)

            # One transfer to host memory for the whole file, not one per row.
            records.extend(
                {'ID': f"{wine_type}_{row + 1}", 'quality': quality}
                for row, quality in enumerate(predicted.cpu().tolist())
            )
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    # Fixing the columns keeps the header present even when nothing was predicted.
    results_df = pd.DataFrame(records, columns=['ID', 'quality'])
    results_df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

In [11]:
# Predict quality for the red and white goal sets and write both into one CSV.
goal_files = [
    ("winequality-red_goal.csv", "red"),
    ("winequality-white_goal.csv", "white"),
]
predict_and_save_combined(
    model,
    selected_features,
    goal_files,
    "./baseline_winequality_predictions.csv",
)

Predictions saved to ./baseline_winequality_predictions.csv
