.ipynb的介面最上面有一排東西"+Code", "+Markdown", ...最後有個"Outline"打開就可以看到目錄！

## 預處理

In [None]:
import os  # 處理檔案路徑
import numpy as np  # 處理數值資料
import pandas as pd  # 處理表格型資料
from tqdm import tqdm  # 讓迴圈加上進度條
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [36]:
SEQ_LEN = 2048  # Target number of time steps per sample; every recording is cropped or padded to this length (tunable)

DATA_DIR = "39_Training_Dataset/39_Training_Dataset/train_data" # Folder that holds the per-sample .txt files
INFO_PATH = "39_Training_Dataset/39_Training_Dataset/train_info.csv" # Path to the label .csv file

In [38]:
def load_and_process_txt(file_path, seq_len=SEQ_LEN):
    """Load one recording and return it standardized and fixed-length.

    Args:
        file_path: Path to a text file readable by ``np.loadtxt`` with one
            time step per row (the original notes describe 6 sensor columns).
        seq_len: Number of time steps in the returned array.

    Returns:
        A 2-D array of shape ``(seq_len, n_columns)``: each column is
        standardized over the full recording, then the recording is cropped
        to the first ``seq_len`` rows or zero-padded at the end.
    """
    # ndmin=2 keeps a single-row file as (1, C). Without it np.loadtxt returns
    # a 1-D array, which breaks the per-column statistics and the 2-D padding.
    data = np.loadtxt(file_path, ndmin=2)

    # Standardize each column to zero mean / unit std; the epsilon avoids a
    # division by zero for constant columns.
    data = (data - data.mean(axis=0)) / (data.std(axis=0) + 1e-8)

    # Crop or zero-pad so that every sample has exactly seq_len rows.
    if data.shape[0] >= seq_len:
        data = data[:seq_len]  # crop
    else:
        pad_width = seq_len - data.shape[0]
        # (0, pad_width): pad rows (time steps) at the end only
        # (0, 0): never pad the column (sensor) axis
        # mode='constant': pad with zeros
        data = np.pad(data, ((0, pad_width), (0, 0)), mode='constant')

    return data  # shape = (seq_len, n_columns)

In [39]:
# Read the label CSV
info_df = pd.read_csv(INFO_PATH)

# Shift the labels so that every task starts at class 0
info_df['gender'] = info_df['gender'] - 1  # gender: 1,2 -> 0,1
info_df['hand'] = info_df['hold racket handed'] - 1  # 1,2 -> 0,1
info_df['level'] = info_df['level'] - 2  # level: 2~5 -> 0~3

# Accumulators: one processed time series per sample plus its four labels.
# 'play years' is used as-is (the original notes say it is already 0,1,2).
samples = []
labels = {'gender': [], 'hand': [], 'play years': [], 'level': []}

# Walk over every row of the label table and load the matching .txt file
for _, row in tqdm(info_df.iterrows(), total=len(info_df)):
    uid = row['unique_id']
    file_path = os.path.join(DATA_DIR, f"{uid}.txt")

    if os.path.exists(file_path):
        # read + standardize + fix the length, then keep the labels aligned
        samples.append(load_and_process_txt(file_path))
        for column, bucket in labels.items():
            bucket.append(row[column])
    else:
        # a missing file is reported and skipped
        print(f"找不到檔案: {file_path}")

# Convert everything to numpy arrays
X_data = np.array(samples)
y_gender = np.array(labels['gender'])
y_hand = np.array(labels['hand'])
y_years = np.array(labels['play years'])
y_level = np.array(labels['level'])


  0%|          | 0/1955 [00:00<?, ?it/s]

100%|██████████| 1955/1955 [00:11<00:00, 173.71it/s]


In [None]:
# Cache the preprocessed arrays so they can be reused directly later.
# If you change the preprocessing, remember to change the file names below too!
# Revised plan: keep these files locally and do not upload them to GitHub.
for npy_name, array in (
    ("X_data.npy", X_data),
    ("y_gender.npy", y_gender),
    ("y_hand.npy", y_hand),
    ("y_years.npy", y_years),
    ("y_level.npy", y_level),
):
    np.save(npy_name, array)

## Test
看一下資料分布而已，也可以跳過

In [None]:
import numpy as np  # 處理數值資料
# Reload the cached arrays, in the same order they were saved.
X_data, y_gender, y_hand, y_years, y_level = (
    np.load(npy_name)
    for npy_name in ("X_data.npy", "y_gender.npy", "y_hand.npy", "y_years.npy", "y_level.npy")
)

In [40]:
# Count the samples per gender label.
for caption, label in (("男生（0）有幾筆：", 0), ("女生（1）有幾筆：", 1)):
    print(caption, np.sum(y_gender == label))
# Almost a 5x gap: severely imbalanced.
# Techniques such as oversampling / SMOTE are suggested.

# W1: use class_weight (recommended!)
# W2: upsample the minority class (e.g. female)
# W3: when designing the multi-task loss, adjust the weight for gender specifically

男生（0）有幾筆： 1627
女生（1）有幾筆： 328


In [50]:
# Racket-hand distribution
for caption, label in (("右手（0）有幾筆：", 0), ("左手（1）有幾筆：", 1)):
    print(caption, np.sum(y_hand == label))
# 4.34x gap -> clearly imbalanced

# Play-years distribution
for caption, label in (("低球齡有幾筆：", 0), ("中球齡有幾筆：", 1), ("高球齡有幾筆：", 2)):
    print(caption, np.sum(y_years == label))
# about 2.2x gap -> mildly imbalanced

# Level distribution
for caption, label in (("等級0：", 0), ("等級1：", 1), ("等級2：", 2), ("等級3：", 3)):
    print(caption, np.sum(y_level == label))
# 6.6x gap -> clearly imbalanced

右手（0）有幾筆： 1589
左手（1）有幾筆： 366
低球齡有幾筆： 387
中球齡有幾筆： 868
高球齡有幾筆： 700
等級0： 715
等級1： 201
等級2： 136
等級3： 903


## 0. 載資料

In [None]:
import numpy as np  # 處理數值資料
import pandas as pd
import os
from sklearn.model_selection import train_test_split

In [None]:
# Load the cached features and the four label vectors from disk.
X_data, y_gender, y_hand, y_years, y_level = [
    np.load(f"{stem}.npy")
    for stem in ("X_data", "y_gender", "y_hand", "y_years", "y_level")
]

In [None]:
# Split into training and validation sets (80% train, 20% validation).
# All five arrays are split together so features and labels stay aligned.
# NOTE(review): the split is random per sample and passes no `stratify`
# argument; given the imbalanced labels, consider whether that is intended.
splits = train_test_split(
    X_data, y_gender, y_hand, y_years, y_level,
    test_size=0.2,
    random_state=42,
)
(
    X_train, X_val,
    y_gender_train, y_gender_val,
    y_hand_train, y_hand_val,
    y_years_train, y_years_val,
    y_level_train, y_level_val,
) = splits

## Optional. 權重處理

In [None]:
# Class weights to counter the label imbalance of all four tasks.
from sklearn.utils.class_weight import compute_class_weight

# 'balanced' gives each class the weight n_samples / (n_classes * class_count).
weights_years = compute_class_weight(class_weight='balanced', classes=np.unique(y_years), y=y_years)
weights_gender = compute_class_weight(class_weight='balanced', classes=np.unique(y_gender), y=y_gender)
weights_hands = compute_class_weight(class_weight='balanced', classes=np.unique(y_hand), y=y_hand)
weights_level = compute_class_weight(class_weight='balanced', classes=np.unique(y_level), y=y_level)
print("class weights_years:", dict(enumerate(weights_years)))
print("class weights_gender:", dict(enumerate(weights_gender)))
print("class weights_hands:", dict(enumerate(weights_hands)))
print("class weights_level:", dict(enumerate(weights_level)))

import torch
import torch.nn as nn

# BCEWithLogitsLoss keeps negatives at weight 1 and multiplies only the
# positive term by pos_weight. To reproduce the balanced weighting, the
# positive weight must be relative to the negative one:
# w1 / w0 == n_negative / n_positive.
pos_weight_gender = torch.tensor([weights_gender[1] / weights_gender[0]], dtype=torch.float32)
pos_weight_hand   = torch.tensor([weights_hands[1] / weights_hands[0]], dtype=torch.float32)
# CrossEntropyLoss takes one weight per class, so the balanced weights are used as-is.
ce_weight_years   = torch.tensor(weights_years, dtype=torch.float32)
ce_weight_level   = torch.tensor(weights_level, dtype=torch.float32)

class weights_years: {0: np.float64(1.6838931955211025), 1: np.float64(0.750768049155146), 2: np.float64(0.930952380952381)}
class weights_gender: {0: np.float64(0.60079901659496), 1: np.float64(2.980182926829268)}
class weights_hands: {0: np.float64(0.6151667715544368), 1: np.float64(2.670765027322404)}
class weights_level: {0: np.float64(0.6835664335664335), 1: np.float64(2.431592039800995), 2: np.float64(3.59375), 3: np.float64(0.54125138427464)}


## 1. 定義資料集

In [2]:
# Dataset
# 自定義一個能被 DataLoader 使用的資料類別
# 當 DataLoader 做 batch 時會呼叫這個函式
from torch.utils.data import Dataset, DataLoader

class TableTennisDataset(Dataset):
    """Dataset pairing each sample with its four task labels.

    The features and the two binary labels (gender, hand) are stored as
    float32 tensors; the two multi-class labels (years, level) are stored as
    long tensors.
    """

    def __init__(self, X, y_gender, y_hand, y_years, y_level):
        as_float = torch.float32
        as_index = torch.long

        self.X = torch.tensor(X, dtype=as_float)
        self.y_gender = torch.tensor(y_gender, dtype=as_float)
        self.y_hand = torch.tensor(y_hand, dtype=as_float)
        self.y_years = torch.tensor(y_years, dtype=as_index)
        self.y_level = torch.tensor(y_level, dtype=as_index)

    def __len__(self):
        # Number of samples = size of the leading dimension of X.
        return self.X.shape[0]

    def __getitem__(self, idx):
        # Return (features, labels-by-task) for the requested index.
        targets = {
            'gender': self.y_gender[idx],
            'hand': self.y_hand[idx],
            'years': self.y_years[idx],
            'level': self.y_level[idx],
        }
        return self.X[idx], targets

## 2. 定義模型

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

# Loss functions for the four heads.
# Switch class weighting on or off here.
use_class_weight = False
# use_class_weight = True

# Binary tasks (gender & hand): BCEWithLogitsLoss, with pos_weight only when
# weighting is enabled (None is the unweighted default).
loss_gender = nn.BCEWithLogitsLoss(pos_weight=pos_weight_gender if use_class_weight else None)
loss_hand   = nn.BCEWithLogitsLoss(pos_weight=pos_weight_hand if use_class_weight else None)

# Multi-class tasks (play years & level): CrossEntropyLoss, with per-class
# weights only when weighting is enabled.
loss_years = nn.CrossEntropyLoss(weight=ce_weight_years if use_class_weight else None)
loss_level = nn.CrossEntropyLoss(weight=ce_weight_level if use_class_weight else None)

In [4]:
# 模型
class MultiTaskCNN(nn.Module):
    """Shared 1-D CNN trunk with four task-specific linear heads.

    ``forward`` takes a batch shaped (B, T, 6) and returns a dict of logits:
    'gender' and 'hand' with shape (B,), 'years' with shape (B, 3) and
    'level' with shape (B, 4).
    """

    def __init__(self):
        super().__init__()

        # 1-D CNN feature extractor: three conv stages, the first two halve
        # the time axis and the last one averages it away.
        trunk_layers = [
            nn.Conv1d(in_channels=6, out_channels=64, kernel_size=5, padding=2),  # (B, 64, T)
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),  # (B, 64, T/2)
            nn.Conv1d(64, 128, kernel_size=5, padding=2),  # (B, 128, T/2)
            nn.ReLU(),
            nn.MaxPool1d(2),  # (B, 128, T/4)
            nn.Conv1d(128, 256, kernel_size=3, padding=1),  # (B, 256, T/4)
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),  # (B, 256, 1)
        ]
        self.feature_extractor = nn.Sequential(*trunk_layers)

        # Collapses (B, 256, 1) to (B, 256)
        self.flatten = nn.Flatten()

        # Prediction heads, all reading the same shared feature vector
        feature_dim = 256
        self.gender_head = nn.Linear(feature_dim, 1)  # binary
        self.hand_head = nn.Linear(feature_dim, 1)  # binary
        self.years_head = nn.Linear(feature_dim, 3)  # 3 classes
        self.level_head = nn.Linear(feature_dim, 4)  # 4 classes

    def forward(self, x):  # x: (B, T, 6)
        # Conv1d expects channels first, so move to (B, 6, T)
        channels_first = x.permute(0, 2, 1)
        feat = self.flatten(self.feature_extractor(channels_first))  # (B, 256)

        # The binary heads emit (B, 1); drop the trailing axis to get (B,)
        gender_logit = self.gender_head(feat).squeeze(-1)
        hand_logit = self.hand_head(feat).squeeze(-1)
        years_logits = self.years_head(feat)  # (B, 3)
        level_logits = self.level_head(feat)  # (B, 4)

        return {
            'gender': gender_logit,
            'hand': hand_logit,
            'years': years_logits,
            'level': level_logits,
        }

## 3. 定義模型訓練

In [5]:
# train
def train_one_epoch(model, dataloader, optimizer, device):
    """Run one optimization pass over ``dataloader`` and report metrics.

    The four task losses (module-level ``loss_gender``, ``loss_hand``,
    ``loss_years``, ``loss_level``) are summed without weighting and
    back-propagated together. Prints the mean per-batch loss and per-task
    accuracy, and returns the mean per-batch loss.
    """
    model.train()
    criteria = {
        'gender': loss_gender,
        'hand': loss_hand,
        'years': loss_years,
        'level': loss_level,
    }
    hits = dict.fromkeys(criteria, 0)
    running_loss = 0
    seen = 0

    for x, y in tqdm(dataloader):
        x = x.to(device)
        targets = {task: y[task].to(device) for task in criteria}

        optimizer.zero_grad()
        out = model(x)

        # Total loss = plain sum of the four task losses
        per_task = {task: criteria[task](out[task], targets[task]) for task in criteria}
        loss = per_task['gender'] + per_task['hand'] + per_task['years'] + per_task['level']
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        # Accuracy bookkeeping
        seen += x.size(0)

        # Binary heads: probability above 0.5 means class 1
        for task in ('gender', 'hand'):
            predicted = (torch.sigmoid(out[task]) > 0.5).float()
            hits[task] += (predicted == targets[task]).sum().item()

        # Multi-class heads: pick the highest logit
        for task in ('years', 'level'):
            predicted = torch.argmax(out[task], dim=1)
            hits[task] += (predicted == targets[task]).sum().item()

    acc_gender, acc_hand, acc_years, acc_level = (
        hits[task] / seen for task in ('gender', 'hand', 'years', 'level')
    )
    avg_loss = running_loss / len(dataloader)

    print(f"Loss: {avg_loss:.4f} | acc_gender: {acc_gender:.3f} | acc_hand: {acc_hand:.3f} | acc_years: {acc_years:.3f} | acc_level: {acc_level:.3f}")
    return avg_loss


## 4. 定義驗證函數

In [6]:
def evaluate_model(model, dataloader, device):
    """Evaluate ``model`` on ``dataloader`` without updating any weights.

    Uses the same summed four-task loss as training (module-level
    ``loss_gender``, ``loss_hand``, ``loss_years``, ``loss_level``). Prints
    the mean per-batch loss and per-task accuracy, and returns the mean
    per-batch loss.
    """
    model.eval()
    criteria = {
        'gender': loss_gender,
        'hand': loss_hand,
        'years': loss_years,
        'level': loss_level,
    }
    hits = dict.fromkeys(criteria, 0)
    running_loss = 0
    seen = 0

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for x, y in dataloader:
            x = x.to(device)
            targets = {task: y[task].to(device) for task in criteria}

            out = model(x)

            per_task = {task: criteria[task](out[task], targets[task]) for task in criteria}
            loss = per_task['gender'] + per_task['hand'] + per_task['years'] + per_task['level']
            running_loss += loss.item()

            seen += x.size(0)

            # Binary heads: probability above 0.5 means class 1
            for task in ('gender', 'hand'):
                predicted = (torch.sigmoid(out[task]) > 0.5).float()
                hits[task] += (predicted == targets[task]).sum().item()

            # Multi-class heads: pick the highest logit
            for task in ('years', 'level'):
                predicted = torch.argmax(out[task], dim=1)
                hits[task] += (predicted == targets[task]).sum().item()

    acc_gender, acc_hand, acc_years, acc_level = (
        hits[task] / seen for task in ('gender', 'hand', 'years', 'level')
    )
    avg_loss = running_loss / len(dataloader)

    print(f"\n[Validation] Loss: {avg_loss:.4f} | acc_gender: {acc_gender:.3f} | acc_hand: {acc_hand:.3f} | acc_years: {acc_years:.3f} | acc_level: {acc_level:.3f}")
    return avg_loss

## 主程式

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Use the GPU automatically when one is available, otherwise fall back to the CPU
model = MultiTaskCNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) # Learning rate is 1e-3 (the author notes this default works well)

# Build the Dataset objects for the training and validation splits
dataset_train = TableTennisDataset(X_train, y_gender_train, y_hand_train, y_years_train, y_level_train)
dataset_val = TableTennisDataset(X_val, y_gender_val, y_hand_val, y_years_val, y_level_val)


# Build the DataLoaders; only the training loader shuffles
dataloader_train = DataLoader(dataset_train, batch_size=32, shuffle=True)
dataloader_val = DataLoader(dataset_val, batch_size=32, shuffle=False)

# Training loop: 10 epochs, each followed by a validation pass
for epoch in range(10):
    print(f"\n🟢 Epoch {epoch+1}")
    train_loss = train_one_epoch(model, dataloader_train, optimizer, device)
    val_loss = evaluate_model(model, dataloader_val, device)




🟢 Epoch 1


100%|██████████| 49/49 [00:19<00:00,  2.50it/s]


Loss: 2.9471 | acc_gender: 0.810 | acc_hand: 0.813 | acc_years: 0.554 | acc_level: 0.559

[Validation] Loss: 2.5980 | acc_gender: 0.859 | acc_hand: 0.813 | acc_years: 0.563 | acc_level: 0.614

🟢 Epoch 2


100%|██████████| 49/49 [00:20<00:00,  2.38it/s]


Loss: 2.2027 | acc_gender: 0.826 | acc_hand: 0.964 | acc_years: 0.636 | acc_level: 0.662

[Validation] Loss: 2.2410 | acc_gender: 0.872 | acc_hand: 0.985 | acc_years: 0.573 | acc_level: 0.637

🟢 Epoch 3


100%|██████████| 49/49 [00:21<00:00,  2.29it/s]


Loss: 1.9364 | acc_gender: 0.871 | acc_hand: 0.985 | acc_years: 0.646 | acc_level: 0.689

[Validation] Loss: 1.9613 | acc_gender: 0.905 | acc_hand: 0.990 | acc_years: 0.621 | acc_level: 0.673

🟢 Epoch 4


100%|██████████| 49/49 [00:19<00:00,  2.52it/s]


Loss: 1.7146 | acc_gender: 0.896 | acc_hand: 0.997 | acc_years: 0.707 | acc_level: 0.715

[Validation] Loss: 1.7173 | acc_gender: 0.949 | acc_hand: 0.992 | acc_years: 0.657 | acc_level: 0.708

🟢 Epoch 5


100%|██████████| 49/49 [00:17<00:00,  2.81it/s]


Loss: 1.4520 | acc_gender: 0.926 | acc_hand: 0.999 | acc_years: 0.728 | acc_level: 0.776

[Validation] Loss: 1.5160 | acc_gender: 0.957 | acc_hand: 0.995 | acc_years: 0.721 | acc_level: 0.783

🟢 Epoch 6


100%|██████████| 49/49 [00:16<00:00,  2.89it/s]


Loss: 1.2003 | acc_gender: 0.940 | acc_hand: 1.000 | acc_years: 0.793 | acc_level: 0.827

[Validation] Loss: 1.2413 | acc_gender: 0.951 | acc_hand: 0.995 | acc_years: 0.829 | acc_level: 0.808

🟢 Epoch 7


100%|██████████| 49/49 [00:17<00:00,  2.80it/s]


Loss: 1.0356 | acc_gender: 0.944 | acc_hand: 1.000 | acc_years: 0.813 | acc_level: 0.857

[Validation] Loss: 1.0716 | acc_gender: 0.962 | acc_hand: 0.992 | acc_years: 0.844 | acc_level: 0.849

🟢 Epoch 8


100%|██████████| 49/49 [00:18<00:00,  2.58it/s]


Loss: 0.8832 | acc_gender: 0.949 | acc_hand: 1.000 | acc_years: 0.862 | acc_level: 0.886

[Validation] Loss: 0.9649 | acc_gender: 0.962 | acc_hand: 0.992 | acc_years: 0.834 | acc_level: 0.885

🟢 Epoch 9


100%|██████████| 49/49 [00:20<00:00,  2.38it/s]


Loss: 0.7980 | acc_gender: 0.954 | acc_hand: 1.000 | acc_years: 0.859 | acc_level: 0.898

[Validation] Loss: 0.8241 | acc_gender: 0.967 | acc_hand: 0.992 | acc_years: 0.885 | acc_level: 0.867

🟢 Epoch 10


100%|██████████| 49/49 [00:19<00:00,  2.49it/s]


Loss: 0.7448 | acc_gender: 0.958 | acc_hand: 1.000 | acc_years: 0.875 | acc_level: 0.900

[Validation] Loss: 0.7340 | acc_gender: 0.969 | acc_hand: 0.992 | acc_years: 0.854 | acc_level: 0.910


## 儲存模型

In [8]:
# Persist only the model's state_dict; the inference cell later loads this same file name.
torch.save(model.state_dict(), 'modelNoWeight042400.pth')

## 測資

In [20]:
SEQ_LEN = 2048
# 1. Load the test data
def load_test_data(test_info_path, test_data_dir, seq_len=SEQ_LEN):
    """Load and preprocess every test recording listed in the info CSV.

    Args:
        test_info_path: Path to the CSV that has a 'unique_id' column.
        test_data_dir: Folder containing one ``<unique_id>.txt`` per sample.
        seq_len: Fixed length passed through to ``load_and_process_txt``.

    Returns:
        ``(uids, X_test)``: ``uids`` lists the ids that were actually loaded,
        in CSV order, and ``X_test`` is a float32 tensor stacking the
        corresponding processed recordings.
    """
    test_info = pd.read_csv(test_info_path)
    X_test = []
    uids = []

    for _, row in tqdm(test_info.iterrows(), total=len(test_info)):
        uid = row['unique_id']
        file_path = os.path.join(test_data_dir, f"{uid}.txt")
        if not os.path.exists(file_path):
            # Report the skip, as the training loader does: a silently
            # dropped id would otherwise just be missing from the results.
            print(f"找不到檔案: {file_path}")
            continue
        x = load_and_process_txt(file_path, seq_len)
        X_test.append(x)
        uids.append(uid)

    X_test = np.array(X_test)
    return uids, torch.tensor(X_test, dtype=torch.float32)

In [21]:
# 2. 預測函式
def predict(model, X_test_tensor, device):
    """Predict probabilities for every sample, one sample at a time.

    Returns a list with one row per sample laid out as
    ``[p_gender, p_hand, p_years_0..2, p_level_0..3]``: sigmoid probabilities
    for the two binary heads and softmax probabilities for the two
    multi-class heads.
    """
    model.eval()
    rows = []

    with torch.no_grad():
        for sample in tqdm(X_test_tensor):
            # Add a batch axis of size 1 before calling the model
            out = model(sample.unsqueeze(0).to(device))

            p_gender = torch.sigmoid(out['gender']).item()
            p_hand = torch.sigmoid(out['hand']).item()
            p_years = torch.softmax(out['years'], dim=1).squeeze().cpu().numpy().tolist()
            p_level = torch.softmax(out['level'], dim=1).squeeze().cpu().numpy().tolist()

            rows.append([p_gender, p_hand, *p_years, *p_level])

    return rows

In [22]:
def load_and_process_txt(file_path, seq_len=SEQ_LEN):
    """Load one recording and return it standardized and fixed-length.

    Args:
        file_path: Path to a text file readable by ``np.loadtxt`` with one
            time step per row (the original notes describe 6 sensor columns).
        seq_len: Number of time steps in the returned array.

    Returns:
        A 2-D array of shape ``(seq_len, n_columns)``: each column is
        standardized over the full recording, then the recording is cropped
        to the first ``seq_len`` rows or zero-padded at the end.
    """
    # ndmin=2 keeps a single-row file as (1, C). Without it np.loadtxt returns
    # a 1-D array, which breaks the per-column statistics and the 2-D padding.
    data = np.loadtxt(file_path, ndmin=2)

    # Standardize each column to zero mean / unit std; the epsilon avoids a
    # division by zero for constant columns.
    data = (data - data.mean(axis=0)) / (data.std(axis=0) + 1e-8)

    # Crop or zero-pad so that every sample has exactly seq_len rows.
    if data.shape[0] >= seq_len:
        data = data[:seq_len]  # crop
    else:
        pad_width = seq_len - data.shape[0]
        # (0, pad_width): pad rows (time steps) at the end only
        # (0, 0): never pad the column (sensor) axis
        # mode='constant': pad with zeros
        data = np.pad(data, ((0, pad_width), (0, 0)), mode='constant')

    return data  # shape = (seq_len, n_columns)

In [23]:
from decimal import Decimal, ROUND_HALF_UP
def create_submission(uids, preds, save_path='submission.csv'):
    """Write the predictions to a submission CSV.

    Args:
        uids: One id per prediction row; written as the 'unique_id' column.
        preds: Rows of nine probabilities (gender, hand, 3 x play years,
            4 x level), in that order.
        save_path: Destination CSV path.

    Every probability is written with exactly six decimal places, rounded
    half-up.
    """
    six_places = Decimal('0.000001')
    score_columns = [
        'gender', 'hold racket handed',
        'play years_0', 'play years_1', 'play years_2',
        'level_2', 'level_3', 'level_4', 'level_5',
    ]

    def to_fixed(value):
        # Decimal gives explicit half-up rounding and a fixed-width string
        return str(Decimal(value).quantize(six_places, rounding=ROUND_HALF_UP))

    table = pd.DataFrame(preds, columns=score_columns)
    table.insert(0, 'unique_id', uids)
    for name in score_columns:
        table[name] = table[name].apply(to_fixed)

    with open(save_path, "w", encoding="utf-8", newline='\n') as handle:
        table.to_csv(handle, index=False)
    print(f"✔️ Submission saved to {save_path}")

In [26]:
# Change these to your own test file paths
TEST_INFO = "39_Test_Dataset/39_Test_Dataset/test_info.csv"
TEST_DATA = "39_Test_Dataset/39_Test_Dataset/test_data"

# 1. Load the test data
uids, X_test_tensor = load_test_data(TEST_INFO, TEST_DATA)


# Rebuild the architecture and restore the trained weights.
# map_location=device lets a checkpoint saved on a GPU machine load on a
# CPU-only machine (and vice versa) instead of failing during deserialization.
model = MultiTaskCNN().to(device)
model.load_state_dict(torch.load("modelNoWeight042400.pth", map_location=device))
model.eval()

# 2. Predict
preds = predict(model, X_test_tensor, device)

submissionCSV = "submission042402.csv" # Remember to change this for each run
# 3. Write the CSV
create_submission(uids, preds, save_path=submissionCSV)


  0%|          | 0/1430 [00:00<?, ?it/s]

100%|██████████| 1430/1430 [00:10<00:00, 141.25it/s]
100%|██████████| 1430/1430 [00:08<00:00, 170.49it/s]

✔️ Submission saved to submission042402.csv



