In [None]:
import os  # 處理檔案路徑
import numpy as np  # 處理數值資料
import pandas as pd  # 處理表格型資料
from tqdm import tqdm  # 讓迴圈加上進度條
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from decimal import Decimal, ROUND_HALF_UP
from sklearn.metrics import accuracy_score

# Mount Google Drive at /content/drive (Colab only); the dataset paths used by
# this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Location of the label table (.csv) for the training recordings.
INFO_PATH = "/content/drive/MyDrive/39_Training_Dataset/39_Training_Dataset/train_info.csv"
# Folder that holds one <unique_id>.txt sensor file per training recording.
DATA_DIR = "/content/drive/MyDrive/39_Training_Dataset/39_Training_Dataset/train_data"

# 資料前處理

In [None]:
# 前處理：切割每筆資料的 27 次揮拍並提取統計特徵
from scipy.stats import kurtosis, skew
from scipy.signal import welch
from scipy.stats import entropy

def segment_swing(data, cut_points, n_swings=27):
    """Split one recording into consecutive swing segments.

    Args:
        data: Sliceable sequence of samples (list or array), sliced along its
            first axis.
        cut_points: Boundary indices; segment ``i`` is
            ``data[cut_points[i]:cut_points[i + 1]]``.
        n_swings: Number of segments to return. Defaults to 27, the value that
            used to be hard-coded, so existing two-argument calls are unchanged.

    Returns:
        A list with ``n_swings`` slices of ``data``.

    Raises:
        IndexError: If fewer than ``n_swings + 1`` cut points are supplied
            (same exception type the hard-coded version raised).
    """
    if len(cut_points) < n_swings + 1:
        raise IndexError(
            f"need {n_swings + 1} cut points for {n_swings} swings, got {len(cut_points)}"
        )
    starts = cut_points[:n_swings]
    stops = cut_points[1:n_swings + 1]
    return [data[start:stop] for start, stop in zip(starts, stops)]

def extract_features(segment):
    """Compute the 42 statistical features that describe one swing segment.

    Args:
        segment: 2-D array of samples; columns 0-2 are read as the accelerometer
            axes (Ax, Ay, Az) and columns 3-5 as the gyroscope axes (Gx, Gy, Gz).

    Returns:
        list of 42 floats in this fixed order:
          * per-axis mean (acc x3, gyro x3)
          * per-axis variance (acc x3, gyro x3)
          * per-axis RMS (acc x3, gyro x3)
          * max / min / mean of the acceleration magnitude
          * max / min / mean of the angular-velocity magnitude
          * for each axis index 0..2: skew(acc), kurtosis(acc), skew(gyro),
            kurtosis(gyro), spectral entropy(acc), spectral entropy(gyro)
        Any NaN or infinite value is replaced by 0.0.

    Raises:
        ValueError: If the segment contains no samples.
    """
    if len(segment) == 0:
        raise ValueError("cannot extract features from an empty segment")

    acc = segment[:, 0:3]   # Ax, Ay, Az
    gyro = segment[:, 3:6]  # Gx, Gy, Gz

    feats = []

    # Per-axis mean, variance and RMS (6 values each).
    feats += list(acc.mean(axis=0))
    feats += list(gyro.mean(axis=0))
    feats += list(acc.var(axis=0))
    feats += list(gyro.var(axis=0))
    feats += list(np.sqrt((acc ** 2).mean(axis=0)))
    feats += list(np.sqrt((gyro ** 2).mean(axis=0)))

    # Direction-independent magnitudes of acceleration and angular velocity.
    acc_total = np.linalg.norm(acc, axis=1)
    gyro_total = np.linalg.norm(gyro, axis=1)
    feats += [acc_total.max(), acc_total.min(), acc_total.mean()]
    feats += [gyro_total.max(), gyro_total.min(), gyro_total.mean()]

    # Shape statistics and spectral entropy, 6 values per axis index.
    for axis in range(3):
        acc_axis = acc[:, axis]
        gyro_axis = gyro[:, axis]

        feats += [skew(acc_axis), kurtosis(acc_axis)]
        feats += [skew(gyro_axis), kurtosis(gyro_axis)]

        # Spectral entropy of the Welch power spectrum, normalised to sum to ~1.
        for series in (acc_axis, gyro_axis):
            _, pxx = welch(series, nperseg=min(len(series), 64))
            pxx = pxx / (pxx.sum() + 1e-8)
            feats.append(entropy(pxx))

    # Constant (or nearly constant) axes make skew/kurtosis/entropy undefined and
    # yield NaN; a NaN feature would turn the model output for the whole row
    # into NaN, so map every non-finite value to a neutral 0.0.
    cleaned = np.nan_to_num(np.asarray(feats, dtype=float), nan=0.0, posinf=0.0, neginf=0.0)
    return cleaned.tolist()

def preprocess_dataset(info_csv_path, data_dir, is_train=True):
    """Build one feature row per recording listed in an info CSV.

    For every row of the CSV the matching ``<unique_id>.txt`` file in
    ``data_dir`` is loaded, cut into 27 swings using the row's ``cut_point``
    column, and turned into 27 x 42 swing features plus a 10-dim one-hot of the
    ``mode`` column (1..10).

    Args:
        info_csv_path: Path of the CSV with ``unique_id``, ``cut_point`` and
            ``mode`` columns (plus the label columns when ``is_train``).
        data_dir: Folder containing the ``<unique_id>.txt`` sensor files.
        is_train: When True the label columns ``gender``, ``hold racket handed``,
            ``play years`` and ``level`` are added (gender/handedness mapped to
            1 when the raw value is 2, else 0; level shifted by -2).

    Returns:
        DataFrame with the metadata columns followed by ``f0..fN`` features.
        Recordings that are missing, malformed, or do not have exactly 28 cut
        points are skipped.

    Raises:
        ValueError: If no recording could be processed at all.
    """
    df_info = pd.read_csv(info_csv_path)
    feature_list = []
    meta_list = []
    skipped = []  # (unique_id, reason) for every recording left out

    for _, row in df_info.iterrows():
        uid = row["unique_id"]
        txt_path = os.path.join(data_dir, f"{uid}.txt")
        if not os.path.exists(txt_path):
            skipped.append((uid, "file not found"))
            continue
        try:
            data = np.loadtxt(txt_path)
            cut_points = [int(tok) for tok in row["cut_point"].strip("[]").split()]
            if len(cut_points) != 28:
                # 27 segments need exactly 28 boundaries.
                skipped.append((uid, f"{len(cut_points)} cut points"))
                continue
            swings = segment_swing(data, cut_points)
            flatten_feats = np.concatenate([extract_features(s) for s in swings])

            # One-hot encode mode (1..10 -> index 0..9); out-of-range stays all-zero.
            mode_onehot = np.zeros(10)
            mode_idx = int(row["mode"]) - 1
            if 0 <= mode_idx < 10:
                mode_onehot[mode_idx] = 1
            full_feat = np.concatenate([flatten_feats, mode_onehot])

            if is_train:
                meta = {
                    "unique_id": uid,
                    "gender": 1 if row["gender"] == 2 else 0,
                    "hold racket handed": 1 if row["hold racket handed"] == 2 else 0,
                    "play years": row["play years"],
                    "level": row["level"] - 2  # shift to 0..3
                }
            else:
                meta = {"unique_id": uid}
        except Exception as exc:
            # Best effort: a malformed recording is skipped, but it is recorded
            # instead of vanishing silently, and KeyboardInterrupt still propagates.
            skipped.append((uid, repr(exc)))
            continue

        # Append both lists together, only after everything succeeded, so that
        # feature rows and metadata rows can never get out of step.
        feature_list.append(full_feat)
        meta_list.append(meta)

    if skipped:
        print(f"preprocess_dataset: skipped {len(skipped)} of {len(df_info)} recordings; "
              f"first: {skipped[0]}")
    if not feature_list:
        raise ValueError(f"no usable recordings found for {info_csv_path!r} in {data_dir!r}")

    df_feat = pd.DataFrame(feature_list, columns=[f"f{i}" for i in range(len(feature_list[0]))])
    df_meta = pd.DataFrame(meta_list)
    df_result = pd.concat([df_meta, df_feat], axis=1)
    return df_result

# Run preprocessing for the training and test sets, then cache both feature
# tables as CSV so later cells can reload them without redoing the work.
TEST_INFO_PATH = "/content/drive/MyDrive/39_Test_Dataset/39_Test_Dataset/test_info.csv"
TEST_DATA_DIR = "/content/drive/MyDrive/39_Test_Dataset/39_Test_Dataset/test_data"

train_df = preprocess_dataset(INFO_PATH, DATA_DIR, is_train=True)
test_df = preprocess_dataset(TEST_INFO_PATH, TEST_DATA_DIR, is_train=False)

for frame, csv_name in ((train_df, "train_features.csv"), (test_df, "test_features.csv")):
    frame.to_csv(csv_name, index=False)
print("✅ train_features.csv和test_features.csv 已完成前處理")


  feats += [skew(g), kurtosis(g)]


✅ train_features.csv和test_features.csv 已完成前處理


# 手部模型
# 儲存和驗證手模型

In [None]:
# 重新載入新的訓練資料集並設定特徵欄位
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import joblib
# Load the cached training features.
df = pd.read_csv("train_features.csv")

# Separate the feature columns from the label columns.
X_full = df.drop(columns=["unique_id", "gender", "hold racket handed", "play years", "level"])
y_hand = df["hold racket handed"].values

# The last 10 columns are the mode one-hot and are deliberately left unscaled.
X_num = X_full.iloc[:, :-10].values
X_mode = X_full.iloc[:, -10:].values

# Split BEFORE fitting the scaler so that validation rows do not contribute to
# the mean/variance used for standardisation (avoids leakage into val_acc).
# Same test_size / stratify / random_state arguments as before.
X_num_train, X_num_val, X_mode_train, X_mode_val, y_train, y_val = train_test_split(
    X_num, X_mode, y_hand, test_size=0.2, stratify=y_hand, random_state=42
)

# Standardise the numeric features with statistics from the training split only.
scaler = StandardScaler()
X_train = np.concatenate([scaler.fit_transform(X_num_train), X_mode_train], axis=1)
X_val = np.concatenate([scaler.transform(X_num_val), X_mode_val], axis=1)

# Persist the fitted scaler for the later cells.
joblib.dump(scaler, "scaler_main.pkl")

# Full matrix scaled with the same statistics; the training code below reads
# X_scaled.shape[1] as the model input width.
X_num_scaled = scaler.transform(X_num)
X_scaled = np.concatenate([X_num_scaled, X_mode], axis=1)

# 建立 Dataset
class HandDataset(Dataset):
    """In-memory dataset of (feature vector, label) pairs for the handedness model.

    Features and labels are both stored as float32 tensors.
    """

    def __init__(self, X, y):
        features = torch.tensor(X, dtype=torch.float32)
        labels = torch.tensor(y, dtype=torch.float32)
        self.X = features
        self.y = labels

    def __len__(self):
        # Number of samples = length of the feature tensor's first axis.
        return len(self.X)

    def __getitem__(self, idx):
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# Wrap both splits in DataLoaders; only the training batches are shuffled.
hand_train_ds = HandDataset(X_train, y_train)
hand_val_ds = HandDataset(X_val, y_val)
train_loader = DataLoader(hand_train_ds, shuffle=True, batch_size=64)
val_loader = DataLoader(hand_val_ds, batch_size=64)

# 定義模型
class Binary_Hand_Classifier(nn.Module):
    """MLP (input -> 128 -> 64 -> 1) that returns one raw logit per sample."""

    def __init__(self, input_dim):
        super().__init__()
        hidden_layers = [
            nn.Linear(input_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
        ]
        output_layer = nn.Linear(64, 1)
        # The position of each layer fixes its state_dict key (net.0, net.1, ...),
        # so the order must stay as is for saved weights to load.
        self.net = nn.Sequential(*hidden_layers, output_layer)

    def forward(self, x):
        logits = self.net(x)
        # Drop the trailing size-1 dimension: (batch, 1) -> (batch,).
        return logits.squeeze(-1)

# Train the handedness model (on the GPU when one is available).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)  # reproducible weight initialisation and batch shuffling
model = Binary_Hand_Classifier(X_scaled.shape[1]).to(device)

# BCEWithLogitsLoss only re-weights the positive class, so to reproduce
# "balanced" class weights pos_weight has to be the ratio w_pos / w_neg
# (= n_negative / n_positive), not w_pos on its own.
weights = compute_class_weight(class_weight="balanced", classes=np.unique(y_train), y=y_train)
pos_weight = torch.tensor([weights[1] / weights[0]], dtype=torch.float32).to(device)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Training loop (the scaler is already saved right after it is fitted).
for epoch in range(20):
    model.train()
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        pred = model(xb)
        loss = criterion(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation accuracy at a 0.5 probability threshold.
    model.eval()
    all_preds, all_trues = [], []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            out = torch.sigmoid(model(xb))
            preds = (out > 0.5).float()
            all_preds.extend(preds.cpu().numpy())
            all_trues.extend(yb.cpu().numpy())

    acc = accuracy_score(all_trues, all_preds)
    print(f"Epoch {epoch+1}: val_acc = {acc:.4f}")

# Save the weights of the final epoch.
torch.save(model.state_dict(), "hand_model.pth")


Epoch 1: val_acc = 0.9309
Epoch 2: val_acc = 0.9770
Epoch 3: val_acc = 0.9847
Epoch 4: val_acc = 0.9923
Epoch 5: val_acc = 0.9949
Epoch 6: val_acc = 0.9974
Epoch 7: val_acc = 0.9974
Epoch 8: val_acc = 1.0000
Epoch 9: val_acc = 0.9974
Epoch 10: val_acc = 1.0000
Epoch 11: val_acc = 0.9974
Epoch 12: val_acc = 1.0000
Epoch 13: val_acc = 1.0000
Epoch 14: val_acc = 1.0000
Epoch 15: val_acc = 1.0000
Epoch 16: val_acc = 1.0000
Epoch 17: val_acc = 1.0000
Epoch 18: val_acc = 1.0000
Epoch 19: val_acc = 1.0000
Epoch 20: val_acc = 1.0000


# 球齡和等級

In [None]:

# Reload the cached training features.
df = pd.read_csv("train_features.csv")

# Count how many recordings fall into each level class, ordered by class id.
level_column = df["level"]
level_counts = level_column.value_counts().sort_index()
level_counts

Unnamed: 0_level_0,count
level,Unnamed: 1_level_1
0,715
1,201
2,136
3,903


In [None]:
# Load the cached training features.
df = pd.read_csv("train_features.csv")

# Separate features from the two multi-class targets.
X_full = df.drop(columns=["unique_id", "gender", "hold racket handed", "play years", "level"])
y_years = df["play years"].values
y_level = df["level"].values

# The last 10 columns are the mode one-hot and are left unscaled.
X_num = X_full.iloc[:, :-10].values
X_mode = X_full.iloc[:, -10:].values

# Reuse the scaler that was fitted and saved earlier. Use transform(), not
# fit_transform(): re-fitting a loaded scaler discards the saved statistics and
# defeats the purpose of persisting it.
scaler = joblib.load("scaler_main.pkl")
X_num_scaled = scaler.transform(X_num)

# Final feature matrix: scaled numeric features + raw mode one-hot.
X_all = np.concatenate([X_num_scaled, X_mode], axis=1)

# Stratified train/validation split, one per target.
X_train_y, X_val_y, y_train_y, y_val_y = train_test_split(X_all, y_years, test_size=0.2, stratify=y_years, random_state=42)
X_train_l, X_val_l, y_train_l, y_val_l = train_test_split(X_all, y_level, test_size=0.2, stratify=y_level, random_state=42)

# Dataset 類別
class MultiClassDataset(Dataset):
    """In-memory dataset of (float32 feature vector, int64 class index) pairs."""

    def __init__(self, X, y):
        features = torch.tensor(X, dtype=torch.float32)
        class_ids = torch.tensor(y, dtype=torch.long)
        self.X = features
        self.y = class_ids

    def __len__(self):
        # Number of samples = length of the feature tensor's first axis.
        return len(self.X)

    def __getitem__(self, idx):
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# DataLoaders for the play-years target; only training batches are shuffled.
years_train_ds = MultiClassDataset(X_train_y, y_train_y)
years_val_ds = MultiClassDataset(X_val_y, y_val_y)
train_loader_years = DataLoader(years_train_ds, shuffle=True, batch_size=64)
val_loader_years = DataLoader(years_val_ds, batch_size=64)

# DataLoaders for the level target.
level_train_ds = MultiClassDataset(X_train_l, y_train_l)
level_val_ds = MultiClassDataset(X_val_l, y_val_l)
train_loader_level = DataLoader(level_train_ds, shuffle=True, batch_size=64)
val_loader_level = DataLoader(level_val_ds, batch_size=64)

# 模型
class MultiClassifier(nn.Module):
    """MLP (input -> 128 -> 64 -> num_classes) that returns raw class logits."""

    def __init__(self, input_dim, num_classes):
        super().__init__()
        hidden_layers = [
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
        ]
        output_layer = nn.Linear(64, num_classes)
        # Layer positions determine the state_dict keys (net.0, net.3, net.6).
        self.net = nn.Sequential(*hidden_layers, output_layer)

    def forward(self, x):
        logits = self.net(x)
        return logits

# 訓練函式
def train_model(X_train, X_val, y_train, y_val, train_loader, val_loader, num_classes, model_name,
                epochs=20, lr=1e-4):
    """Train a MultiClassifier, print validation accuracy per epoch, and save it.

    Args:
        X_train: Training feature matrix; only ``X_train.shape[1]`` is read, to
            size the model input.
        X_val: Accepted for interface compatibility; not read here.
        y_train: Training labels; used to derive balanced class weights.
        y_val: Accepted for interface compatibility; not read here.
        train_loader: Iterable of ``(features, labels)`` training batches.
        val_loader: Iterable of ``(features, labels)`` validation batches.
        num_classes: Number of output classes; labels must be ``0..num_classes-1``.
        model_name: Used as the prefix of the printed log lines and as the file
            stem of the saved weights (``<model_name>.pth``).
        epochs: Number of passes over ``train_loader`` (default 20, the value
            that used to be hard-coded).
        lr: Adam learning rate (default 1e-4, the value that used to be hard-coded).

    Returns:
        The trained model, in the state reached after the last epoch.

    Raises:
        ValueError: If the labels in ``y_train`` are not exactly
            ``0..num_classes-1``; the class-weight vector would otherwise not
            match the model's output size.
    """
    labels_present = np.unique(y_train)
    if not np.array_equal(labels_present, np.arange(num_classes)):
        raise ValueError(
            f"{model_name}: expected labels 0..{num_classes - 1} in y_train, "
            f"found {labels_present.tolist()}"
        )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MultiClassifier(X_train.shape[1], num_classes).to(device)

    # Weight the loss inversely to class frequency to counter class imbalance.
    weights = compute_class_weight(class_weight="balanced", classes=labels_present, y=y_train)
    class_weights = torch.tensor(weights, dtype=torch.float32).to(device)
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            pred = model(xb)
            loss = criterion(pred, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Validation accuracy using the arg-max class.
        model.eval()
        all_preds, all_trues = [], []
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                out = model(xb)
                preds = torch.argmax(out, dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_trues.extend(yb.cpu().numpy())
        acc = accuracy_score(all_trues, all_preds)
        print(f"{model_name} Epoch {epoch+1}: val_acc = {acc:.4f}")

    # Save the weights of the final epoch.
    torch.save(model.state_dict(), f"{model_name}.pth")
    return model

# Train one classifier per target: play years (3 classes) and level (4 classes).
model_years = train_model(
    X_train_y, X_val_y, y_train_y, y_val_y,
    train_loader_years, val_loader_years,
    num_classes=3, model_name="play_years_model",
)
model_level = train_model(
    X_train_l, X_val_l, y_train_l, y_val_l,
    train_loader_level, val_loader_level,
    num_classes=4, model_name="level_model",
)


play_years_model Epoch 1: val_acc = 0.5754
play_years_model Epoch 2: val_acc = 0.6240
play_years_model Epoch 3: val_acc = 0.6343
play_years_model Epoch 4: val_acc = 0.6650
play_years_model Epoch 5: val_acc = 0.6777
play_years_model Epoch 6: val_acc = 0.6880
play_years_model Epoch 7: val_acc = 0.6982
play_years_model Epoch 8: val_acc = 0.7084
play_years_model Epoch 9: val_acc = 0.7289
play_years_model Epoch 10: val_acc = 0.7442
play_years_model Epoch 11: val_acc = 0.7570
play_years_model Epoch 12: val_acc = 0.7749
play_years_model Epoch 13: val_acc = 0.7954
play_years_model Epoch 14: val_acc = 0.7980
play_years_model Epoch 15: val_acc = 0.8082
play_years_model Epoch 16: val_acc = 0.8133
play_years_model Epoch 17: val_acc = 0.8184
play_years_model Epoch 18: val_acc = 0.8389
play_years_model Epoch 19: val_acc = 0.8491
play_years_model Epoch 20: val_acc = 0.8542
level_model Epoch 1: val_acc = 0.3836
level_model Epoch 2: val_acc = 0.6113
level_model Epoch 3: val_acc = 0.6010
level_model Epo

# 性別


In [None]:
# Load the cached training features and pick the gender target.
df = pd.read_csv("train_features.csv")
X_full = df.drop(columns=["unique_id", "gender", "hold racket handed", "play years", "level"])
y_gender = df["gender"].values

# Standardise the numeric features (the last 10 mode one-hot columns stay raw).
# The saved scaler is applied with transform(); re-fitting it here would
# discard the statistics it was saved with.
X_num = X_full.iloc[:, :-10].values
X_mode = X_full.iloc[:, -10:].values
scaler = joblib.load("scaler_main.pkl")
X_num_scaled = scaler.transform(X_num)
X_combined = np.concatenate([X_num_scaled, X_mode], axis=1)

# Stratified train/validation split.
X_train, X_val, y_train, y_val = train_test_split(X_combined, y_gender, test_size=0.2, stratify=y_gender, random_state=42)

# Class-imbalance handling. `device` is defined here so this cell does not
# depend on a variable left over from another cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
weights = compute_class_weight(class_weight="balanced", classes=np.unique(y_train), y=y_train)
# BCEWithLogitsLoss only re-weights positives, so balanced weighting needs the
# ratio w_pos / w_neg (= n_negative / n_positive) rather than w_pos alone.
pos_weight = torch.tensor([weights[1] / weights[0]], dtype=torch.float32).to(device)

# Dataset
class GenderDataset(Dataset):
    """In-memory dataset of (feature vector, label) pairs for the gender model.

    Features and labels are both stored as float32 tensors.
    """

    def __init__(self, X, y):
        features = torch.tensor(X, dtype=torch.float32)
        labels = torch.tensor(y, dtype=torch.float32)
        self.X = features
        self.y = labels

    def __len__(self):
        # Number of samples = length of the feature tensor's first axis.
        return len(self.X)

    def __getitem__(self, idx):
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# Wrap both splits in DataLoaders; only the training batches are shuffled.
gender_train_ds = GenderDataset(X_train, y_train)
gender_val_ds = GenderDataset(X_val, y_val)
train_loader = DataLoader(gender_train_ds, shuffle=True, batch_size=64)
val_loader = DataLoader(gender_val_ds, batch_size=64)

# 模型架構
class Binary_Gender_Classifier(nn.Module):
    """MLP (input -> 128 -> 32 -> 1) that returns one raw logit per sample."""

    def __init__(self, input_dim):
        super().__init__()
        hidden_layers = [
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
        ]
        output_layer = nn.Linear(32, 1)
        # Layer positions determine the state_dict keys (net.0, net.3, net.6).
        self.net = nn.Sequential(*hidden_layers, output_layer)

    def forward(self, x):
        logits = self.net(x)
        # Drop the trailing size-1 dimension: (batch, 1) -> (batch,).
        return logits.squeeze(-1)

# Training setup. A single `device` is defined here and used for the model,
# the loss weight and every batch, so they can never end up on different
# devices and the cell does not rely on a variable from another cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)  # reproducible weight initialisation and batch shuffling
model = Binary_Gender_Classifier(X_train.shape[1]).to(device)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Training loop. val_accs collects the per-epoch validation accuracy;
# train_accs is kept for compatibility but is not populated in this cell.
train_accs, val_accs = [], []
for epoch in range(30):
    model.train()
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        pred = model(xb)
        loss = criterion(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation accuracy at a 0.5 probability threshold.
    model.eval()
    preds, truths = [], []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device)
            out = torch.sigmoid(model(xb)).cpu()
            preds.extend((out > 0.5).int().numpy())
            truths.extend(yb.numpy())
    acc = accuracy_score(truths, preds)
    val_accs.append(acc)
    print(f"Epoch {epoch+1}: val_acc = {acc:.4f}")

# Save the weights of the final epoch.
torch.save(model.state_dict(), "gender_model.pth")


Epoch 1: val_acc = 0.9156
Epoch 2: val_acc = 0.9412
Epoch 3: val_acc = 0.9591
Epoch 4: val_acc = 0.9693
Epoch 5: val_acc = 0.9744
Epoch 6: val_acc = 0.9719
Epoch 7: val_acc = 0.9744
Epoch 8: val_acc = 0.9795
Epoch 9: val_acc = 0.9693
Epoch 10: val_acc = 0.9770
Epoch 11: val_acc = 0.9795
Epoch 12: val_acc = 0.9744
Epoch 13: val_acc = 0.9795
Epoch 14: val_acc = 0.9821
Epoch 15: val_acc = 0.9744
Epoch 16: val_acc = 0.9770
Epoch 17: val_acc = 0.9821
Epoch 18: val_acc = 0.9847
Epoch 19: val_acc = 0.9795
Epoch 20: val_acc = 0.9795
Epoch 21: val_acc = 0.9795
Epoch 22: val_acc = 0.9693
Epoch 23: val_acc = 0.9795
Epoch 24: val_acc = 0.9770
Epoch 25: val_acc = 0.9795
Epoch 26: val_acc = 0.9770
Epoch 27: val_acc = 0.9719
Epoch 28: val_acc = 0.9693
Epoch 29: val_acc = 0.9770
Epoch 30: val_acc = 0.9821


# 生成submission

In [None]:
# ====== Reload the cached test features ======
test_path = "test_features.csv"
df_test = pd.read_csv(test_path)
X_test_raw = df_test.drop(columns=["unique_id"])
unique_ids = df_test["unique_id"].tolist()

# Split into numeric features and the trailing 10-column mode one-hot.
num_features = X_test_raw.shape[1] - 10
X_main = X_test_raw.iloc[:, :num_features].values
X_mode = X_test_raw.iloc[:, num_features:].values

# Standardise with the statistics learned on the training data. This must be
# transform(), not fit_transform(): re-fitting on the test set would scale the
# test features differently from what the models saw during training.
scaler = joblib.load("scaler_main.pkl")
X_main_scaled = scaler.transform(X_main)
X_test_scaled = np.hstack([X_main_scaled, X_mode])
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

# ====== Rebuild the four models, load their saved weights, and run inference ======
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_inputs = X_test_scaled.shape[1]

model_hand = Binary_Hand_Classifier(n_inputs).to(device)
model_gender = Binary_Gender_Classifier(n_inputs).to(device)
model_years = MultiClassifier(n_inputs, 3).to(device)
model_level = MultiClassifier(n_inputs, 4).to(device)

# Restore each model from its checkpoint and switch it to evaluation mode.
for net, weight_file in (
    (model_hand, "hand_model.pth"),
    (model_gender, "gender_model.pth"),
    (model_years, "play_years_model.pth"),
    (model_level, "level_model.pth"),
):
    net.load_state_dict(torch.load(weight_file, map_location=device))
    net.eval()

with torch.no_grad():
    test_batch = X_test_tensor.to(device)
    # Binary heads -> sigmoid probability; multi-class heads -> softmax over classes.
    pred_hand = torch.sigmoid(model_hand(test_batch)).cpu().numpy()
    pred_gender = torch.sigmoid(model_gender(test_batch)).cpu().numpy()
    pred_years = torch.softmax(model_years(test_batch), dim=1).cpu().numpy()
    pred_level = torch.softmax(model_level(test_batch), dim=1).cpu().numpy()


# Re-normalise every row of the multi-class probabilities so it sums to 1.
years_row_sum = pred_years.sum(axis=1, keepdims=True)
level_row_sum = pred_level.sum(axis=1, keepdims=True)
pred_years = pred_years / years_row_sum
pred_level = pred_level / level_row_sum

# Assemble the submission columns in the required order; the binary columns
# keep the raw predicted probability.
years_cols = ["play years_0", "play years_1", "play years_2"]
level_cols = ["level_2", "level_3", "level_4", "level_5"]
submission_columns = {
    "unique_id": unique_ids,
    "gender": pred_gender,
    "hold racket handed": pred_hand,
}
submission_columns.update(zip(years_cols, pred_years.T))
submission_columns.update(zip(level_cols, pred_level.T))
submission = pd.DataFrame(submission_columns)
from decimal import Decimal, ROUND_HALF_UP
import numpy as np

# Fill NaN predictions with the column mean before formatting.
submission = submission.fillna(submission.mean(numeric_only=True))

def format_float(val):
    """Format a number as a string with exactly four decimals, rounding half up.

    Args:
        val: A real number (Python float/int or a NumPy scalar such as
            ``np.float32``), or a missing value.

    Returns:
        The formatted string, e.g. ``"0.0313"``; missing values (``pd.isna``)
        are returned unchanged.
    """
    if pd.isna(val):
        return val  # keep NaN as is (normally already filled by the caller)
    # Decimal() only accepts built-in numbers, so coerce NumPy scalars first;
    # for a built-in float this is a no-op.
    exact = Decimal(float(val))
    rounded = exact.quantize(Decimal('0.0001'), rounding=ROUND_HALF_UP)
    return "{:.4f}".format(rounded)

# Format every prediction column (everything after unique_id) to 4 decimals.
value_columns = list(submission.columns)[1:]
for col in value_columns:
    submission[col] = submission[col].map(format_float)

# Write the CSV; the values are already strings, so no scientific notation appears.
submission.to_csv("submission.csv", index=False)