# Try another dataset

In [1]:
# Notebook setup: imports, project paths, and the default experiment arguments.
from autodp import rdp_acct
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import itertools


import os
# Project root = parent directory of the current working directory.
# NOTE(review): presumably the notebook is launched from a sub-directory of the
# project (e.g. exp/) — confirm, since every path below is derived from this.
path_project = os.path.dirname(os.path.abspath('.'))
import sys
# Extend the module search path with <root>/src and <root>/exp/script.
# NOTE(review): `options`, `run_simulation` and `mylogger` are presumably resolved
# from these directories, so the appends must stay above those imports.
sys.path.append(os.path.join(path_project, 'src'))
sys.path.append(os.path.join(path_project, 'exp/script'))

import options

# Directories under <root>/exp for images, pickles and results.
img_path = os.path.join(path_project, 'exp', 'img')
pickle_path = os.path.join(path_project, 'exp', 'pickle')
results_path = os.path.join(path_project, 'exp', 'results')
# Default arguments, built by the `options` module from the project root.
default_args = options.build_default_args(path_project)

import copy
from run_simulation import run_simulation

from mylogger import logger_set_debug, logger_set_info, logger_set_warning
from opacus.accountants import RDPAccountant

import pickle

# Directory that holds the datasets: <root>/dataset.
dataset_path = os.path.join(path_project, 'dataset')

## Body performance
https://www.kaggle.com/datasets/kukuroo3/body-performance-data

In [50]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset

In [51]:
# Read the body-performance CSV from <dataset_path>/body/ into a DataFrame.
body_csv_path = os.path.join(dataset_path, "body", "bodyPerformance.csv")
df = pd.read_csv(body_csv_path)

In [52]:
# Clean the loaded frame without mutating it in place: drop exact duplicate rows,
# then replace the string codes with integers (M/F -> 0/1, A-D -> 1-4).
df = (
    df.drop_duplicates()
    .replace({'M': 0, 'F': 1})
    .replace({'A': 1, 'B': 2, 'C': 3, 'D': 4})
)

# Features are every column except the last; the last column is the target.
# An explicit float dtype keeps the feature matrix numeric even if a replaced
# column is left with object dtype by `replace`.
X = df.iloc[:, :-1].to_numpy(dtype=float)    # Independent variables
y = df.iloc[:, -1].to_numpy()                # Dependent variable

# Encode the class values as consecutive integers starting at 0, which is the
# label format nn.CrossEntropyLoss expects.
# `LabelEncoder` comes from sklearn.preprocessing (imported in the import cell).
encoder = LabelEncoder()
encoder.fit(y)
encoded_Y = encoder.transform(y)


# Convert to PyTorch tensors: float32 features, int64 class indices.
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(encoded_Y, dtype=torch.long)

In [41]:
import torch
import torch.nn as nn

class ANNModel(nn.Module):
    """Fully connected classifier with eight linear layers and two dropout layers.

    Layer widths: input_dim -> 20 -> 35 -> 50 -> 65 -> 80 -> 55 -> 35 -> output_dim.
    Dropout (p=0.2) follows the activations of the 4th and 5th linear layers.
    """

    def __init__(self, input_dim, output_dim):
        """Create the layers as attributes fc1..fc8, dropout1 and dropout2.

        Args:
            input_dim: number of input features per sample.
            output_dim: number of output units (one logit per class).
        """
        super().__init__()
        widths = (input_dim, 20, 35, 50, 65, 80, 55, 35, output_dim)
        # Sub-modules are registered in the order
        # fc1, fc2, fc3, fc4, dropout1, fc5, dropout2, fc6, fc7, fc8.
        for idx, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, f"fc{idx}", nn.Linear(n_in, n_out))
            if idx == 4:
                self.dropout1 = nn.Dropout(0.2)
            elif idx == 5:
                self.dropout2 = nn.Dropout(0.2)

    def forward(self, x):
        """Return the raw output of fc8 for input batch `x`."""
        h = x
        for layer in (self.fc1, self.fc2, self.fc3):
            h = torch.relu(layer(h))
        h = self.dropout1(torch.relu(self.fc4(h)))
        h = self.dropout2(torch.relu(self.fc5(h)))
        for layer in (self.fc6, self.fc7):
            h = torch.relu(layer(h))
        # No softmax here: it is applied later, outside this module.
        return self.fc8(h)

# Hyper-parameters. The layer dimensions are read from the prepared tensors
# instead of being hard-coded, so they follow the data if the columns change.
seed = 42                              # fixed seed for repeatable runs
input_dim = X_tensor.shape[1]          # number of input features
output_dim = int(y_tensor.max()) + 1   # number of classes (labels are 0..K-1)
learning_rate = 0.001
batch_size = 128
epochs = 200

# Seed torch's global generator (weight init, DataLoader shuffling, dropout) and
# the fold split, so that re-running the cell gives the same numbers.
torch.manual_seed(seed)

# 5-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=seed)
acc_scores = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
    print('\nFold:', fold + 1)

    # Split the tensors into this fold's training and validation parts.
    X_train, y_train = X_tensor[train_idx], y_tensor[train_idx]
    X_val, y_val = X_tensor[val_idx], y_tensor[val_idx]

    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)

    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

    # A fresh model and optimizer per fold, so no weights leak between folds.
    model = ANNModel(input_dim, output_dim)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    model.train()  # explicit: dropout must be active while training
    for epoch in range(epochs):
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Validation: dropout off, no gradient tracking.
    model.eval()
    y_pred = []
    y_true = []
    with torch.no_grad():
        for data, target in val_loader:
            output = model(data)
            # Predicted class = index of the largest logit.
            _, predicted = torch.max(output, 1)
            y_pred.extend(predicted.numpy())
            y_true.extend(target.numpy())

    acc = accuracy_score(y_true, y_pred)
    acc_scores.append(acc)
    print('Accuracy:', acc)

print('Mean Accuracy:', sum(acc_scores) / len(acc_scores))


Fold: 1
Accuracy: 0.723404255319149

Fold: 2
Accuracy: 0.711832773422919

Fold: 3
Accuracy: 0.6971620612397311

Fold: 4
Accuracy: 0.7106049290515309

Fold: 5
Accuracy: 0.7233009708737864
Mean Accuracy: 0.7132609979814233
