In [7]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import balanced_accuracy_score
from torch.utils.data import DataLoader, TensorDataset
from scipy.io import loadmat


# MLP 모델 정의
class MLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 100),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(100, 100),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(100, 10),
            nn.ReLU(),
            nn.Linear(10, num_classes)  # 출력층: 클래스 수에 따라 동적으로 설정
        )

    def forward(self, x):
        return self.model(x)

# 데이터셋 불러오기 함수
def load_dataset(file_path):
    data = loadmat(file_path)
    X = data['X']
    y = data['Y'].flatten()
    return X, y

# 데이터 전처리 함수
def preprocess_data(X, y):
    # 레이블을 0부터 시작하도록 정규화
    y = y - np.min(y)
    
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)
    
    return X, y

# 모델 학습 함수
def train_model(model, X_train, y_train, X_val, y_val, criterion, optimizer, scheduler, epochs=500, early_stopping_patience=200):
    model.train()
    best_val_loss = float('inf')
    patience_counter = 0
    
    for epoch in range(epochs):
        inputs = torch.tensor(X_train, dtype=torch.float32)
        labels = torch.tensor(y_train, dtype=torch.long)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 2.5)
        optimizer.step()
        scheduler.step()

        # Early stopping
        val_loss = criterion(model(torch.tensor(X_val, dtype=torch.float32)), torch.tensor(y_val, dtype=torch.long)).item()
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                break

# 모델 평가 함수
def evaluate_model(model, val_loader):
    model.eval()
    all_labels = []
    all_predictions = []
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(torch.float32)
            labels = labels.to(torch.long)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            all_labels.extend(labels.numpy())
            all_predictions.extend(predicted.numpy())
    accuracy = balanced_accuracy_score(all_labels, all_predictions)
    return accuracy

if __name__ == "__main__":
    # 파일 경로 설정
    file_paths = [
        'CLL_SUB_111.mat',
        'lung.mat',
        'Prostate_GE.mat',
        'SMK_CAN_187.mat',
        'TOX_171.mat'
    ]
    
    for file_path in file_paths:
        X, y = load_dataset(file_path)
        X, y = preprocess_data(X, y)
        
        input_dim = X.shape[1]
        num_classes = len(np.unique(y))
        
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        fold_accuracies = []

        for train_index, val_index in kf.split(X):
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # 데이터 로더 생성
            batch_size = 8  # 배치 크기 설정
            train_dataset = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
            val_dataset = TensorDataset(torch.tensor(X_val), torch.tensor(y_val))
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

            # 모델 재정의 및 초기화
            model = MLP(input_dim, num_classes)
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.AdamW(model.parameters(), lr=0.003, weight_decay=1e-4)
            scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.1, total_iters=500)
            
            train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=500)
            test_accuracy = evaluate_model(model, val_loader)
            fold_accuracies.append(test_accuracy)

        mean_accuracy = np.mean(fold_accuracies)
        print(f'Balanced Accuracy on dataset {file_path}: {mean_accuracy:.4f}')



Balanced Accuracy on dataset CLL_SUB_111.mat: 0.7968
Balanced Accuracy on dataset lung.mat: 0.9420
Balanced Accuracy on dataset Prostate_GE.mat: 0.9207
Balanced Accuracy on dataset SMK_CAN_187.mat: 0.6375
Balanced Accuracy on dataset TOX_171.mat: 0.8858


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import balanced_accuracy_score
from scipy.io import loadmat
from sklearn.decomposition import TruncatedSVD

# MLP 모델 정의
class MLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 100),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(100, 100),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(100, 10),
            nn.ReLU(),
            nn.Linear(10, num_classes)  # 출력층: 클래스 수에 따라 동적으로 설정
        )

    def forward(self, x):
        return self.model(x)

# 데이터셋 불러오기 함수
def load_dataset(file_path):
    data = loadmat(file_path)
    X = data['X']
    y = data['Y'].flatten()
    return X, y

# 데이터 전처리 함수
def preprocess_data(X, y, n_components=50):
    # 레이블을 0부터 시작하도록 정규화
    y = y - np.min(y)
    
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)
    
    # SVD 적용
    svd = TruncatedSVD(n_components=n_components, random_state=42)
    X = svd.fit_transform(X)
    
    return X, y


# 모델 학습 함수
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=500, early_stopping_patience=200):
    model.train()
    best_val_loss = float('inf')
    patience_counter = 0
    
    for epoch in range(epochs):
        for inputs, labels in train_loader:
            inputs = inputs.to(torch.float32)
            labels = labels.to(torch.long)
        
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 2.5)
            optimizer.step()
        scheduler.step()

        # Early stopping
        val_loss = 0.0
        for inputs, labels in val_loader:
            inputs = inputs.to(torch.float32)
            labels = labels.to(torch.long)
            outputs = model(inputs)
            loss = criterion(outputs, labels).item()
            val_loss += loss
        
        val_loss /= len(val_loader)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                break


# 모델 평가 함수
def evaluate_model(model, val_loader):
    model.eval()
    all_labels = []
    all_predictions = []
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(torch.float32)
            labels = labels.to(torch.long)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            all_labels.extend(labels.numpy())
            all_predictions.extend(predicted.numpy())
    accuracy = balanced_accuracy_score(all_labels, all_predictions)
    return accuracy

if __name__ == "__main__":
    # 파일 경로 설정
    file_paths = [
        'CLL_SUB_111.mat',
        'lung.mat',
        'Prostate_GE.mat',
        'SMK_CAN_187.mat',
        'TOX_171.mat'
    ]
    
    for file_path in file_paths:
        X, y = load_dataset(file_path)
        X, y = preprocess_data(X, y)
        
        input_dim = X.shape[1]
        num_classes = len(np.unique(y))
        
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        fold_accuracies = []

        for train_index, val_index in kf.split(X):
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # 데이터 로더 생성
            batch_size = 8  # 배치 크기 설정
            train_dataset = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
            val_dataset = TensorDataset(torch.tensor(X_val), torch.tensor(y_val))
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

            # 모델 재정의 및 초기화
            model = MLP(input_dim, num_classes)
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.AdamW(model.parameters(), lr=0.003, weight_decay=1e-4)
            scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.1, total_iters=500)
            
            train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=500)
            test_accuracy = evaluate_model(model, val_loader)
            fold_accuracies.append(test_accuracy)

        mean_accuracy = np.mean(fold_accuracies)
        print(f'Balanced Accuracy on dataset {file_path}: {mean_accuracy:.4f}')



Balanced Accuracy on dataset CLL_SUB_111.mat: 0.8321
Balanced Accuracy on dataset lung.mat: 0.9116
Balanced Accuracy on dataset Prostate_GE.mat: 0.8766
Balanced Accuracy on dataset SMK_CAN_187.mat: 0.6963
Balanced Accuracy on dataset TOX_171.mat: 0.9584


In [27]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import balanced_accuracy_score
from scipy.io import loadmat
from sklearn.decomposition import TruncatedSVD

# MLP 모델 정의
class MLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 100),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(100, 100),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(100, 10),
            nn.ReLU(),
            nn.Linear(10, num_classes)  # 출력층: 클래스 수에 따라 동적으로 설정
        )

    def forward(self, x):
        return self.model(x)

# 데이터셋 불러오기 함수
def load_dataset(file_path):
    data = loadmat(file_path)
    X = data['X']
    y = data['Y'].flatten()
    return X, y

# 데이터 전처리 함수
def preprocess_data(X, y, n_components=50):
    # 레이블을 0부터 시작하도록 정규화
    y = y - np.min(y)
    
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)
    
    # SVD 적용
    svd = TruncatedSVD(n_components=n_components, random_state=42)
    X = svd.fit_transform(X)
    
    return X, y

# 모델 학습 함수
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=500, early_stopping_patience=200):
    model.train()
    best_val_loss = float('inf')
    patience_counter = 0
    
    for epoch in range(epochs):
        for inputs, labels in train_loader:
            inputs = inputs.to(torch.float32)
            labels = labels.to(torch.long)
        
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 2.5)
            optimizer.step()
        scheduler.step()

        # Early stopping
        val_loss = 0.0
        for inputs, labels in val_loader:
            inputs = inputs.to(torch.float32)
            labels = labels.to(torch.long)
            outputs = model(inputs)
            loss = criterion(outputs, labels).item()
            val_loss += loss
        
        val_loss /= len(val_loader)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                break

# 모델 평가 함수
def evaluate_model(model, val_loader):
    model.eval()
    all_labels = []
    all_predictions = []
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(torch.float32)
            labels = labels.to(torch.long)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            all_labels.extend(labels.numpy())
            all_predictions.extend(predicted.numpy())
    accuracy = balanced_accuracy_score(all_labels, all_predictions)
    return accuracy


if __name__ == "__main__":
    # 파일 경로 설정
    file_paths = [
        'CLL_SUB_111.mat',
        'lung.mat',
        'Prostate_GE.mat',
        'SMK_CAN_187.mat',
        'TOX_171.mat'
    ]
    
    for file_path in file_paths:
        X, y = load_dataset(file_path)
        X, y = preprocess_data(X, y)
        
        input_dim = X.shape[1]
        num_classes = len(np.unique(y))
        
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        fold_accuracies = []

        for train_index, val_index in kf.split(X):
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # 데이터 로더 생성
            batch_size = 8  # 배치 크기 설정
            train_dataset = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
            val_dataset = TensorDataset(torch.tensor(X_val), torch.tensor(y_val))
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

            # 모델 재정의 및 초기화
            model = MLP(input_dim, num_classes)
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.AdamW(model.parameters(), lr=0.003, weight_decay=1e-4)
            scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.1, total_iters=500)
            
            train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=500)
            test_accuracy = evaluate_model(model, val_loader)
            fold_accuracies.append(test_accuracy)

        mean_accuracy = np.mean(fold_accuracies)
        print(f'Balanced Accuracy on dataset {file_path}: {mean_accuracy:.4f}')

Balanced Accuracy on dataset CLL_SUB_111.mat: 0.7032
Balanced Accuracy on dataset lung.mat: 0.8390
Balanced Accuracy on dataset Prostate_GE.mat: 0.8987
Balanced Accuracy on dataset SMK_CAN_187.mat: 0.6711
Balanced Accuracy on dataset TOX_171.mat: 0.9614


In [23]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import balanced_accuracy_score
from scipy.io import loadmat
from sklearn.decomposition import NMF

# MLP 모델 정의
class MLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 100),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(100, 100),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(100, 10),
            nn.ReLU(),
            nn.Linear(10, num_classes)  # 출력층: 클래스 수에 따라 동적으로 설정
        )

    def forward(self, x):
        return self.model(x)

# 데이터셋 불러오기 함수
def load_dataset(file_path):
    data = loadmat(file_path)
    X = data['X']
    y = data['Y'].flatten()
    return X, y

# 데이터 전처리 함수
def preprocess_data(X, y, n_components=50):
    # 레이블을 0부터 시작하도록 정규화
    y = y - np.min(y)
    
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)
    
    # NMF 적용
    nmf = NMF(n_components=n_components, init='random', random_state=42, max_iter=1000)
    X = nmf.fit_transform(X)
    
    return X, y


# 모델 학습 함수
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=500, early_stopping_patience=200):
    model.train()
    best_val_loss = float('inf')
    patience_counter = 0
    
    for epoch in range(epochs):
        for inputs, labels in train_loader:
            inputs = inputs.to(torch.float32)
            labels = labels.to(torch.long)
        
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 2.5)
            optimizer.step()
        scheduler.step()

        # Early stopping
        val_loss = 0.0
        for inputs, labels in val_loader:
            inputs = inputs.to(torch.float32)
            labels = labels.to(torch.long)
            outputs = model(inputs)
            loss = criterion(outputs, labels).item()
            val_loss += loss
        
        val_loss /= len(val_loader)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                break


# 모델 평가 함수
def evaluate_model(model, val_loader):
    model.eval()
    all_labels = []
    all_predictions = []
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(torch.float32)
            labels = labels.to(torch.long)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            all_labels.extend(labels.numpy())
            all_predictions.extend(predicted.numpy())
    accuracy = balanced_accuracy_score(all_labels, all_predictions)
    return accuracy

if __name__ == "__main__":
    # 파일 경로 설정
    file_paths = [
        'CLL_SUB_111.mat',
        'lung.mat',
        'Prostate_GE.mat',
        'SMK_CAN_187.mat',
        'TOX_171.mat'
    ]
    
    for file_path in file_paths:
        X, y = load_dataset(file_path)
        X, y = preprocess_data(X, y)
        
        input_dim = X.shape[1]
        num_classes = len(np.unique(y))
        
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        fold_accuracies = []

        for train_index, val_index in kf.split(X):
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # 데이터 로더 생성
            batch_size = 8  # 배치 크기 설정
            train_dataset = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
            val_dataset = TensorDataset(torch.tensor(X_val), torch.tensor(y_val))
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

            # 모델 재정의 및 초기화
            model = MLP(input_dim, num_classes)
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.AdamW(model.parameters(), lr=0.003, weight_decay=1e-4)
            scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.1, total_iters=500)
            
            train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=500)
            test_accuracy = evaluate_model(model, val_loader)
            fold_accuracies.append(test_accuracy)

        mean_accuracy = np.mean(fold_accuracies)
        print(f'Balanced Accuracy on dataset {file_path}: {mean_accuracy:.4f}')

Balanced Accuracy on dataset CLL_SUB_111.mat: 0.6737




Balanced Accuracy on dataset lung.mat: 0.8796
Balanced Accuracy on dataset Prostate_GE.mat: 0.8595




Balanced Accuracy on dataset SMK_CAN_187.mat: 0.6411
Balanced Accuracy on dataset TOX_171.mat: 0.9569


In [13]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import balanced_accuracy_score
from torch.utils.data import DataLoader, TensorDataset
from scipy.io import loadmat

# MLP 모델 정의
class MLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 100),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(100, 100),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(100, 10),
            nn.ReLU(),
            nn.Linear(10, num_classes)  # 출력층: 클래스 수에 따라 동적으로 설정
        )

    def forward(self, x):
        return self.model(x)

# 데이터셋 불러오기 함수
def load_dataset(file_path):
    data = loadmat(file_path)
    X = data['X']
    y = data['Y'].flatten()
    return X, y

# Dot Histogram Embedding 함수
def dot_histogram_embedding(X, bins=50):
    hist_features = []
    for i in range(X.shape[1]):
        hist, _ = np.histogram(X[:, i], bins=bins, density=True)
        hist_features.append(hist)
    return np.array(hist_features).T

# 데이터 전처리 함수
def preprocess_data(X, y, bins=50):
    # 레이블을 0부터 시작하도록 정규화
    y = y - np.min(y)
    
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)
    
    # Dot Histogram 적용
    X = dot_histogram_embedding(X, bins)
    
    return X, y

# 모델 학습 함수
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=500, early_stopping_patience=200):
    model.train()
    best_val_loss = float('inf')
    patience_counter = 0
    
    for epoch in range(epochs):
        for inputs, labels in train_loader:
            inputs = inputs.to(torch.float32)
            labels = labels.to(torch.long)
        
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 2.5)
            optimizer.step()
        scheduler.step()

        # Early stopping
        val_loss = 0.0
        for inputs, labels in val_loader:
            inputs = inputs.to(torch.float32)
            labels = labels.to(torch.long)
            outputs = model(inputs)
            loss = criterion(outputs, labels).item()
            val_loss += loss
        
        val_loss /= len(val_loader)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                break

            

# 모델 평가 함수
def evaluate_model(model, val_loader):
    model.eval()
    all_labels = []
    all_predictions = []
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(torch.float32)
            labels = labels.to(torch.long)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            all_labels.extend(labels.numpy())
            all_predictions.extend(predicted.numpy())
    accuracy = balanced_accuracy_score(all_labels, all_predictions)
    return accuracy


if __name__ == "__main__":
    # 파일 경로 설정
    file_paths = [
        'CLL_SUB_111.mat',
        'lung.mat',
        'Prostate_GE.mat',
        'SMK_CAN_187.mat',
        'TOX_171.mat'
    ]
    
    for file_path in file_paths:
        X, y = load_dataset(file_path)
        X, y = preprocess_data(X, y)
        
        input_dim = X.shape[1]
        num_classes = len(np.unique(y))
        
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        fold_accuracies = []

        for train_index, val_index in kf.split(X):
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # 데이터 로더 생성
            batch_size = 8  # 배치 크기 설정
            train_dataset = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
            val_dataset = TensorDataset(torch.tensor(X_val), torch.tensor(y_val))
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

            # 모델 재정의 및 초기화
            model = MLP(input_dim, num_classes)
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.AdamW(model.parameters(), lr=0.003, weight_decay=1e-4)
            scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.1, total_iters=500)
            
            train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=500)
            test_accuracy = evaluate_model(model, val_loader)
            fold_accuracies.append(test_accuracy)

        mean_accuracy = np.mean(fold_accuracies)
        print(f'Balanced Accuracy on dataset {file_path}: {mean_accuracy:.4f}')




Balanced Accuracy on dataset CLL_SUB_111.mat: 0.9500




Balanced Accuracy on dataset lung.mat: 1.0000




Balanced Accuracy on dataset Prostate_GE.mat: 1.0000




Balanced Accuracy on dataset SMK_CAN_187.mat: 1.0000




Balanced Accuracy on dataset TOX_171.mat: 0.6600
