In [2]:
pip install qiskit qiskit-aer qiskit-machine-learning


Note: you may need to restart the kernel to use updated packages.


In [2]:
from qiskit import QuantumCircuit, transpile
from qiskit.circuit.library import ZZFeatureMap
from qiskit_aer import Aer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# URL of the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'

# Load the dataset directly from the URL
df = pd.read_csv(url)

# Define features (X) and target (y)
X = df.drop(['name', 'status'], axis=1)
y = df['status']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define a feature map for encoding data into quantum states
feature_map = ZZFeatureMap(feature_dimension=X_train.shape[1], reps=2)

# Manually create a quantum kernel function (if QuantumKernel is unavailable)
def quantum_kernel(x1, x2):
    # Create a quantum circuit for each input vector
    qc1 = QuantumCircuit(len(x1))
    qc2 = QuantumCircuit(len(x2))
    
    # Apply feature map to each circuit by parameter binding using assign_parameters
    qc1.compose(feature_map.assign_parameters(x1), inplace=True)
    qc2.compose(feature_map.assign_parameters(x2), inplace=True)
    
    # Transpile circuits for the Aer backend
    qc1_transpiled = transpile(qc1, backend=Aer.get_backend('statevector_simulator'))
    qc2_transpiled = transpile(qc2, backend=Aer.get_backend('statevector_simulator'))
    
    # Use Aer simulator to compute state vectors and their inner product as kernel value
    backend = Aer.get_backend('statevector_simulator')
    result1 = backend.run(qc1_transpiled).result().get_statevector()
    result2 = backend.run(qc2_transpiled).result().get_statevector()
    
    return np.abs(np.dot(np.conj(result1), result2))**2

# Example usage of manual kernel function (for demonstration purposes)
kernel_matrix_train = np.array([[quantum_kernel(x1, x2) for x2 in X_train] for x1 in X_train])
svc = SVC(kernel='precomputed')
svc.fit(kernel_matrix_train, y_train)

# Evaluate on test set using the custom kernel function
kernel_matrix_test = np.array([[quantum_kernel(x1, x2) for x2 in X_train] for x1 in X_test])
accuracy = svc.score(kernel_matrix_test, y_test)

print(f"Test set accuracy: {accuracy:.2f}")


KeyboardInterrupt: 

In [4]:
from qiskit import QuantumCircuit, transpile
from qiskit.circuit.library import ZZFeatureMap
from qiskit_aer import Aer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd

# URL of the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'

# Load the dataset directly from the URL
df = pd.read_csv(url)

# Define features (X) and target (y)
X = df.drop(['name', 'status'], axis=1)
y = df['status']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Dimensionality reduction using PCA (reduce to 5 features for simplicity)
pca = PCA(n_components=5)
X_pca = pca.fit_transform(X_scaled)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

# Define a simpler feature map (reduce reps to 1)
feature_map = ZZFeatureMap(feature_dimension=X_train.shape[1], reps=1)

# Manually create a quantum kernel function (if QuantumKernel is unavailable)
def quantum_kernel(x1, x2):
    # Create a quantum circuit for each input vector
    qc1 = QuantumCircuit(len(x1))
    qc2 = QuantumCircuit(len(x2))
    
    # Apply feature map to each circuit by parameter binding using assign_parameters
    qc1.compose(feature_map.assign_parameters(x1), inplace=True)
    qc2.compose(feature_map.assign_parameters(x2), inplace=True)
    
    # Transpile circuits for the Aer backend
    backend = Aer.get_backend('statevector_simulator')
    qc1_transpiled = transpile(qc1, backend=backend)
    qc2_transpiled = transpile(qc2, backend=backend)
    
    # Use Aer simulator to compute state vectors and their inner product as kernel value
    result1 = backend.run(qc1_transpiled).result().get_statevector()
    result2 = backend.run(qc2_transpiled).result().get_statevector()
    
    return np.abs(np.dot(np.conj(result1), result2))**2

# Reduce the dataset size for faster testing (use only 10 samples for training)
X_train_small = X_train[:10]
y_train_small = y_train[:10]

# Compute the kernel matrix for the small subset of data
kernel_matrix_train = np.array([[quantum_kernel(x1, x2) for x2 in X_train_small] for x1 in X_train_small])

# Train the SVC model with precomputed kernel
svc = SVC(kernel='precomputed')
svc.fit(kernel_matrix_train, y_train_small)

# Evaluate on test set using the custom kernel function
kernel_matrix_test = np.array([[quantum_kernel(x1, x2) for x2 in X_train_small] for x1 in X_test])

# Compute accuracy
accuracy = svc.score(kernel_matrix_test, y_test)
print(f"Test set accuracy: {accuracy:.2f}")


Test set accuracy: 0.82


In [6]:
from qiskit import QuantumCircuit, transpile
from qiskit.circuit.library import ZZFeatureMap
from qiskit_aer import Aer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd

# URL of the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'

# Load the dataset directly from the URL
df = pd.read_csv(url)

# Define features (X) and target (y)
X = df.drop(['name', 'status'], axis=1)
y = df['status']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Dimensionality reduction using PCA (increase to 10 features)
pca = PCA(n_components=10)
X_pca = pca.fit_transform(X_scaled)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

# Define a deeper feature map (increase reps to 2)
feature_map = ZZFeatureMap(feature_dimension=X_train.shape[1], reps=2)

# Manually create a quantum kernel function
def quantum_kernel(x1, x2):
    # Create a quantum circuit for each input vector
    qc1 = QuantumCircuit(len(x1))
    qc2 = QuantumCircuit(len(x2))
    
    # Apply feature map to each circuit by parameter binding using assign_parameters
    qc1.compose(feature_map.assign_parameters(x1), inplace=True)
    qc2.compose(feature_map.assign_parameters(x2), inplace=True)
    
    # Transpile circuits for the Aer backend
    backend = Aer.get_backend('statevector_simulator')
    qc1_transpiled = transpile(qc1, backend=backend)
    qc2_transpiled = transpile(qc2, backend=backend)
    
    # Use Aer simulator to compute state vectors and their inner product as kernel value
    result1 = backend.run(qc1_transpiled).result().get_statevector()
    result2 = backend.run(qc2_transpiled).result().get_statevector()
    
    return np.abs(np.dot(np.conj(result1), result2))**2

# Compute the kernel matrix for the training data
kernel_matrix_train = np.array([[quantum_kernel(x1, x2) for x2 in X_train] for x1 in X_train])

# Hyperparameter tuning using GridSearchCV for SVM
param_grid = {
    'C': [0.1, 1, 10, 100],  # Regularization parameter
    'gamma': [0.001, 0.01, 0.1, 1]  # Kernel coefficient
}

svc = SVC(kernel='precomputed')
grid_search = GridSearchCV(svc, param_grid, cv=3)
grid_search.fit(kernel_matrix_train, y_train)

# Evaluate on test set using the best SVM parameters from grid search
kernel_matrix_test = np.array([[quantum_kernel(x1, x2) for x2 in X_train] for x1 in X_test])
best_svc = grid_search.best_estimator_
accuracy = best_svc.score(kernel_matrix_test, y_test)

print(f"Test set accuracy after hyperparameter tuning: {accuracy:.2f}")
print(f"Best parameters found by GridSearch: {grid_search.best_params_}")


KeyboardInterrupt: 

In [None]:
import optuna
from qiskit import QuantumCircuit, transpile
from qiskit.circuit.library import ZZFeatureMap
from qiskit_aer import Aer
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd

# URL of the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'

# Load the dataset directly from the URL
df = pd.read_csv(url)

# Define features (X) and target (y)
X = df.drop(['name', 'status'], axis=1)
y = df['status']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Dimensionality reduction using PCA (increase to 10 features)
pca = PCA(n_components=10)
X_pca = pca.fit_transform(X_scaled)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

# Define a quantum feature map
feature_map = ZZFeatureMap(feature_dimension=X_train.shape[1], reps=2)

# Function to compute the quantum kernel
def quantum_kernel(x1, x2):
    qc1 = QuantumCircuit(len(x1))
    qc2 = QuantumCircuit(len(x2))
    
    qc1.compose(feature_map.assign_parameters(x1), inplace=True)
    qc2.compose(feature_map.assign_parameters(x2), inplace=True)
    
    backend = Aer.get_backend('statevector_simulator')
    qc1_transpiled = transpile(qc1, backend=backend)
    qc2_transpiled = transpile(qc2, backend=backend)
    
    result1 = backend.run(qc1_transpiled).result().get_statevector()
    result2 = backend.run(qc2_transpiled).result().get_statevector()
    
    return np.abs(np.dot(np.conj(result1), result2))**2

# Create kernel matrix
def create_kernel_matrix(X1, X2):
    return np.array([[quantum_kernel(x1, x2) for x2 in X2] for x1 in X1])

# Optuna for SVM optimization
def objective_svm(trial):
    # Hyperparameters to tune
    C = trial.suggest_loguniform('C', 1e-1, 100)
    gamma = trial.suggest_loguniform('gamma', 1e-4, 1e-1)

    # Compute quantum kernel matrix
    kernel_matrix_train = create_kernel_matrix(X_train, X_train)
    
    # SVM with quantum kernel
    svc = SVC(kernel='precomputed', C=C, gamma=gamma)
    svc.fit(kernel_matrix_train, y_train)

    kernel_matrix_test = create_kernel_matrix(X_test, X_train)
    y_pred = svc.predict(kernel_matrix_test)
    
    return accuracy_score(y_test, y_pred)

# Optimize SVM with Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective_svm, n_trials=10)
best_params = study.best_params
print(f"Best SVM Parameters: {best_params}")

# Build kernel matrix for stacking and bagging classifiers
kernel_matrix_train = create_kernel_matrix(X_train, X_train)
kernel_matrix_test = create_kernel_matrix(X_test, X_train)

# Base models for stacking
base_models = [
    ('svc', SVC(kernel='precomputed', C=best_params['C'], gamma=best_params['gamma'])),
    ('rf', RandomForestClassifier(n_estimators=100))
]

# Meta-model for stacking
meta_model = LogisticRegression()

# Stacking classifier
stacking_clf = StackingClassifier(estimators=base_models, final_estimator=meta_model)
stacking_clf.fit(kernel_matrix_train, y_train)
y_pred_stack = stacking_clf.predict(kernel_matrix_test)
stacking_accuracy = accuracy_score(y_test, y_pred_stack)
print(f"Stacking Accuracy: {stacking_accuracy:.2f}")

# Bagging classifier
bagging_clf = BaggingClassifier(base_estimator=SVC(kernel='precomputed', C=best_params['C'], gamma=best_params['gamma']), n_estimators=10)
bagging_clf.fit(kernel_matrix_train, y_train)
y_pred_bag = bagging_clf.predict(kernel_matrix_test)
bagging_accuracy = accuracy_score(y_test, y_pred_bag)
print(f"Bagging Accuracy: {bagging_accuracy:.2f}")

# AdaBoost classifier
adaboost_clf = AdaBoostClassifier(base_estimator=SVC(kernel='precomputed', C=best_params['C'], gamma=best_params['gamma']), n_estimators=10)
adaboost_clf.fit(kernel_matrix_train, y_train)
y_pred_ada = adaboost_clf.predict(kernel_matrix_test)
adaboost_accuracy = accuracy_score(y_test, y_pred_ada)
print(f"AdaBoost Accuracy: {adaboost_accuracy:.2f}")


In [10]:
import optuna
from qiskit import QuantumCircuit, transpile
from qiskit.circuit.library import ZZFeatureMap
from qiskit_aer import Aer
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd

# URL of the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'

# Load the dataset directly from the URL
df = pd.read_csv(url)

# Define features (X) and target (y)
X = df.drop(['name', 'status'], axis=1)
y = df['status']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Dimensionality reduction using PCA (reduce to 5 features for initial testing)
pca = PCA(n_components=5)
X_pca = pca.fit_transform(X_scaled)

# Split data into training and test sets (use a small subset for testing)
X_train, X_test, y_train, y_test = train_test_split(X_pca[:20], y[:20], test_size=0.2, random_state=42)

# Define a simpler feature map (reduce reps to 1)
feature_map = ZZFeatureMap(feature_dimension=X_train.shape[1], reps=1)

# Function to compute the quantum kernel
def quantum_kernel(x1, x2):
    qc1 = QuantumCircuit(len(x1))
    qc2 = QuantumCircuit(len(x2))
    
    qc1.compose(feature_map.assign_parameters(x1), inplace=True)
    qc2.compose(feature_map.assign_parameters(x2), inplace=True)
    
    backend = Aer.get_backend('statevector_simulator')
    qc1_transpiled = transpile(qc1, backend=backend)
    qc2_transpiled = transpile(qc2, backend=backend)
    
    result1 = backend.run(qc1_transpiled).result().get_statevector()
    result2 = backend.run(qc2_transpiled).result().get_statevector()
    
    return np.abs(np.dot(np.conj(result1), result2))**2

# Create kernel matrix (cache results to reduce processing time)
def create_kernel_matrix(X1, X2):
    return np.array([[quantum_kernel(x1, x2) for x2 in X2] for x1 in X1])

# Precompute the kernel matrices for the small dataset
kernel_matrix_train = create_kernel_matrix(X_train, X_train)
kernel_matrix_test = create_kernel_matrix(X_test, X_train)

# Optuna for SVM optimization
def objective_svm(trial):
    # Hyperparameters to tune
    C = trial.suggest_loguniform('C', 1e-1, 100)
    gamma = trial.suggest_loguniform('gamma', 1e-4, 1e-1)

    # SVM with quantum kernel
    svc = SVC(kernel='precomputed', C=C, gamma=gamma)
    svc.fit(kernel_matrix_train, y_train)
    
    y_pred = svc.predict(kernel_matrix_test)
    return accuracy_score(y_test, y_pred)

# Run Optuna for best SVM hyperparameters
study = optuna.create_study(direction='maximize')
study.optimize(objective_svm, n_trials=5)  # Fewer trials for initial testing
best_params = study.best_params
print(f"Best SVM Parameters: {best_params}")

# Base models for stacking
svc_model = SVC(kernel='precomputed', C=best_params['C'], gamma=best_params['gamma'])
rf_model = RandomForestClassifier(n_estimators=10)  # Reduce estimators for testing

# Stacking classifier with SVM and RandomForest
stacking_clf = StackingClassifier(estimators=[('svc', svc_model), ('rf', rf_model)], final_estimator=LogisticRegression())
stacking_clf.fit(kernel_matrix_train, y_train)
y_pred_stack = stacking_clf.predict(kernel_matrix_test)
stacking_accuracy = accuracy_score(y_test, y_pred_stack)
print(f"Stacking Accuracy: {stacking_accuracy:.2f}")

# Bagging classifier using SVM as the base model
bagging_clf = BaggingClassifier(base_estimator=svc_model, n_estimators=5)  # Reduced estimators for speed
bagging_clf.fit(kernel_matrix_train, y_train)
y_pred_bag = bagging_clf.predict(kernel_matrix_test)
bagging_accuracy = accuracy_score(y_test, y_pred_bag)
print(f"Bagging Accuracy: {bagging_accuracy:.2f}")

# AdaBoost classifier with SVM as the base estimator
adaboost_clf = AdaBoostClassifier(base_estimator=svc_model, n_estimators=5)  # Reduced estimators for testing
adaboost_clf.fit(kernel_matrix_train, y_train)
y_pred_ada = adaboost_clf.predict(kernel_matrix_test)
adaboost_accuracy = accuracy_score(y_test, y_pred_ada)
print(f"AdaBoost Accuracy: {adaboost_accuracy:.2f}")


[I 2024-10-25 02:22:05,761] A new study created in memory with name: no-name-d07fd2f5-fa34-42a8-9daf-0780720ef26c
  C = trial.suggest_loguniform('C', 1e-1, 100)
  gamma = trial.suggest_loguniform('gamma', 1e-4, 1e-1)
[W 2024-10-25 02:22:05,762] Trial 0 failed with parameters: {'C': 1.1061967426701824, 'gamma': 0.00028940091707163693} because of the following error: ValueError('The number of classes has to be greater than one; got 1 class').
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.12/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/b7/fvm8bd317t9f9_j96tkt0q800000gn/T/ipykernel_70127/2548618993.py", line 73, in objective_svm
    svc.fit(kernel_matrix_train, y_train)
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

ValueError: The number of classes has to be greater than one; got 1 class

In [12]:
import optuna
from qiskit import QuantumCircuit, transpile
from qiskit.circuit.library import ZZFeatureMap
from qiskit_aer import Aer
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd

# URL of the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'

# Load the dataset directly from the URL
df = pd.read_csv(url)

# Define features (X) and target (y)
X = df.drop(['name', 'status'], axis=1)
y = df['status']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Dimensionality reduction using PCA (reduce to 5 features for initial testing)
pca = PCA(n_components=5)
X_pca = pca.fit_transform(X_scaled)

# Perform a stratified split with a larger dataset subset
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42, stratify=y)

# Define a simpler feature map (reduce reps to 1)
feature_map = ZZFeatureMap(feature_dimension=X_train.shape[1], reps=1)

# Function to compute the quantum kernel
def quantum_kernel(x1, x2):
    qc1 = QuantumCircuit(len(x1))
    qc2 = QuantumCircuit(len(x2))
    
    qc1.compose(feature_map.assign_parameters(x1), inplace=True)
    qc2.compose(feature_map.assign_parameters(x2), inplace=True)
    
    backend = Aer.get_backend('statevector_simulator')
    qc1_transpiled = transpile(qc1, backend=backend)
    qc2_transpiled = transpile(qc2, backend=backend)
    
    result1 = backend.run(qc1_transpiled).result().get_statevector()
    result2 = backend.run(qc2_transpiled).result().get_statevector()
    
    return np.abs(np.dot(np.conj(result1), result2))**2

# Create kernel matrix (cache results to reduce processing time)
def create_kernel_matrix(X1, X2):
    return np.array([[quantum_kernel(x1, x2) for x2 in X2] for x1 in X1])

# Precompute the kernel matrices for the small dataset
kernel_matrix_train = create_kernel_matrix(X_train, X_train)
kernel_matrix_test = create_kernel_matrix(X_test, X_train)

# Optuna for SVM optimization
def objective_svm(trial):
    # Hyperparameters to tune
    C = trial.suggest_loguniform('C', 1e-1, 100)
    gamma = trial.suggest_loguniform('gamma', 1e-4, 1e-1)

    # SVM with quantum kernel
    svc = SVC(kernel='precomputed', C=C, gamma=gamma)
    svc.fit(kernel_matrix_train, y_train)
    
    y_pred = svc.predict(kernel_matrix_test)
    return accuracy_score(y_test, y_pred)

# Run Optuna for best SVM hyperparameters
study = optuna.create_study(direction='maximize')
study.optimize(objective_svm, n_trials=5)  # Fewer trials for initial testing
best_params = study.best_params
print(f"Best SVM Parameters: {best_params}")

# Base models for stacking
svc_model = SVC(kernel='precomputed', C=best_params['C'], gamma=best_params['gamma'])
rf_model = RandomForestClassifier(n_estimators=10)  # Reduce estimators for testing

# Stacking classifier with SVM and RandomForest
stacking_clf = StackingClassifier(estimators=[('svc', svc_model), ('rf', rf_model)], final_estimator=LogisticRegression())
stacking_clf.fit(kernel_matrix_train, y_train)
y_pred_stack = stacking_clf.predict(kernel_matrix_test)
stacking_accuracy = accuracy_score(y_test, y_pred_stack)
print(f"Stacking Accuracy: {stacking_accuracy:.2f}")

# Bagging classifier using SVM as the base model
bagging_clf = BaggingClassifier(base_estimator=svc_model, n_estimators=5)  # Reduced estimators for speed
bagging_clf.fit(kernel_matrix_train, y_train)
y_pred_bag = bagging_clf.predict(kernel_matrix_test)
bagging_accuracy = accuracy_score(y_test, y_pred_bag)
print(f"Bagging Accuracy: {bagging_accuracy:.2f}")

# AdaBoost classifier with SVM as the base estimator
adaboost_clf = AdaBoostClassifier(base_estimator=svc_model, n_estimators=5)  # Reduced estimators for testing
adaboost_clf.fit(kernel_matrix_train, y_train)
y_pred_ada = adaboost_clf.predict(kernel_matrix_test)
adaboost_accuracy = accuracy_score(y_test, y_pred_ada)
print(f"AdaBoost Accuracy: {adaboost_accuracy:.2f}")


[I 2024-10-25 02:37:55,817] A new study created in memory with name: no-name-f7342c2b-b149-4062-b660-7775e239ece3
  C = trial.suggest_loguniform('C', 1e-1, 100)
  gamma = trial.suggest_loguniform('gamma', 1e-4, 1e-1)
[I 2024-10-25 02:37:55,818] Trial 0 finished with value: 0.717948717948718 and parameters: {'C': 5.617579379105943, 'gamma': 0.000967085126937672}. Best is trial 0 with value: 0.717948717948718.
  C = trial.suggest_loguniform('C', 1e-1, 100)
  gamma = trial.suggest_loguniform('gamma', 1e-4, 1e-1)
[I 2024-10-25 02:37:55,820] Trial 1 finished with value: 0.7435897435897436 and parameters: {'C': 0.3923268616638427, 'gamma': 0.00028939872766482305}. Best is trial 1 with value: 0.7435897435897436.
  C = trial.suggest_loguniform('C', 1e-1, 100)
  gamma = trial.suggest_loguniform('gamma', 1e-4, 1e-1)
[I 2024-10-25 02:37:55,821] Trial 2 finished with value: 0.7435897435897436 and parameters: {'C': 0.39974802445741486, 'gamma': 0.0015472932635270315}. Best is trial 1 with value: 0.

Best SVM Parameters: {'C': 0.3923268616638427, 'gamma': 0.00028939872766482305}
Stacking Accuracy: 0.74


TypeError: BaggingClassifier.__init__() got an unexpected keyword argument 'base_estimator'

In [16]:
pip install pytorch-tabnet


Collecting pytorch-tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl.metadata (15 kB)
Downloading pytorch_tabnet-4.1.0-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.5/44.5 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytorch-tabnet
Successfully installed pytorch-tabnet-4.1.0
Note: you may need to restart the kernel to use updated packages.


In [22]:
pip install catboost lightgbm transformers


Note: you may need to restart the kernel to use updated packages.


In [28]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from pytorch_tabnet.tab_model import TabNetClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

# Load the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
df = pd.read_csv(url)

# Define features (X) and target (y)
X = df.drop(['name', 'status'], axis=1)
y = df['status']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert data to np.array for PyTorch-based models
X_train_np, X_test_np = np.array(X_train), np.array(X_test)
y_train_np, y_test_np = np.array(y_train), np.array(y_test)

# 1. TabNet Implementation
# Initialize and train TabNet
tabnet_clf = TabNetClassifier(verbose=0)
tabnet_clf.fit(
    X_train_np, y_train_np,
    eval_set=[(X_test_np, y_test_np)],
    eval_metric=['accuracy'],
    patience=20,
    max_epochs=100,
)

# Predict and evaluate
y_pred_tabnet = tabnet_clf.predict(X_test_np)
tabnet_accuracy = accuracy_score(y_test_np, y_pred_tabnet)
print(f"TabNet Accuracy: {tabnet_accuracy:.2f}")

# 2. CatBoost Implementation
# Initialize and train CatBoost
catboost_clf = CatBoostClassifier(iterations=100, learning_rate=0.1, depth=6, verbose=0)
catboost_clf.fit(X_train, y_train)

# Predict and evaluate
y_pred_catboost = catboost_clf.predict(X_test)
catboost_accuracy = accuracy_score(y_test, y_pred_catboost)
print(f"CatBoost Accuracy: {catboost_accuracy:.2f}")

# 3. LightGBM Implementation
# Initialize and train LightGBM
lgbm_clf = LGBMClassifier(n_estimators=100, learning_rate=0.1, max_depth=6)
lgbm_clf.fit(X_train, y_train)

# Predict and evaluate
y_pred_lgbm = lgbm_clf.predict(X_test)
lgbm_accuracy = accuracy_score(y_test, y_pred_lgbm)
print(f"LightGBM Accuracy: {lgbm_accuracy:.2f}")

# Summary of Results
print(f"TabNet Accuracy: {tabnet_accuracy:.2f}")
print(f"CatBoost Accuracy: {catboost_accuracy:.2f}")
print(f"LightGBM Accuracy: {lgbm_accuracy:.2f}")



Early stopping occurred at epoch 20 with best_epoch = 0 and best_val_0_accuracy = 0.46154
TabNet Accuracy: 0.46
CatBoost Accuracy: 0.95
[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000175 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360




LightGBM Accuracy: 0.95
TabNet Accuracy: 0.46
CatBoost Accuracy: 0.95
LightGBM Accuracy: 0.95


In [32]:
pip install tab-transformer-pytorch


Collecting tab-transformer-pytorch
  Downloading tab_transformer_pytorch-0.3.0-py3-none-any.whl.metadata (690 bytes)
Collecting einops>=0.3 (from tab-transformer-pytorch)
  Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)
Downloading tab_transformer_pytorch-0.3.0-py3-none-any.whl (6.9 kB)
Downloading einops-0.8.0-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.2/43.2 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: einops, tab-transformer-pytorch
Successfully installed einops-0.8.0 tab-transformer-pytorch-0.3.0
Note: you may need to restart the kernel to use updated packages.


In [40]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from tab_transformer_pytorch import TabTransformer
import torch
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from sklearn.preprocessing import LabelEncoder

# Load the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
df = pd.read_csv(url)

# Define features (X) and target (y)
X = df.drop(['name', 'status'], axis=1)
y = df['status']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert target to numeric if it's not already (TabTransformer works best with integer labels)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Initialize TabTransformer model
tab_transformer = TabTransformer(
    categories=(),               # No categorical features in this dataset
    num_continuous=X_train.shape[1],  # Number of continuous features
    dim=64,                       # Dimension of embeddings
    depth=6,                      # Depth of the transformer
    heads=8,                      # Number of attention heads
    dim_out=2                     # Number of classes (binary classification)
)

# Set up training parameters
optimizer = Adam(tab_transformer.parameters(), lr=1e-3)
criterion = CrossEntropyLoss()

# Training loop
epochs = 20
for epoch in range(epochs):
    tab_transformer.train()
    optimizer.zero_grad()
    
    # Pass an empty tensor for x_cat as there are no categorical features
    output = tab_transformer(torch.empty(X_train_tensor.shape[0], 0).long(), X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

# Evaluation
tab_transformer.eval()
with torch.no_grad():
    test_output = tab_transformer(torch.empty(X_test_tensor.shape[0], 0).long(), X_test_tensor)
    _, y_pred_tabtransformer = torch.max(test_output, 1)

# Calculate accuracy
tabtransformer_accuracy = accuracy_score(y_test_tensor, y_pred_tabtransformer)
print(f"TabTransformer Accuracy: {tabtransformer_accuracy:.2f}")


Epoch 1/20, Loss: 0.6824726462364197
Epoch 2/20, Loss: 0.6589516401290894
Epoch 3/20, Loss: 0.637179970741272
Epoch 4/20, Loss: 0.6168936491012573
Epoch 5/20, Loss: 0.5979650616645813
Epoch 6/20, Loss: 0.5800801515579224
Epoch 7/20, Loss: 0.5629091262817383
Epoch 8/20, Loss: 0.5464689135551453
Epoch 9/20, Loss: 0.5307313799858093
Epoch 10/20, Loss: 0.5156416893005371
Epoch 11/20, Loss: 0.5011964440345764
Epoch 12/20, Loss: 0.48726293444633484
Epoch 13/20, Loss: 0.47377175092697144
Epoch 14/20, Loss: 0.4607541859149933
Epoch 15/20, Loss: 0.44818252325057983
Epoch 16/20, Loss: 0.4361090660095215
Epoch 17/20, Loss: 0.4245056211948395
Epoch 18/20, Loss: 0.41336295008659363
Epoch 19/20, Loss: 0.40257224440574646
Epoch 20/20, Loss: 0.39220064878463745
TabTransformer Accuracy: 0.90


In [44]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from pytorch_tabnet.tab_model import TabNetClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import StratifiedKFold
import optuna

# Load the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
df = pd.read_csv(url)

# Define features (X) and target (y)
X = df.drop(['name', 'status'], axis=1)
y = df['status']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define a function to optimize with Optuna
def objective(trial):
    model_type = trial.suggest_categorical('model_type', ['CatBoost', 'LightGBM', 'TabNet'])
    
    if model_type == 'CatBoost':
        params = {
            'iterations': trial.suggest_int('iterations', 50, 200),
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
            'depth': trial.suggest_int('depth', 4, 10)
        }
        model = CatBoostClassifier(**params, verbose=0)
    elif model_type == 'LightGBM':
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 200),
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
            'max_depth': trial.suggest_int('max_depth', 4, 10)
        }
        model = LGBMClassifier(**params)
    else:
        params = {
            'n_d': trial.suggest_int('n_d', 8, 64),
            'n_a': trial.suggest_int('n_a', 8, 64),
            'n_steps': trial.suggest_int('n_steps', 3, 10),
            'gamma': trial.suggest_loguniform('gamma', 1e-3, 3),
            'lambda_sparse': trial.suggest_loguniform('lambda_sparse', 1e-3, 1)
        }
        model = TabNetClassifier(**params, verbose=0)
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return accuracy_score(y_test, y_pred)

# Run Optuna for hyperparameter optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)
best_params = study.best_trial.params

# Initialize models with best parameters
catboost = CatBoostClassifier(iterations=best_params.get('iterations', 100), 
                              learning_rate=best_params.get('learning_rate', 0.1), 
                              depth=best_params.get('depth', 6), 
                              verbose=0)

lgbm = LGBMClassifier(n_estimators=best_params.get('n_estimators', 100), 
                      learning_rate=best_params.get('learning_rate', 0.1), 
                      max_depth=best_params.get('max_depth', 6))

tabnet = TabNetClassifier(n_d=best_params.get('n_d', 16), 
                          n_a=best_params.get('n_a', 16), 
                          n_steps=best_params.get('n_steps', 5), 
                          gamma=best_params.get('gamma', 1.3), 
                          lambda_sparse=best_params.get('lambda_sparse', 1e-3), 
                          verbose=0)

# Fit each model
catboost.fit(X_train, y_train)
lgbm.fit(X_train, y_train)
tabnet.fit(X_train, y_train)

# Predict probabilities for soft voting
catboost_probs = catboost.predict_proba(X_test)
lgbm_probs = lgbm.predict_proba(X_test)
tabnet_probs = tabnet.predict_proba(X_test)

# Averaging probabilities (soft voting)
average_probs = (catboost_probs + lgbm_probs + tabnet_probs) / 3
y_pred_ensemble = np.argmax(average_probs, axis=1)

# Calculate ensemble accuracy
ensemble_accuracy = accuracy_score(y_test, y_pred_ensemble)
print(f"Ensemble Model Accuracy: {ensemble_accuracy:.2f}")


[I 2024-10-25 03:12:01,496] A new study created in memory with name: no-name-fc4e309c-a641-4984-a1d4-92c07ead5541
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:01,554] Trial 0 finished with value: 0.9230769230769231 and parameters: {'model_type': 'LightGBM', 'n_estimators': 120, 'learning_rate': 0.12001835532550631, 'max_depth': 7}. Best is trial 0 with value: 0.9230769230769231.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:01,581] Trial 1 finished with value: 0.9487179487179487 and parameters: {'model_type': 'LightGBM', 'n_estimators': 128, 'learning_rate': 0.07050186241724277, 'max_depth': 4}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:01,644] Trial 2 finished with value: 0.9487179487179487 and parameters: {'model_type': 'CatBoost', 'iterations': 131, 'learning_rate': 0.04413916880593815, 'dept

[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000264 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360
[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000162 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] S

[I 2024-10-25 03:12:01,728] Trial 3 finished with value: 0.9230769230769231 and parameters: {'model_type': 'CatBoost', 'iterations': 78, 'learning_rate': 0.04155014378458823, 'depth': 7}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:01,971] Trial 4 finished with value: 0.9487179487179487 and parameters: {'model_type': 'CatBoost', 'iterations': 181, 'learning_rate': 0.017583687827446622, 'depth': 8}. Best is trial 1 with value: 0.9487179487179487.
  'gamma': trial.suggest_loguniform('gamma', 1e-3, 3),
  'lambda_sparse': trial.suggest_loguniform('lambda_sparse', 1e-3, 1)
[I 2024-10-25 03:12:02,025] Trial 5 finished with value: 0.5641025641025641 and parameters: {'model_type': 'TabNet', 'n_d': 49, 'n_a': 55, 'n_steps': 8, 'gamma': 0.0010273919020282372, 'lambda_sparse': 0.007735037818597104}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_r

[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000206 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360
[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000117 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360
[LightGBM] [Info] Number of po

[I 2024-10-25 03:12:02,532] Trial 12 finished with value: 0.9487179487179487 and parameters: {'model_type': 'CatBoost', 'iterations': 125, 'learning_rate': 0.039380256035044844, 'depth': 4}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:02,564] Trial 13 finished with value: 0.9487179487179487 and parameters: {'model_type': 'LightGBM', 'n_estimators': 51, 'learning_rate': 0.07635056075789659, 'max_depth': 6}. Best is trial 1 with value: 0.9487179487179487.
  'gamma': trial.suggest_loguniform('gamma', 1e-3, 3),
  'lambda_sparse': trial.suggest_loguniform('lambda_sparse', 1e-3, 1)
[I 2024-10-25 03:12:02,597] Trial 14 finished with value: 0.5641025641025641 and parameters: {'model_type': 'TabNet', 'n_d': 62, 'n_a': 8, 'n_steps': 3, 'gamma': 2.7419542742264125, 'lambda_sparse': 0.4445221253388422}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learnin

[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000125 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360
[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000124 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360


[I 2024-10-25 03:12:02,769] Trial 16 finished with value: 0.9230769230769231 and parameters: {'model_type': 'LightGBM', 'n_estimators': 138, 'learning_rate': 0.011363644181058659, 'max_depth': 10}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:02,804] Trial 17 finished with value: 0.9487179487179487 and parameters: {'model_type': 'LightGBM', 'n_estimators': 140, 'learning_rate': 0.06653203381804636, 'max_depth': 6}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),


[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000274 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360


[I 2024-10-25 03:12:03,231] Trial 18 finished with value: 0.9487179487179487 and parameters: {'model_type': 'CatBoost', 'iterations': 119, 'learning_rate': 0.026945073618108904, 'depth': 10}. Best is trial 1 with value: 0.9487179487179487.
  'gamma': trial.suggest_loguniform('gamma', 1e-3, 3),
  'lambda_sparse': trial.suggest_loguniform('lambda_sparse', 1e-3, 1)
[I 2024-10-25 03:12:03,265] Trial 19 finished with value: 0.717948717948718 and parameters: {'model_type': 'TabNet', 'n_d': 20, 'n_a': 59, 'n_steps': 4, 'gamma': 0.004445865833526508, 'lambda_sparse': 0.21079829553562832}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:03,345] Trial 20 finished with value: 0.9487179487179487 and parameters: {'model_type': 'CatBoost', 'iterations': 153, 'learning_rate': 0.1829655761580238, 'depth': 5}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_

[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000093 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360


[I 2024-10-25 03:12:04,489] Trial 26 finished with value: 0.9487179487179487 and parameters: {'model_type': 'CatBoost', 'iterations': 101, 'learning_rate': 0.0563001837722748, 'depth': 9}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:04,607] Trial 27 finished with value: 0.9487179487179487 and parameters: {'model_type': 'CatBoost', 'iterations': 177, 'learning_rate': 0.01429427392426685, 'depth': 6}. Best is trial 1 with value: 0.9487179487179487.
  'gamma': trial.suggest_loguniform('gamma', 1e-3, 3),
  'lambda_sparse': trial.suggest_loguniform('lambda_sparse', 1e-3, 1)
[I 2024-10-25 03:12:04,661] Trial 28 finished with value: 0.23076923076923078 and parameters: {'model_type': 'TabNet', 'n_d': 34, 'n_a': 34, 'n_steps': 6, 'gamma': 0.033493466588915964, 'lambda_sparse': 0.04549595146554684}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_

[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000119 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360
[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000137 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360


[I 2024-10-25 03:12:05,026] Trial 31 finished with value: 0.9487179487179487 and parameters: {'model_type': 'CatBoost', 'iterations': 199, 'learning_rate': 0.0215727267949199, 'depth': 8}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:05,296] Trial 32 finished with value: 0.9487179487179487 and parameters: {'model_type': 'CatBoost', 'iterations': 180, 'learning_rate': 0.01694163468735973, 'depth': 8}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:05,597] Trial 33 finished with value: 0.9487179487179487 and parameters: {'model_type': 'CatBoost', 'iterations': 141, 'learning_rate': 0.025048365061294264, 'depth': 9}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:05,844] Trial 34 finished with value: 0.9487179

[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000166 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360
[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000091 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:06,863] Trial 41 finished with value: 0.9487179487179487 and parameters: {'model_type': 'LightGBM', 'n_estimators': 85, 'learning_rate': 0.14720385017373222, 'max_depth': 10}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:06,892] Trial 42 finished with value: 0.9487179487179487 and parameters: {'model_type': 'LightGBM', 'n_estimators': 84, 'learning_rate': 0.21754763364716026, 'max_depth': 9}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:06,928] Trial 43 finished with value: 0.9487179487179487 and parameters: {'model_type': 'LightGBM', 'n_estimators': 104, 'learning_rate': 0.26936200586730696, 'max_depth': 9}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learn

[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360
[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000064 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360
[LightGBM] [Info] Number of po

[I 2024-10-25 03:12:07,070] Trial 46 finished with value: 0.9487179487179487 and parameters: {'model_type': 'LightGBM', 'n_estimators': 125, 'learning_rate': 0.043094184258302776, 'max_depth': 6}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),




[I 2024-10-25 03:12:07,355] Trial 47 finished with value: 0.9487179487179487 and parameters: {'model_type': 'CatBoost', 'iterations': 117, 'learning_rate': 0.1583506508059041, 'depth': 9}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:07,374] Trial 48 finished with value: 0.9487179487179487 and parameters: {'model_type': 'LightGBM', 'n_estimators': 52, 'learning_rate': 0.06550592317208484, 'max_depth': 9}. Best is trial 1 with value: 0.9487179487179487.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-10-25 03:12:07,538] Trial 49 finished with value: 0.9487179487179487 and parameters: {'model_type': 'CatBoost', 'iterations': 153, 'learning_rate': 0.013670380893471, 'depth': 7}. Best is trial 1 with value: 0.9487179487179487.


[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000092 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360
[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training from score 1.031360
Ensemble Model Accuracy: 0.95




In [46]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR
from sklearn.preprocessing import LabelEncoder

# Load and preprocess the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
df = pd.read_csv(url)

# Define features (X) and target (y)
X = df.drop(['name', 'status'], axis=1)
y = df['status']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert target to numeric
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Custom Transformer Model
class TabularTransformer(nn.Module):
    def __init__(self, input_dim, num_classes, d_model=64, nhead=4, num_layers=4, dim_feedforward=128, dropout=0.1):
        super(TabularTransformer, self).__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer_encoder(x.unsqueeze(1)).squeeze(1)
        x = self.dropout(x)
        return self.fc(x)

# Initialize the Transformer model
input_dim = X_train.shape[1]
num_classes = len(np.unique(y_train))
model = TabularTransformer(input_dim=input_dim, num_classes=num_classes, d_model=64, nhead=4, num_layers=6, dim_feedforward=128, dropout=0.2)

# Set up training parameters
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)  # Reduces learning rate by half every 10 epochs

# Training loop
epochs = 50  # Set higher for actual training
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    scheduler.step()  # Adjust learning rate
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

# Evaluation
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    _, y_pred = torch.max(test_outputs, 1)

# Calculate accuracy
accuracy = accuracy_score(y_test_tensor, y_pred)
print(f"Transformer Model Accuracy: {accuracy:.4f}")




Epoch 10/50, Loss: 0.3902304172515869
Epoch 20/50, Loss: 0.29607486724853516
Epoch 30/50, Loss: 0.23847359418869019
Epoch 40/50, Loss: 0.20130693912506104
Epoch 50/50, Loss: 0.20234918594360352
Transformer Model Accuracy: 0.7949


In [50]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset

# Load and preprocess the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
df = pd.read_csv(url)

# Define features (X) and target (y)
X = df.drop(['name', 'status'], axis=1)
y = df['status']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert target to numeric
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors and create DataLoader for batch training
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Custom Transformer Model
class TabularTransformer(nn.Module):
    def __init__(self, input_dim, num_classes, d_model=128, nhead=8, num_layers=8, dim_feedforward=256, dropout=0.3):
        super(TabularTransformer, self).__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer_encoder(x.unsqueeze(1)).squeeze(1)
        x = self.dropout(x)
        return self.fc(x)

# Initialize the Transformer model
input_dim = X_train.shape[1]
num_classes = len(np.unique(y_train))
model = TabularTransformer(input_dim=input_dim, num_classes=num_classes, d_model=128, nhead=8, num_layers=8, dim_feedforward=256, dropout=0.3)

# Set up training parameters
optimizer = Adam(model.parameters(), lr=0.0005, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, verbose=True)  # Decays learning rate when performance plateaus

# Training loop with batch training
epochs = 400
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    # Evaluate on the test set every 10 epochs
    if (epoch + 1) % 10 == 0:
        model.eval()
        with torch.no_grad():
            test_outputs = model(X_test_tensor)
            _, y_pred = torch.max(test_outputs, 1)
            accuracy = accuracy_score(y_test_tensor, y_pred)
            print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}, Test Accuracy: {accuracy:.4f}")
        
        # Adjust learning rate if validation accuracy plateaus
        scheduler.step(accuracy)

# Final Evaluation
model.eval()
with torch.no_grad():
    final_outputs = model(X_test_tensor)
    _, final_preds = torch.max(final_outputs, 1)
final_accuracy = accuracy_score(y_test_tensor, final_preds)
print(f"Final Transformer Model Accuracy: {final_accuracy:.4f}")




Epoch 10/400, Loss: 1.7359, Test Accuracy: 0.8718
Epoch 20/400, Loss: 2.3522, Test Accuracy: 0.9231
Epoch 30/400, Loss: 1.3104, Test Accuracy: 0.8718
Epoch 40/400, Loss: 0.5605, Test Accuracy: 0.9231
Epoch 50/400, Loss: 1.1885, Test Accuracy: 0.9231
Epoch 60/400, Loss: 0.9268, Test Accuracy: 0.8462
Epoch 70/400, Loss: 0.6818, Test Accuracy: 0.8974
Epoch 80/400, Loss: 0.5625, Test Accuracy: 0.8974
Epoch 90/400, Loss: 0.3058, Test Accuracy: 0.9231
Epoch 100/400, Loss: 0.0810, Test Accuracy: 0.8974
Epoch 110/400, Loss: 0.3100, Test Accuracy: 0.8974
Epoch 120/400, Loss: 0.0305, Test Accuracy: 0.8974
Epoch 130/400, Loss: 0.9550, Test Accuracy: 0.8974
Epoch 140/400, Loss: 0.2766, Test Accuracy: 0.8974
Epoch 150/400, Loss: 0.0156, Test Accuracy: 0.9487
Epoch 160/400, Loss: 0.0134, Test Accuracy: 0.9231
Epoch 170/400, Loss: 0.0094, Test Accuracy: 0.9231
Epoch 180/400, Loss: 0.0093, Test Accuracy: 0.9231
Epoch 190/400, Loss: 0.0085, Test Accuracy: 0.9231
Epoch 200/400, Loss: 0.0090, Test Accura

In [54]:
import optuna
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.preprocessing import LabelEncoder

# Load and preprocess the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
df = pd.read_csv(url)

X = df.drop(['name', 'status'], axis=1)
y = df['status']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors and create DataLoader
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Define the Transformer model with Optuna hyperparameters
class TabularTransformer(nn.Module):
    def __init__(self, input_dim, num_classes, d_model=128, nhead=8, num_layers=6, dim_feedforward=256, dropout=0.3):
        super(TabularTransformer, self).__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer_encoder(x.unsqueeze(1)).squeeze(1)
        x = self.dropout(x)
        return self.fc(x)

# Define objective function for Optuna to optimize Transformer hyperparameters
def objective(trial):
    # Ensure d_model is divisible by nhead
    d_model = trial.suggest_categorical('d_model', [64, 128, 256])
    nhead_options = [h for h in range(2, 9) if d_model % h == 0]  # Only select nhead values that divide d_model
    nhead = trial.suggest_categorical('nhead', nhead_options)
    
    num_layers = trial.suggest_int('num_layers', 4, 8)
    dim_feedforward = trial.suggest_int('dim_feedforward', 128, 512)
    dropout = trial.suggest_uniform('dropout', 0.1, 0.5)
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-3)
    
    model = TabularTransformer(input_dim=X_train.shape[1], num_classes=len(np.unique(y_train)),
                               d_model=d_model, nhead=nhead, num_layers=num_layers,
                               dim_feedforward=dim_feedforward, dropout=dropout)

    optimizer = Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss()
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, verbose=True)
    
    epochs = 100
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        if (epoch + 1) % 10 == 0:
            model.eval()
            with torch.no_grad():
                test_outputs = model(X_test_tensor)
                _, y_pred = torch.max(test_outputs, 1)
                accuracy = accuracy_score(y_test_tensor, y_pred)
            scheduler.step(accuracy)
            trial.report(accuracy, epoch)
            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()
                
    return accuracy


# Run Optuna to find the best hyperparameters
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

# Train the model with the best parameters from Optuna
best_params = study.best_params
print("Best Parameters:", best_params)

# Initialize model with best parameters
model = TabularTransformer(input_dim=X_train.shape[1], num_classes=len(np.unique(y_train)),
                           d_model=best_params['d_model'], nhead=best_params['nhead'],
                           num_layers=best_params['num_layers'], dim_feedforward=best_params['dim_feedforward'],
                           dropout=best_params['dropout'])

# Set up optimizer and scheduler
optimizer = Adam(model.parameters(), lr=best_params['lr'], weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, verbose=True)

# Training loop with optimal parameters
epochs = 150
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    if (epoch + 1) % 10 == 0:
        model.eval()
        with torch.no_grad():
            test_outputs = model(X_test_tensor)
            _, y_pred = torch.max(test_outputs, 1)
            accuracy = accuracy_score(y_test_tensor, y_pred)
            print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}, Test Accuracy: {accuracy:.4f}")
        scheduler.step(accuracy)

# Final evaluation
model.eval()
with torch.no_grad():
    final_outputs = model(X_test_tensor)
    _, final_preds = torch.max(final_outputs, 1)
final_accuracy = accuracy_score(y_test_tensor, final_preds)
print(f"Final Transformer Model Accuracy: {final_accuracy:.4f}")


[I 2024-10-25 03:29:28,935] A new study created in memory with name: no-name-45532766-431d-4364-9dce-ce5029838225
  dropout = trial.suggest_uniform('dropout', 0.1, 0.5)
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-3)
[I 2024-10-25 03:29:35,691] Trial 0 finished with value: 0.8974358974358975 and parameters: {'d_model': 64, 'nhead': 8, 'num_layers': 6, 'dim_feedforward': 378, 'dropout': 0.2844681673320026, 'lr': 0.00017553522233928757}. Best is trial 0 with value: 0.8974358974358975.
  dropout = trial.suggest_uniform('dropout', 0.1, 0.5)
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-3)
[I 2024-10-25 03:29:44,044] Trial 1 finished with value: 0.9487179487179487 and parameters: {'d_model': 64, 'nhead': 8, 'num_layers': 8, 'dim_feedforward': 192, 'dropout': 0.18871778166281739, 'lr': 0.0006417350491503361}. Best is trial 1 with value: 0.9487179487179487.
  dropout = trial.suggest_uniform('dropout', 0.1, 0.5)
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-3)
[I 2024-10-25 03:29:50,480] T

Best Parameters: {'d_model': 64, 'nhead': 8, 'num_layers': 8, 'dim_feedforward': 354, 'dropout': 0.2691263732301931, 'lr': 0.0005501979293251903}
Epoch 10/150, Loss: 1.9132, Test Accuracy: 0.8462
Epoch 20/150, Loss: 1.5467, Test Accuracy: 0.9487
Epoch 30/150, Loss: 0.7367, Test Accuracy: 0.9231
Epoch 40/150, Loss: 1.2467, Test Accuracy: 0.9487
Epoch 50/150, Loss: 0.8075, Test Accuracy: 0.8974
Epoch 60/150, Loss: 0.0400, Test Accuracy: 0.8974
Epoch 70/150, Loss: 0.2353, Test Accuracy: 0.8974
Epoch 80/150, Loss: 0.0236, Test Accuracy: 0.8974
Epoch 90/150, Loss: 0.2781, Test Accuracy: 0.9231
Epoch 100/150, Loss: 0.0245, Test Accuracy: 0.8974
Epoch 110/150, Loss: 0.0162, Test Accuracy: 0.8974
Epoch 120/150, Loss: 0.0132, Test Accuracy: 0.8974
Epoch 130/150, Loss: 0.0116, Test Accuracy: 0.8974
Epoch 140/150, Loss: 0.0130, Test Accuracy: 0.8974
Epoch 150/150, Loss: 0.0136, Test Accuracy: 0.8974
Final Transformer Model Accuracy: 0.8974


In [56]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from torch.optim import Adam
from sklearn.ensemble import VotingClassifier
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from torch.utils.data import DataLoader, TensorDataset

# Load and preprocess the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
df = pd.read_csv(url)

X = df.drop(['name', 'status'], axis=1)
y = df['status']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors and create DataLoader for Transformer model
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Transformer Model
class TabularTransformer(nn.Module):
    def __init__(self, input_dim, num_classes, d_model=64, nhead=8, num_layers=8, dim_feedforward=256, dropout=0.3):
        super(TabularTransformer, self).__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer_encoder(x.unsqueeze(1)).squeeze(1)
        x = self.dropout(x)
        return self.fc(x)

# Initialize Transformer model with tuned parameters
transformer_model = TabularTransformer(input_dim=X_train.shape[1], num_classes=len(np.unique(y_train)),
                                       d_model=64, nhead=8, num_layers=8, dim_feedforward=354, dropout=0.3)

# Training setup for Transformer
optimizer = Adam(transformer_model.parameters(), lr=0.0005, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, verbose=True)

# Training loop with gradient clipping
epochs = 100
for epoch in range(epochs):
    transformer_model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = transformer_model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(transformer_model.parameters(), max_norm=1.0)
        optimizer.step()
        
        total_loss += loss.item()

    if (epoch + 1) % 10 == 0:
        transformer_model.eval()
        with torch.no_grad():
            test_outputs = transformer_model(X_test_tensor)
            _, y_pred = torch.max(test_outputs, 1)
            accuracy = accuracy_score(y_test_tensor, y_pred)
            print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}, Test Accuracy: {accuracy:.4f}")
        scheduler.step(accuracy)

# Transformer model evaluation
transformer_model.eval()
with torch.no_grad():
    final_outputs = transformer_model(X_test_tensor)
    _, transformer_preds = torch.max(final_outputs, 1)
transformer_accuracy = accuracy_score(y_test_tensor, transformer_preds)
print(f"Transformer Model Accuracy: {transformer_accuracy:.4f}")

# CatBoost and LightGBM Models
catboost_clf = CatBoostClassifier(iterations=200, learning_rate=0.1, depth=6, verbose=0)
catboost_clf.fit(X_train, y_train)
lgbm_clf = LGBMClassifier(n_estimators=200, learning_rate=0.1, max_depth=6)
lgbm_clf.fit(X_train, y_train)

# Soft voting ensemble
catboost_probs = catboost_clf.predict_proba(X_test)
lgbm_probs = lgbm_clf.predict_proba(X_test)
transformer_probs = torch.nn.functional.softmax(final_outputs, dim=1).numpy()  # Convert Transformer outputs to probabilities

# Averaging the probabilities
ensemble_probs = (catboost_probs + lgbm_probs + transformer_probs) / 3
ensemble_preds = np.argmax(ensemble_probs, axis=1)

# Calculate ensemble accuracy
ensemble_accuracy = accuracy_score(y_test, ensemble_preds)
print(f"Ensemble Model Accuracy: {ensemble_accuracy:.4f}")




Epoch 10/100, Loss: 2.4591, Test Accuracy: 0.8974
Epoch 20/100, Loss: 1.1013, Test Accuracy: 0.8974
Epoch 30/100, Loss: 0.7499, Test Accuracy: 0.8974
Epoch 40/100, Loss: 1.6488, Test Accuracy: 0.8974
Epoch 50/100, Loss: 0.0296, Test Accuracy: 0.8974
Epoch 60/100, Loss: 0.6080, Test Accuracy: 0.8974
Epoch 70/100, Loss: 0.0161, Test Accuracy: 0.8974
Epoch 80/100, Loss: 0.0101, Test Accuracy: 0.8974
Epoch 90/100, Loss: 0.0076, Test Accuracy: 0.8974
Epoch 100/100, Loss: 0.0057, Test Accuracy: 0.8974
Transformer Model Accuracy: 0.8974
[LightGBM] [Info] Number of positive: 115, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000080 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1132
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.737179 -> initscore=1.031360
[LightGBM] [Info] Start training

In [58]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset
from imblearn.over_sampling import SMOTE
import optuna

# Load and preprocess the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
df = pd.read_csv(url)

X = df.drop(['name', 'status'], axis=1).values
y = df['status'].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Define the Transformer model
class TabularTransformer(nn.Module):
    def __init__(self, input_dim, num_classes, d_model=64, nhead=4, num_layers=2, dim_feedforward=128, dropout=0.5):
        super(TabularTransformer, self).__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward,
            dropout=dropout, activation='relu', batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(d_model, num_classes)
        )

    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer_encoder(x)
        x = self.fc(x[:, 0, :])  # Use the first token's output
        return x

# Objective function for Optuna
def objective(trial):
    accuracies = []

    # Hyperparameters to tune
    d_model = trial.suggest_categorical('d_model', [32, 64, 128])
    nhead_options = [h for h in [2, 4, 8] if d_model % h == 0]
    nhead = trial.suggest_categorical('nhead', nhead_options)
    num_layers = trial.suggest_int('num_layers', 1, 3)
    dim_feedforward = trial.suggest_categorical('dim_feedforward', [64, 128, 256])
    dropout = trial.suggest_uniform('dropout', 0.1, 0.5)
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-3)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
    batch_size = trial.suggest_categorical('batch_size', [8, 16, 32])

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    
    for train_index, val_index in skf.split(X_scaled, y_encoded):
        X_train_fold, X_val_fold = X_scaled[train_index], X_scaled[val_index]
        y_train_fold, y_val_fold = y_encoded[train_index], y_encoded[val_index]
        
        # Apply SMOTE to the training fold
        smote = SMOTE(random_state=42)
        X_train_resampled, y_train_resampled = smote.fit_resample(X_train_fold, y_train_fold)
        
        # Convert data to PyTorch tensors
        X_train_tensor = torch.tensor(X_train_resampled, dtype=torch.float32)
        y_train_tensor = torch.tensor(y_train_resampled, dtype=torch.long)
        X_val_tensor = torch.tensor(X_val_fold, dtype=torch.float32)
        y_val_tensor = torch.tensor(y_val_fold, dtype=torch.long)
        
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        
        model = TabularTransformer(
            input_dim=X_train_tensor.shape[1], num_classes=2, d_model=d_model,
            nhead=nhead, num_layers=num_layers, dim_feedforward=dim_feedforward, dropout=dropout
        )
        
        optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        criterion = nn.CrossEntropyLoss()
        
        epochs = 50
        best_val_accuracy = 0
        early_stopping_counter = 0
        for epoch in range(epochs):
            model.train()
            for X_batch, y_batch in train_loader:
                optimizer.zero_grad()
                outputs = model(X_batch.unsqueeze(1))
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
            
            # Validation
            model.eval()
            with torch.no_grad():
                val_outputs = model(X_val_tensor.unsqueeze(1))
                _, val_preds = torch.max(val_outputs, 1)
                val_accuracy = accuracy_score(y_val_tensor.numpy(), val_preds.numpy())
                
            # Early stopping
            if val_accuracy > best_val_accuracy:
                best_val_accuracy = val_accuracy
                early_stopping_counter = 0
            else:
                early_stopping_counter += 1
            if early_stopping_counter >= 5:
                break
        
        accuracies.append(best_val_accuracy)
    
    return np.mean(accuracies)

# Run Optuna optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Get the best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Apply SMOTE to the entire dataset
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y_encoded)

# Convert data to PyTorch tensors
X_tensor = torch.tensor(X_resampled, dtype=torch.float32)
y_tensor = torch.tensor(y_resampled, dtype=torch.long)

dataset = TensorDataset(X_tensor, y_tensor)
loader = DataLoader(dataset, batch_size=best_params['batch_size'], shuffle=True)

# Initialize the model with best hyperparameters
model = TabularTransformer(
    input_dim=X_tensor.shape[1], num_classes=2, d_model=best_params['d_model'],
    nhead=best_params['nhead'], num_layers=best_params['num_layers'],
    dim_feedforward=best_params['dim_feedforward'], dropout=best_params['dropout']
)

optimizer = Adam(model.parameters(), lr=best_params['lr'], weight_decay=best_params['weight_decay'])
criterion = nn.CrossEntropyLoss()

# Training loop
epochs = 100
for epoch in range(epochs):
    model.train()
    for X_batch, y_batch in loader:
        optimizer.zero_grad()
        outputs = model(X_batch.unsqueeze(1))
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

    # Optional: print training progress
    if (epoch + 1) % 10 == 0:
        model.eval()
        with torch.no_grad():
            outputs = model(X_tensor.unsqueeze(1))
            _, preds = torch.max(outputs, 1)
            train_accuracy = accuracy_score(y_tensor.numpy(), preds.numpy())
            print(f"Epoch {epoch+1}/{epochs}, Training Accuracy: {train_accuracy:.4f}")

# Evaluate on the original dataset
model.eval()
with torch.no_grad():
    X_test_tensor = torch.tensor(X_scaled, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_encoded, dtype=torch.long)
    outputs = model(X_test_tensor.unsqueeze(1))
    _, preds = torch.max(outputs, 1)
    accuracy = accuracy_score(y_test_tensor.numpy(), preds.numpy())
    print(f"Final Model Accuracy on Original Data: {accuracy:.4f}")


[I 2024-10-25 03:37:56,057] A new study created in memory with name: no-name-15943141-b4d5-4b6f-b537-8eb17929f62c
  dropout = trial.suggest_uniform('dropout', 0.1, 0.5)
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-3)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
[I 2024-10-25 03:37:59,078] Trial 0 finished with value: 0.9230769230769231 and parameters: {'d_model': 128, 'nhead': 4, 'num_layers': 2, 'dim_feedforward': 128, 'dropout': 0.1862174986391533, 'lr': 0.00023503052879137983, 'weight_decay': 6.996791608378533e-05, 'batch_size': 8}. Best is trial 0 with value: 0.9230769230769231.
  dropout = trial.suggest_uniform('dropout', 0.1, 0.5)
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-3)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
[I 2024-10-25 03:38:00,408] Trial 1 finished with value: 0.758974358974359 and parameters: {'d_model': 64, 'nhead': 4, 'num_layers': 1, 'dim_feedforward': 64, 'dropout': 0.3146244590153168, 'lr': 3.0490368856

Best Hyperparameters: {'d_model': 128, 'nhead': 8, 'num_layers': 1, 'dim_feedforward': 256, 'dropout': 0.2323907479291667, 'lr': 0.0009580356505027221, 'weight_decay': 0.0009471536139425605, 'batch_size': 16}
Epoch 10/100, Training Accuracy: 0.9660
Epoch 20/100, Training Accuracy: 0.9898
Epoch 30/100, Training Accuracy: 0.9796
Epoch 40/100, Training Accuracy: 0.9966
Epoch 50/100, Training Accuracy: 1.0000
Epoch 60/100, Training Accuracy: 1.0000
Epoch 70/100, Training Accuracy: 1.0000
Epoch 80/100, Training Accuracy: 1.0000
Epoch 90/100, Training Accuracy: 1.0000
Epoch 100/100, Training Accuracy: 0.9898
Final Model Accuracy on Original Data: 0.9897
