In [None]:
import os
import pandas as pd

In [None]:
df = pd.read_csv('cleaned.csv')

In [None]:
# Print the shape of the cleaned data
print(f"cleaned DataFrame shape: {df.shape}")

cleaned DataFrame shape: (4429, 24)


In [None]:
df = df.drop_duplicates()
print(f"DataFrame shape after removing duplicates: {df.shape}")

DataFrame shape after removing duplicates: (4407, 24)


In [None]:
df.head()

Unnamed: 0,PATNO,EVENT_ID,NP1COG,NP1HALL,NP1DPRS,NP1ANXS,NP1APAT,NP2SPCH,NP2SALV,NP2SWAL,...,NP3FRZGT,NP3PSTBL,NP3TOT,AGE_AT_VISIT,SDMTOTAL,HVLTRT1,HVLTRT2,HVLTRT3,HVLTRDLY,HVLTREC
0,3000,BL,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,4.0,69.1,53.0,8.0,11.0,11.0,10.0,12.0
1,3001,BL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,12.0,65.1,42.0,9.0,10.0,8.0,12.0,12.0
2,3002,BL,1.0,0.0,1.0,0.0,1.0,1.0,2.0,1.0,...,0.0,0.0,17.0,67.6,41.0,6.0,10.0,12.0,11.0,12.0
3,3003,BL,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,29.0,56.7,37.0,7.0,10.0,12.0,11.0,12.0
4,3004,BL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,59.4,47.0,6.0,8.0,8.0,5.0,12.0


In [None]:
# Step 1: Handle Missing Values
# Fill missing values for numerical columns with the median
numerical_columns = [
    "NP1COG", "NP1HALL", "NP1DPRS", "NP1ANXS", "NP1APAT",
    "NP2SPCH", "NP2SALV", "NP2SWAL", "NP2EAT", "NP2DRES",
    "NP3SPCH", "NP3GAIT", "NP3FRZGT", "NP3PSTBL", "NP3TOT",
    "AGE_AT_VISIT", "SDMTOTAL", "HVLTRT1", "HVLTRT2", "HVLTRT3", "HVLTRDLY", "HVLTREC"
]

In [None]:
df[numerical_columns] = df[numerical_columns].fillna(df[numerical_columns].median())


In [None]:
df = df.drop(columns=["EVENT_ID"])


In [None]:
# Step 3: Scale Numerical Features
# Standardize numerical columns to have mean = 0 and standard deviation = 1
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

In [None]:
# Step 4: Feature-Specific Preprocessing
# Drop columns with very low variance (e.g., NP3FRZGT has only 0.0 values)
low_variance_columns = ["NP3FRZGT"]
df = df.drop(columns=low_variance_columns)

In [None]:
import numpy as np  # Import numpy for numerical operations

# Compute the correlation matrix
correlation_matrix = df.corr().abs()

# Create an upper triangle mask using numpy
upper_triangle = correlation_matrix.where(
    np.triu(np.ones(correlation_matrix.shape), k=1).astype(bool)
)

# Identify columns to drop based on high correlation (e.g., > 0.9)
to_drop = [column for column in upper_triangle.columns if any(upper_triangle[column] > 0.8)]

# Drop the highly correlated columns
df = df.drop(columns=to_drop)

print(f"Columns dropped due to high correlation: {to_drop}")

Columns dropped due to high correlation: []


In [None]:
df.to_csv("cleaned_preprocessed.csv", index=False)
print("Preprocessed dataset saved as 'cleaned_preprocessed.csv'")

Preprocessed dataset saved as 'cleaned_preprocessed.csv'


In [None]:
df.head()
print(f"cleaned DataFrame shape: {df.shape}")

cleaned DataFrame shape: (4407, 22)


In [None]:
import pandas as pd

# Load the datasets
cleaned_df = pd.read_csv('cleaned_preprocessed.csv')
participant_status_df = pd.read_csv('Participant_Status_30Mar2025.csv')

# Select only the necessary columns from Participant_Status_30Mar2025.csv
participant_status_df = participant_status_df[['PATNO', 'COHORT_DEFINITION']]

# Merge the datasets on the PATNO column
merged_df = cleaned_df.merge(participant_status_df, on='PATNO', how='inner')

# Save the merged dataset to a new CSV file
merged_df.to_csv('merged_dataset.csv', index=False)

print("Merged dataset saved as 'merged_dataset.csv'.")

merged_df.head()

Merged dataset saved as 'merged_dataset.csv'.


Unnamed: 0,PATNO,NP1COG,NP1HALL,NP1DPRS,NP1ANXS,NP1APAT,NP2SPCH,NP2SALV,NP2SWAL,NP2EAT,...,NP3PSTBL,NP3TOT,AGE_AT_VISIT,SDMTOTAL,HVLTRT1,HVLTRT2,HVLTRT3,HVLTRDLY,HVLTREC,COHORT_DEFINITION
0,3000,1.093062,-0.206265,0.379001,0.683703,-0.39711,-0.461978,-0.498558,-0.312543,-0.387215,...,-0.065167,-0.61101,0.483564,0.868034,0.975283,1.157533,0.678225,0.575454,0.607089,Healthy Control
1,3001,-0.57434,-0.206265,-0.219443,-0.656039,-0.39711,-0.461978,-0.498558,-0.312543,-0.387215,...,-0.065167,0.083662,0.03183,-0.167952,1.498863,0.672808,-0.856663,1.264217,0.607089,Parkinson's Disease
2,3002,1.093062,-0.206265,0.379001,-0.656039,1.306943,1.144918,1.84132,1.912624,-0.387215,...,-0.065167,0.517831,0.314164,-0.262133,-0.071878,0.672808,1.189855,0.919836,0.607089,Parkinson's Disease
3,3003,-0.57434,-0.206265,-0.219443,0.683703,-0.39711,-0.461978,-0.498558,-0.312543,1.722124,...,-0.065167,1.559838,-0.916811,-0.638855,0.451702,0.672808,1.189855,0.919836,0.607089,Parkinson's Disease
4,3004,-0.57434,-0.206265,-0.219443,-0.656039,-0.39711,-0.461978,-0.498558,-0.312543,-0.387215,...,-0.065167,-0.784677,-0.611891,0.302951,-0.071878,-0.296643,-0.856663,-1.146454,0.607089,Healthy Control


In [None]:
# Compute correlation of features with the target (COHORT_DEFINITION)
# Convert COHORT_DEFINITION to numeric for correlation analysis
# Load the merged dataset
merged_df = pd.read_csv('merged_dataset.csv')

# Exclude PATNO (identifier) from the correlation matrix
features_df = merged_df.drop(columns=['PATNO'])

merged_df['COHORT_DEFINITION'] = merged_df['COHORT_DEFINITION'].map({
    "Parkinson's Disease": 1,
    "Healthy Control": 0
})

# Compute correlation
correlation = merged_df.corr()['COHORT_DEFINITION'].sort_values(ascending=False)
print(correlation)

COHORT_DEFINITION    1.000000
NP3TOT               0.612393
NP3GAIT              0.390452
NP3SPCH              0.312430
NP2DRES              0.291265
NP2SPCH              0.264534
NP2EAT               0.248967
NP2SALV              0.224557
NP1COG               0.146408
NP2SWAL              0.145890
NP1ANXS              0.141613
NP1APAT              0.134761
NP1DPRS              0.129693
NP3PSTBL             0.122127
NP1HALL              0.088026
PATNO                0.034323
AGE_AT_VISIT         0.011819
HVLTRT3             -0.070855
HVLTREC             -0.086256
HVLTRT2             -0.104233
HVLTRT1             -0.117775
HVLTRDLY            -0.123760
SDMTOTAL            -0.207535
Name: COHORT_DEFINITION, dtype: float64


### **Methodology**

#### **1. Image Feature Extraction**
- **Input**: MRI images of patients (e.g., `.nii` files) from two groups: "Control" and "PD".
- **Preprocessing**:
  - Images were resized to a consistent shape (e.g., `64x64x64`).
  - Pixel intensities were normalized to have zero mean and unit variance.
- **Feature Extraction**:
  - **Statistical Features**: Extracted mean intensity, standard deviation, skewness, and kurtosis of pixel intensities.
  - **Texture Features**: Extracted 13 Haralick features using the Mahotas library, which capture texture properties such as contrast, correlation, and homogeneity.
- **Output**: A structured dataset (`extracted_features.csv`) containing 17 features (4 statistical + 13 Haralick) for each image, along with patient IDs (`PATNO`) and labels (`Control` or `PD`).

---

#### **2. Clinical Feature Extraction**
- **Input**: Clinical data of patients, including features such as motor and non-motor symptoms, cognitive scores, and other clinical metrics.
- **Preprocessing**:
  - Clinical features were normalized to have zero mean and unit variance.
  - Missing values were imputed using the median of the respective feature.
- **Output**: A structured dataset (`Final_clinical_feat.csv`) containing normalized clinical features, patient IDs (`PATNO`), and labels (`Healthy Control` or `Parkinson's Disease`).

---

#### **3. Multimodal Graph Construction**
- **Input**: Image features (`extracted_features.csv`) and clinical features (`Final_clinical_feat.csv`).
- **Graph Construction**:
  - Two separate graphs were constructed:
    1. **Image Graph**:
       - Nodes represent patients, and node features are derived from image features.
       - Edges were created using the k-Nearest Neighbors (k-NN) algorithm based on image feature similarity.
    2. **Clinical Graph**:
       - Nodes represent patients, and node features are derived from clinical features.
       - Edges were created using the k-Nearest Neighbors (k-NN) algorithm based on clinical feature similarity.
  - Self-loops were added to both adjacency matrices to include each node's own information.
- **Output**: Two graphs (`image_graph` and `clinical_graph`) in PyTorch Geometric format, with node features and adjacency matrices.

---

#### **4. Graph Attention Networks (GATs)**
- **Objective**: Learn embeddings for each graph to capture relationships between nodes.
- **Architecture**:
  - Two separate GATs were used:
    1. **Image GAT**: Processes the image graph to generate embeddings for image features.
    2. **Clinical GAT**: Processes the clinical graph to generate embeddings for clinical features.
  - Each GAT consists of:
    - A first GAT layer with 4 attention heads and 16 hidden units per head.
    - A second GAT layer with 1 attention head and 16 output units.
    - ReLU activation applied after the first layer.
- **Output**: Low-dimensional embeddings (`image_embeddings` and `clinical_embeddings`) for each graph.

---

#### **5. Embedding Fusion**
- **Objective**: Combine the embeddings from the two graphs into a unified representation.
- **Fusion Process**:
  - Trainable linear transformations (`fusion_image_fc` and `fusion_clinical_fc`) were applied to the image and clinical embeddings, followed by ReLU activation.
  - The transformed embeddings were summed to create fused embeddings:
    \[
    \text{fused\_embeddings} = \text{ReLU}(\text{fusion\_image\_fc}(\text{image\_embeddings})) + \text{ReLU}(\text{fusion\_clinical\_fc}(\text{clinical\_embeddings}))
    \]
- **Output**: Fused embeddings representing both image and clinical modalities.

---

#### **6. Loss Functions**
- **Contrastive Loss**:
  - Encourages intra-class similarity and inter-class dissimilarity in the fused embedding space.
  - Pairwise distances between embeddings were computed, and positive/negative pairs were defined based on class labels.
  - The contrastive loss was computed as:
    \[
    L_{\text{contrastive}} = L_{\text{positive}} + L_{\text{negative}}
    \]
- **Classification Loss**:
  - A simple classifier (fully connected layer) was trained on the fused embeddings to predict class labels.
  - Cross-entropy loss was used for classification:
    \[
    L_{\text{classification}} = -\sum_{i=1}^{N} y_i \log(\hat{y}_i)
    \]
- **Combined Loss**:
  - The total loss was a weighted combination of contrastive and classification losses:
    \[
    L_{\text{combined}} = \beta L_{\text{contrastive}} + (1 - \beta) L_{\text{classification}}
    \]
  - The weighting factor \(\beta\) was set to 0.5.

---

#### **7. Training**
- **Process**:
  - The GATs and classifier were trained jointly using the combined loss function.
  - The optimizer used was Adam with a learning rate of 0.001.
  - Training was performed for 200 epochs.
- **Steps**:
  1. Forward pass through the GATs to generate embeddings.
  2. Fuse the embeddings.
  3. Compute contrastive and classification losses.
  4. Backpropagate the combined loss and update model parameters.

---

#### **8. Evaluation**
- **Metrics**:
  - Accuracy, precision, recall, and F1-score were used to evaluate classification performance.
  - t-SNE and PCA were used to visualize the fused embeddings in 2D.
- **Process**:
  - The dataset was split into training and test sets (80/20 split).
  - The classifier was trained on the training set and evaluated on the test set.
  - Silhouette scores and intra-/inter-cluster distances were computed to assess the quality of the fused embeddings.

---

#### **9. Results**
- The proposed framework demonstrated the ability to effectively integrate image and clinical data for Parkinson's Disease classification.
- Visualization of the fused embeddings showed clear separation between classes, indicating the effectiveness of the fusion process.
- The combined use of contrastive and classification losses improved both embedding quality and classification accuracy.

---

This methodology highlights the integration of multimodal data using graph-based learning and demonstrates its potential for advancing Parkinson's Disease research. Let me know if you'd like to refine or expand any section!

In [None]:
import pandas as pd

# Load the merged dataset
merged_df = pd.read_csv('merged_dataset.csv')

# Define the selected features based on correlation analysis
selected_features = [
   'PATNO', 'NP3TOT', 'NP3GAIT', 'NP3SPCH', 'NP2DRES', 'NP2SPCH', 'NP2EAT',
    'NP2SALV', 'NP1COG', 'NP2SWAL', 'NP1ANXS', 'NP1APAT', 'NP1DPRS', 'COHORT_DEFINITION'
]

# Filter the dataset to include only the selected features
filtered_df = merged_df[selected_features]

# Save the filtered dataset
filtered_df.to_csv('filtered_dataset.csv', index=False)

print("Filtered dataset saved as 'filtered_dataset.csv'.")

Filtered dataset saved as 'filtered_dataset.csv'.


In [None]:
import pandas as pd

# Load the filtered dataset
filtered_df = pd.read_csv('filtered_dataset.csv')

# Keep only rows where COHORT_DEFINITION is "Healthy Control" or "Parkinson's Disease"
filtered_df = filtered_df[filtered_df['COHORT_DEFINITION'].isin(["Healthy Control", "Parkinson's Disease"])]

# Save the updated dataset
filtered_df.to_csv('Final_clinical_feat.csv', index=False)

print("Rows with invalid COHORT_DEFINITION removed. Cleaned dataset saved as 'Final_clinical_feat.csv'.")

Rows with invalid COHORT_DEFINITION removed. Cleaned dataset saved as 'Final_clinical_feat.csv'.


In [None]:
filtered_df.shape


(2032, 14)

# Random Forest with clinical features

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Load the cleaned dataset
df = pd.read_csv('Final_clinical_feat.csv')

# Convert COHORT_DEFINITION to numeric values
df['COHORT_DEFINITION'] = df['COHORT_DEFINITION'].map({
    "Parkinson's Disease": 1,
    "Healthy Control": 0
})

# Define features (X) and target (y)
X = df.drop(columns=['PATNO', 'COHORT_DEFINITION'])  # Exclude PATNO and target
y = df['COHORT_DEFINITION']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Model Evaluation Metrics:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Feature importance
feature_importances = pd.Series(rf_model.feature_importances_, index=X.columns).sort_values(ascending=False)
print("\nFeature Importances:")
print(feature_importances)

Model Evaluation Metrics:
Accuracy: 0.97
Precision: 0.97
Recall: 0.99
F1-Score: 0.98

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.86      0.91        66
           1       0.97      0.99      0.98       341

    accuracy                           0.97       407
   macro avg       0.97      0.93      0.95       407
weighted avg       0.97      0.97      0.97       407


Feature Importances:
NP3TOT     0.718244
NP3GAIT    0.092973
NP3SPCH    0.045246
NP2DRES    0.030439
NP2SPCH    0.025661
NP2EAT     0.019631
NP2SALV    0.017984
NP1ANXS    0.015424
NP1APAT    0.009768
NP1DPRS    0.009193
NP1COG     0.008448
NP2SWAL    0.006989
dtype: float64


Gradient Boosting Classifier Metrics:
              precision    recall  f1-score   support

           0       0.95      0.86      0.90        66
           1       0.97      0.99      0.98       341

    accuracy                           0.97       407
   macro avg       0.96      0.93      0.94       407
weighted avg       0.97      0.97      0.97       407



In [None]:
import pandas as pd
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, accuracy_score

# Load the cleaned dataset
df = pd.read_csv('Final_clinical_feat.csv')

# Convert COHORT_DEFINITION to numeric values
df['COHORT_DEFINITION'] = df['COHORT_DEFINITION'].map({
    "Parkinson's Disease": 1,
    "Healthy Control": 0
})

# Define features (X) and target (y)
X = df.drop(columns=['PATNO', 'COHORT_DEFINITION'])  # Exclude PATNO and target
y = df['COHORT_DEFINITION']

# Define k-fold cross-validation
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Define models
models = {
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "SVM": SVC(kernel='linear', random_state=42),
    "Logistic Regression": LogisticRegression(random_state=42, max_iter=1000)
}

# Evaluate each model using k-fold cross-validation
for model_name, model in models.items():
    print(f"\nEvaluating {model_name}...")
    accuracies = cross_val_score(model, X, y, cv=kfold, scoring=make_scorer(accuracy_score))
    print(f"Accuracies for each fold: {accuracies}")
    print(f"Mean Accuracy: {accuracies.mean():.2f}")


Evaluating Gradient Boosting...
Accuracies for each fold: [0.97542998 0.96560197 0.97044335 0.98522167 0.9679803 ]
Mean Accuracy: 0.97

Evaluating SVM...
Accuracies for each fold: [0.97788698 0.95577396 0.97044335 0.98029557 0.9729064 ]
Mean Accuracy: 0.97

Evaluating Logistic Regression...
Accuracies for each fold: [0.97788698 0.96314496 0.9679803  0.98768473 0.97536946]
Mean Accuracy: 0.97


In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Reshape the data for LSTM (samples, timesteps, features)
X_lstm = X_scaled.reshape(X_scaled.shape[0], 1, X_scaled.shape[1])

# Define k-fold cross-validation
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Initialize variables to store accuracy for each fold
fold_accuracies = []

# Perform k-fold cross-validation
for train_idx, test_idx in kfold.split(X_lstm, y):
    # Split the data
    X_train, X_test = X_lstm[train_idx], X_lstm[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Build the LSTM model
    lstm_model = Sequential()
    lstm_model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
    lstm_model.add(Dense(1, activation='sigmoid'))

    # Compile the model
    lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Train the model
    lstm_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

    # Evaluate the model
    _, accuracy = lstm_model.evaluate(X_test, y_test, verbose=0)
    fold_accuracies.append(accuracy)
    print(f"Fold Accuracy: {accuracy:.2f}")

# Print overall mean accuracy
print(f"\nMean Accuracy for LSTM: {np.mean(fold_accuracies):.2f}")

  super().__init__(**kwargs)


Fold Accuracy: 0.97
Fold Accuracy: 0.95
Fold Accuracy: 0.96
Fold Accuracy: 0.97
Fold Accuracy: 0.96

Mean Accuracy for LSTM: 0.96


In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data
from torch_geometric.nn import GATConv
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import kneighbors_graph
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Step 1: Load and preprocess the data
file_path = 'final_clinical_feat.csv'  # Path to the dataset
df = pd.read_csv(file_path)

# Select features and labels
features = df.columns[:-1]  # All columns except the last one are features
label_column = df.columns[-1]  # The last column is the label
X = df[features].values
y = df[label_column].factorize()[0]  # Convert labels to numerical values (e.g., Control -> 0, PD -> 1)

# Handle missing values by filling with the median
X = pd.DataFrame(X, columns=features)
X = X.fillna(X.median())  # Replace NaN with the median
X = X.values  # Convert back to NumPy array

# Normalize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 2: Construct graphs for training and testing data
# Create adjacency matrices using k-NN
k = 5  # Number of neighbors
adj_matrix_train = kneighbors_graph(X_train, n_neighbors=k, mode='connectivity', include_self=True).toarray()
adj_matrix_test = kneighbors_graph(X_test, n_neighbors=k, mode='connectivity', include_self=True).toarray()

# Convert adjacency matrices to edge index format (required by PyTorch Geometric)
edge_index_train = torch.tensor(np.array(np.nonzero(adj_matrix_train)), dtype=torch.long)
edge_index_test = torch.tensor(np.array(np.nonzero(adj_matrix_test)), dtype=torch.long)

# Convert features and labels to PyTorch tensors
x_train = torch.tensor(X_train, dtype=torch.float)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
x_test = torch.tensor(X_test, dtype=torch.float)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create PyTorch Geometric Data objects for training and testing
train_data = Data(x=x_train, edge_index=edge_index_train, y=y_train_tensor)
test_data = Data(x=x_test, edge_index=edge_index_test, y=y_test_tensor)

# Step 3: Define the GAT model
class GAT(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, heads=1):
        super(GAT, self).__init__()
        self.gat1 = GATConv(input_dim, hidden_dim, heads=heads, concat=True)
        self.gat2 = GATConv(hidden_dim * heads, output_dim, heads=1, concat=False)
        self.relu = nn.ReLU()

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.relu(self.gat1(x, edge_index))
        x = self.gat2(x, edge_index)
        return x

# Initialize the GAT model
input_dim = X_train.shape[1]  # Number of features
hidden_dim = 64  # Hidden state size
output_dim = len(np.unique(y))  # Number of classes
heads = 4  # Number of attention heads
model = GAT(input_dim, hidden_dim, output_dim, heads=heads)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Step 4: Train the GAT model
num_epochs = 200
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    out = model(train_data)
    loss = criterion(out, train_data.y)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")

# Step 5: Evaluate the GAT model on the test set
model.eval()
with torch.no_grad():
    logits = model(test_data)
    y_pred = logits.argmax(dim=1).numpy()
    y_true = y_test_tensor.numpy()

# Calculate accuracy and classification report
accuracy = accuracy_score(y_true, y_pred)
print(f"\nGAT Model Test Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=pd.factorize(df[label_column])[1]))

Epoch [10/200], Loss: 0.1644
Epoch [20/200], Loss: 0.1139
Epoch [30/200], Loss: 0.0874
Epoch [40/200], Loss: 0.0739
Epoch [50/200], Loss: 0.0638
Epoch [60/200], Loss: 0.0556
Epoch [70/200], Loss: 0.0488
Epoch [80/200], Loss: 0.0432
Epoch [90/200], Loss: 0.0387
Epoch [100/200], Loss: 0.0353
Epoch [110/200], Loss: 0.0322
Epoch [120/200], Loss: 0.0301
Epoch [130/200], Loss: 0.0286
Epoch [140/200], Loss: 0.0276
Epoch [150/200], Loss: 0.0269
Epoch [160/200], Loss: 0.0262
Epoch [170/200], Loss: 0.0256
Epoch [180/200], Loss: 0.0251
Epoch [190/200], Loss: 0.0247
Epoch [200/200], Loss: 0.0243

GAT Model Test Accuracy: 0.9214

Classification Report:
                     precision    recall  f1-score   support

    Healthy Control       0.80      0.68      0.74        66
Parkinson's Disease       0.94      0.97      0.95       341

           accuracy                           0.92       407
          macro avg       0.87      0.82      0.85       407
       weighted avg       0.92      0.92     