# load datasets and prepare model

In [28]:
import torch
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import tf_keras as keras

from transformers import PatchTSTForClassification, PatchTSTConfig, Trainer, TrainingArguments, TrainerCallback
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss


## load datasets

In [29]:
import numpy as np

train_data = np.load('/Users/wuruoyu/Sleep-Staging/data/train_data2_noCoordinates.npz')
val_data = np.load('/Users/wuruoyu/Sleep-Staging/data/val_data2_noCoordinates.npz')
test_data = np.load('/Users/wuruoyu/Sleep-Staging/data/test_data_noCoordinates.npz') 
output_dir = 'models/PatchTST/'

X_train = train_data['X']
y_train = train_data['y']
X_test = test_data['X']
y_test = test_data['y']
X_val = val_data['X']
y_val = val_data['y']
X_train = X_train[:,0:2976]
X_test = X_test[:,0:2976]
X_val = X_val[:,0:2976]
X_train = X_train.reshape(3432, 1, 2976)
X_test = X_test.reshape(1144, 1, 2976)
X_val = X_val.reshape(1144, 1, 2976)
from torch.utils.data import Dataset
import torch


In [30]:
class MNI_to_Huggingface(Dataset):
    def __init__(self, X_data, y_data, patch_length=32):
        self.X_data = X_data
        self.y_data = y_data
        self.patch_length = patch_length
        
        # Compute number of patches per input sequence (3000 / 32 = 93 patches)
        self.num_patches = X_data.shape[2] // patch_length  # Assuming each sequence is 3000 long

        # Reshape the input data into patches: (batch_size, num_patches, patch_length)
        self.X_data = self.X_data.reshape(self.X_data.shape[0], self.patch_length, self.num_patches)
    
    def __len__(self):
        return len(self.X_data)
    
    def __getitem__(self, idx):
        # Each sample now has shape (num_patches, patch_length)
        return {
            'past_values': torch.tensor(self.X_data[idx], dtype=torch.float32),  # Input data
            'target_values': torch.tensor(self.y_data[idx], dtype=torch.long)   # Labels
        }

# Create the dataset instances
train_dataset = MNI_to_Huggingface(X_train, y_train, patch_length=32)
val_dataset = MNI_to_Huggingface(X_val, y_val, patch_length=32)
test_dataset = MNI_to_Huggingface(X_test, y_test, patch_length=32)


## preparing model

### initialize patchtst

In [31]:
# Set up PatchTST config
config = PatchTSTConfig(
    num_targets=4,     # 4 classes for classification
    num_input_channels=93,    # Single channel input (as per your reshaped data)
)


# Initialize the PatchTSTForClassification model
model = PatchTSTForClassification(config)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


PatchTSTForClassification(
  (model): PatchTSTModel(
    (scaler): PatchTSTScaler(
      (scaler): PatchTSTStdScaler()
    )
    (patchifier): PatchTSTPatchify()
    (masking): Identity()
    (encoder): PatchTSTEncoder(
      (embedder): PatchTSTEmbedding(
        (input_embedding): Linear(in_features=1, out_features=128, bias=True)
      )
      (positional_encoder): PatchTSTPositionalEncoding(
        (positional_dropout): Identity()
      )
      (layers): ModuleList(
        (0-2): 3 x PatchTSTEncoderLayer(
          (self_attn): PatchTSTAttention(
            (k_proj): Linear(in_features=128, out_features=128, bias=True)
            (v_proj): Linear(in_features=128, out_features=128, bias=True)
            (q_proj): Linear(in_features=128, out_features=128, bias=True)
            (out_proj): Linear(in_features=128, out_features=128, bias=True)
          )
          (dropout_path1): Identity()
          (norm_sublayer1): PatchTSTBatchNorm(
            (batchnorm): BatchNorm1d(128, 

### customize metrics and number of epochs

In [32]:
def custom_compute_metrics(p):
    print(type(p))
    pred, labels = p
    pred = np.argmax(pred, axis=1)

    accuracy = accuracy_score(y_true=labels, y_pred=pred)
    recall = recall_score(y_true=labels, y_pred=pred)
    precision = precision_score(y_true=labels, y_pred=pred)
    f1 = f1_score(y_true=labels, y_pred=pred)

    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

# Define Trainer
args = TrainingArguments(
    output_dir="output",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    label_names=['stages']

)
custom_trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=custom_compute_metrics
)
     

# train, evaluate and save

## training

In [33]:
# Start training
custom_trainer.train()


100%|██████████| 429/429 [03:04<00:00,  2.33it/s]

{'train_runtime': 184.3121, 'train_samples_per_second': 18.621, 'train_steps_per_second': 2.328, 'train_loss': 1.319745257184222, 'epoch': 1.0}





TrainOutput(global_step=429, training_loss=1.319745257184222, metrics={'train_runtime': 184.3121, 'train_samples_per_second': 18.621, 'train_steps_per_second': 2.328, 'total_flos': 39636327684096.0, 'train_loss': 1.319745257184222, 'epoch': 1.0})

In [34]:
custom_trainer.evaluate()

100%|██████████| 143/143 [00:20<00:00,  6.89it/s]


{'eval_runtime': 20.8742,
 'eval_samples_per_second': 54.805,
 'eval_steps_per_second': 6.851,
 'epoch': 1.0}

## save model

In [35]:
# Save the trained model
custom_trainer.save_model(output_dir)
