In [None]:
import os
import torch
import warnings
import pandas as pd
import numpy as np
import gym
import torch.nn as nn
from gym import spaces
from torch.utils.data import DataLoader, Dataset
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

In [None]:
# Pick the compute device: the GPU whenever CUDA reports itself as available.
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cuda":
    # Drop cached allocator memory and collect released IPC memory, then cap
    # this process at 75% of the memory of GPU 0.
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
    torch.cuda.set_per_process_memory_fraction(0.75, device=0)

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Fixed seeds for the global PyTorch and NumPy random generators.
torch.manual_seed(52)
np.random.seed(52)

In [None]:
# Number of consecutive pressure samples in one sliding window (= one observation).
SEQUENCE_LENGTH = 100
# Batch size of the training DataLoader; each batch is replayed by the environment as one episode.
BATCH_SIZE = 64
# Multiplied by the number of training batches to obtain PPO's total_timesteps.
N_EPISODES = 500
# Learning rate handed to PPO.
LEARNING_RATE = 3e-4
# Directory whose files are parsed as whitespace-separated (time, pressure) series for training.
TRAIN_DIR = "../src/train_denoised/"
# Not referenced by the visible cells; presumably the held-out counterpart of TRAIN_DIR — confirm.
TEST_DIR = "../src/test_denoised/"
# Destination passed to model.save().
# NOTE(review): stable-baselines3 appears to write a zip archive whatever the suffix is —
# confirm that the ".pkl" extension is intentional.
MODEL_SAVE_PATH = "../models/ppo_rnn_model.pkl"

In [None]:
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset over the ``pressure`` column of every file in a directory.

    Each regular file in ``directory`` is parsed as a header-less,
    whitespace-separated table with the columns ``time`` and ``pressure``.
    Every run of ``sequence_length`` consecutive pressure values becomes one
    float32 sample of shape ``(sequence_length,)``.

    Args:
        directory: Folder containing the series files.
        sequence_length: Window length. ``None`` (the default) falls back to
            the module-level ``SEQUENCE_LENGTH``.

    Raises:
        ValueError: If the window length is not positive.

    Note:
        Samples are views into one array per file, so overlapping windows
        share memory. Copy a sample before modifying it in place.
    """

    def __init__(self, directory, sequence_length=None):
        window = SEQUENCE_LENGTH if sequence_length is None else int(sequence_length)
        if window <= 0:
            raise ValueError(f"sequence_length must be positive, got {window}")
        self.sequence_length = window

        # Sorted so the sample order is reproducible across machines.
        # Sub-directories (for example Jupyter's ``.ipynb_checkpoints``) are
        # skipped because they cannot be parsed as a table.
        candidates = (os.path.join(directory, name) for name in os.listdir(directory))
        self.files = sorted(path for path in candidates if os.path.isfile(path))
        self.data = []

        for file_path in self.files:
            df = pd.read_csv(file_path, sep="\\s+", names=["time", "pressure"])
            if df.empty:
                continue

            # Convert the column once per file; the windows below are slices
            # of this array rather than fresh copies.
            pressure = df["pressure"].to_numpy(dtype=np.float32)

            # A series of n samples contains n - window + 1 complete windows,
            # so the range must include the window that ends on the last row.
            for start in range(len(pressure) - window + 1):
                self.data.append(pressure[start:start + window])

    def __len__(self):
        """Return the total number of windows collected from all files."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return window ``idx`` as a float32 array of length ``sequence_length``."""
        return self.data[idx]

In [None]:
# Materialise every training window up front, then serve the windows in
# shuffled mini-batches.
train_dataset = TimeSeriesDataset(directory=TRAIN_DIR)
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [None]:
class RNNFeatureExtractor(BaseFeaturesExtractor):
    """LSTM encoder that maps a window observation to a fixed-size feature vector.

    Args:
        observation_space: Box space of the observations. Its last dimension
            is used as the number of values per time step, so a
            ``(sequence_length, 1)`` space yields an LSTM with ``input_size=1``.
        hidden_dim: Hidden size of the LSTM and width of the returned features.
    """

    def __init__(self, observation_space: spaces.Box, hidden_dim=128):
        super().__init__(observation_space, features_dim=hidden_dim)
        # Read the per-step width from the space instead of hard-coding 1, so
        # the extractor also fits multi-channel windows.
        input_size = int(observation_space.shape[-1])
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, hidden_dim)

    def forward(self, x):
        """Encode a batch of windows.

        Args:
            x: Tensor indexed as (batch, time, channel), as required by the
                ``batch_first=True`` LSTM.

        Returns:
            Tensor of shape (batch, hidden_dim).
        """
        sequence_out, _ = self.lstm(x)
        # Only the LSTM output at the final time step is projected.
        return self.fc(sequence_out[:, -1, :])

class TimeSeriesEnv(gym.Env):
    """Gym environment that replays DataLoader batches as episodes.

    One batch is one episode: ``reset`` draws the next batch and returns its
    first window, and each ``step`` moves on to the following window. The
    episode ends once every window of the batch has been visited. The action
    is ignored and the reward is a uniform random placeholder.

    Observations are float32 numpy arrays shaped like ``observation_space``.

    Args:
        dataloader: Iterable of batches, where ``batch[i]`` is one window of
            ``SEQUENCE_LENGTH`` values. It is re-iterated whenever exhausted.

    Raises:
        ValueError: If ``dataloader`` yields no batches at all.
    """

    def __init__(self, dataloader):
        super().__init__()
        # Keep the loader itself so the iterator can be restarted from the
        # same source once it runs dry.
        self._loader = dataloader
        self.dataloader = iter(dataloader)
        self.current_batch = self._next_batch()
        self.batch_idx = 0
        # NOTE(review): the bounds assume the pressure values are scaled to
        # [-1, 1] — confirm against the preprocessing step.
        self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(SEQUENCE_LENGTH, 1), dtype=np.float32)
        self.action_space = spaces.Discrete(3)

    def _next_batch(self):
        """Return the next batch, restarting the loader when it is exhausted."""
        batch = next(self.dataloader, None)
        if batch is None:
            self.dataloader = iter(self._loader)
            batch = next(self.dataloader, None)
            if batch is None:
                raise ValueError("dataloader yielded no batches")
        return batch

    def _observation(self, index):
        """Return window ``index`` of the current batch as a numpy observation."""
        sample = self.current_batch[index]
        if torch.is_tensor(sample):
            # DataLoader collates samples into tensors; gym spaces describe
            # numpy arrays.
            sample = sample.detach().cpu().numpy()
        return np.asarray(sample, dtype=np.float32).reshape(self.observation_space.shape)

    def reset(self):
        """Start a new episode on the next batch and return its first window."""
        self.current_batch = self._next_batch()
        self.batch_idx = 0
        return self._observation(0)

    def step(self, action):
        """Advance one window inside the current batch.

        Args:
            action: Ignored; the reward is random.

        Returns:
            Tuple ``(obs, reward, done, info)``. When ``done`` is True the
            observation is the last window of the batch, and the caller is
            expected to call ``reset`` before stepping again.
        """
        reward = np.random.uniform(-1, 1)
        self.batch_idx += 1

        # Use the real batch length: the final batch of an epoch can be
        # shorter than the configured batch size.
        last_index = len(self.current_batch) - 1
        done = self.batch_idx > last_index

        # No reset here: vectorised wrappers such as DummyVecEnv reset the
        # environment themselves on ``done``, so resetting in ``step`` as
        # well would throw away a whole batch per episode.
        obs = self._observation(min(self.batch_idx, last_index))
        return obs, reward, done, {}

In [None]:
def _make_train_env():
    """Factory for DummyVecEnv: build one environment over the training batches."""
    return TimeSeriesEnv(train_dataloader)


train_env = DummyVecEnv([_make_train_env])

# Replace the default feature extractor of the policy with the LSTM encoder.
policy_kwargs = {
    "features_extractor_class": RNNFeatureExtractor,
    "features_extractor_kwargs": {"hidden_dim": 128},
}

model = PPO(
    "MlpPolicy",
    train_env,
    learning_rate=LEARNING_RATE,
    n_epochs=2,
    verbose=1,
    device=device,
    policy_kwargs=policy_kwargs,
)

# Training budget: N_EPISODES environment steps per batch in the training loader.
model.learn(total_timesteps=N_EPISODES * len(train_dataloader))

In [None]:
# Write the trained PPO agent to the configured location.
model.save(path=MODEL_SAVE_PATH)