In [None]:
import os
import torch
import warnings
import pandas as pd
import numpy as np
import gym
import torch.nn as nn
from gym import spaces
from torch.utils.data import DataLoader, IterableDataset
from stable_baselines3 import PPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from transformers import BertModel

In [None]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The CUDA housekeeping below needs an initialised CUDA runtime, so it is
# only attempted when a GPU is actually available; calling it on a CPU-only
# host would raise and make the "cpu" fallback above unreachable.
if device == "cuda":
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
    # Cap this process at 75% of the memory of GPU 0.
    torch.cuda.set_per_process_memory_fraction(0.75, device=0)

# NOTE(review): this silences every warning for the rest of the notebook.
warnings.filterwarnings('ignore')

# Fixed seeds for PyTorch and NumPy.
torch.manual_seed(52)
np.random.seed(52)

In [None]:
# --- Data layout -----------------------------------------------------------
# Directories holding the training / test series, and where the model is saved.
TRAIN_DIR = "../src/train_denoised/"
TEST_DIR = "../src/test_denoised/"
MODEL_SAVE_PATH = "../models/ppo_transformer_model.pkl"

# --- Windowing -------------------------------------------------------------
# Number of consecutive samples in one sliding window.
SEQUENCE_LENGTH = 100

# --- Training --------------------------------------------------------------
# Multiplier applied to the dataset size to obtain the PPO timestep budget.
N_EPISODES = 500
# Optimiser step size handed to PPO.
LEARNING_RATE = 3e-4
# Batch size used by the DataLoader.
BATCH_SIZE = 64

In [None]:
class TransformerFeatureExtractor(BaseFeaturesExtractor):
    """Feature extractor that encodes an observation window with BERT.

    The observations are continuous values, not token ids, so they cannot be
    passed to BERT as ``input_ids``. Each time step is instead projected
    linearly to BERT's embedding width and supplied through ``inputs_embeds``.
    The hidden state of the last time step is then mapped to ``hidden_dim``
    features.

    Args:
        observation_space: Observation space of the environment. Its last
            dimension is used as the number of values per time step.
            Assumes observations arrive as (batch, time, channels) — TODO
            confirm against the environment in use.
        hidden_dim: Size of the feature vector returned by ``forward``.
    """

    def __init__(self, observation_space, hidden_dim=128):
        super(TransformerFeatureExtractor, self).__init__(observation_space, hidden_dim)
        self.transformer = BertModel.from_pretrained("bert-base-uncased")

        # Width of BERT's hidden states; both adapters below must match it.
        bert_dim = self.transformer.config.hidden_size
        channels_per_step = observation_space.shape[-1]

        # Lifts every time step from `channels_per_step` values to a BERT-sized embedding.
        self.input_proj = nn.Linear(channels_per_step, bert_dim)
        # Maps BERT's output (bert_dim wide, not hidden_dim) down to the feature size.
        self.fc = nn.Linear(bert_dim, hidden_dim)
        self._features_dim = hidden_dim

    def forward(self, observations):
        """Return a (batch, hidden_dim) feature tensor for a batch of windows."""
        embeddings = self.input_proj(observations.float())
        x = self.transformer(inputs_embeds=embeddings).last_hidden_state
        # Summarise the window by the hidden state of its final time step.
        return self.fc(x[:, -1, :])
    
class TimeSeriesDataset(IterableDataset):
    """Stream sliding windows of pressure values and labels from a directory.

    Every regular file in ``directory`` is parsed as a whitespace-separated
    table with the columns ``time``, ``pressure`` and ``label``. For each file
    all full-length windows are yielded lazily, one at a time.

    Args:
        directory: Folder containing the series files.
        sequence_length: Window length in samples. Defaults to the
            module-level ``SEQUENCE_LENGTH`` when omitted.

    Yields:
        ``(seq, label_seq)`` where ``seq`` is a float32 array and
        ``label_seq`` an int8 array, both of length ``sequence_length``.

    Raises:
        ValueError: If ``sequence_length`` is not positive.
    """

    def __init__(self, directory, sequence_length=None):
        self.directory = directory
        self.sequence_length = SEQUENCE_LENGTH if sequence_length is None else int(sequence_length)
        if self.sequence_length <= 0:
            raise ValueError("sequence_length must be a positive integer")

    def __iter__(self):
        return self.load_data_from_dir()

    def load_data_from_dir(self):
        """Generate the windows of every file, visiting files in sorted order."""
        window = self.sequence_length
        # os.listdir order is unspecified; sorting makes iteration reproducible.
        for file_name in sorted(os.listdir(self.directory)):
            file_path = os.path.join(self.directory, file_name)
            # Skip sub-directories (e.g. notebook checkpoint folders).
            if not os.path.isfile(file_path):
                continue

            try:
                df = pd.read_csv(file_path, sep="\\s+", names=["time", "pressure", "label"])
            except pd.errors.EmptyDataError:
                continue
            if df.empty:
                continue

            # Convert each column once instead of once per window.
            pressure = df["pressure"].to_numpy(dtype=np.float32)
            labels = df["label"].to_numpy(dtype=np.int8)

            # "+ 1" so the final full window (ending on the last row) is included.
            for start in range(len(df) - window + 1):
                stop = start + window
                # `yield` so the whole dataset is never held in memory at once.
                yield pressure[start:stop].copy(), labels[start:stop].copy()

# ⚡ Gym environment
class TimeSeriesEnv(gym.Env):
    """Single-step classification environment over pressure windows.

    Each episode presents one window as a ``(SEQUENCE_LENGTH, 1)`` observation.
    The agent answers with one of three classes and receives +1 for a match
    with the window's target label and -1 otherwise; the episode then ends.

    The data source may yield single windows or whole batches; batches are
    unpacked so that every episode still sees exactly one window. Because the
    action is a single class while the data carries one label per time step,
    the target of a window is its most frequent label.

    NOTE(review): the observation space declares values in [0, 1]; whether the
    pressure data is actually scaled to that range cannot be seen here —
    confirm.

    Args:
        dataloader: Iterable yielding ``(sequences, label_sequences)`` pairs,
            each either one window or a batch of windows.
    """

    def __init__(self, dataloader):
        super(TimeSeriesEnv, self).__init__()
        self.dataloader = dataloader
        self.iterator = iter(dataloader)
        # Windows of the current batch that have not been served yet.
        self._pending = []
        self.current_seq = None
        self.current_label = None
        self.observation_space = spaces.Box(low=0.0, high=1.0, shape=(SEQUENCE_LENGTH, 1), dtype=np.float32)
        self.action_space = spaces.Discrete(3)  # 3 classes

    @staticmethod
    def _to_numpy(values):
        """Convert a tensor or array-like to a NumPy array."""
        if hasattr(values, "detach"):
            values = values.detach().cpu().numpy()
        return np.asarray(values)

    @staticmethod
    def _majority_label(label_seq):
        """Return the most frequent label of one window as a Python int."""
        values, counts = np.unique(label_seq, return_counts=True)
        return int(values[counts.argmax()])

    def _next_batch(self):
        """Fetch the next item from the data source, restarting it when exhausted."""
        try:
            return next(self.iterator)
        except StopIteration:
            # Refresh the iterator once the data has run out.
            self.iterator = iter(self.dataloader)
        try:
            return next(self.iterator)
        except StopIteration:
            raise RuntimeError("The dataloader did not yield any samples.") from None

    def _observation(self):
        """Return the current window shaped like the observation space."""
        return self.current_seq.reshape(-1, 1)

    def reset(self):
        """Advance to the next window and return it as the first observation."""
        if not self._pending:
            batch_seq, batch_label = self._next_batch()
            # Normalise single windows and batches alike to (n_windows, SEQUENCE_LENGTH).
            seqs = self._to_numpy(batch_seq).astype(np.float32).reshape(-1, SEQUENCE_LENGTH)
            labels = self._to_numpy(batch_label).reshape(-1, SEQUENCE_LENGTH)
            samples = [(seq, self._majority_label(lab)) for seq, lab in zip(seqs, labels)]
            # Reversed so that list.pop() serves the windows in their original order.
            self._pending = samples[::-1]

        self.current_seq, self.current_label = self._pending.pop()
        return self._observation()

    def step(self, action):
        """Score the chosen class against the window's target label.

        Returns:
            ``(observation, reward, done, info)`` with ``done`` always True.
        """
        chosen_class = int(np.asarray(action).reshape(-1)[0])
        reward = 1 if chosen_class == self.current_label else -1
        done = True  # every window is its own one-step episode
        return self._observation(), reward, done, {}

In [None]:
# ⚡ DataLoader (loads the windows in batches)
train_dataset = TimeSeriesDataset(TRAIN_DIR)
# TimeSeriesDataset is an IterableDataset: DataLoader rejects `shuffle=True`
# for iterable-style datasets, so the stream is consumed in the order the
# dataset produces it.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE)

In [None]:
# Wrap the training DataLoader in the Gym environment that PPO interacts with.
train_env = TimeSeriesEnv(dataloader=train_dataloader)

In [None]:
# Plug the transformer-based feature extractor into the PPO policy.
policy_kwargs = dict(
    features_extractor_class=TransformerFeatureExtractor,
    features_extractor_kwargs=dict(hidden_dim=128),
)

model = PPO(
    policy="MlpPolicy",
    env=train_env,
    learning_rate=LEARNING_RATE,
    n_epochs=2,
    verbose=1,
    device=device,
    policy_kwargs=policy_kwargs
)

# The dataset is iterable-style and defines no __len__, so `len(train_dataset)`
# raises TypeError. Count the windows with one pass over the data instead.
n_train_windows = sum(1 for _ in train_dataset)
if n_train_windows == 0:
    raise ValueError(f"No training windows found in {TRAIN_DIR!r}")

# ⚡ Train PPO
model.learn(total_timesteps=N_EPISODES * n_train_windows)

In [None]:
# Persist the trained PPO model to the configured location.
model.save(path=MODEL_SAVE_PATH)

# Inference

In [None]:
from collections import Counter
# Restore the saved PPO model onto the device selected earlier in the notebook.
loaded_model = PPO.load(path=MODEL_SAVE_PATH, device=device)

In [None]:
# Per-sample predictions for every test file.
#
# Each full-length sliding window is classified once by the model. That class
# counts as one vote for every time step the window covers, and each time step
# ends up with the class that collected the most votes.
predicted_dfs = []

# Test files are taken from TEST_DIR in sorted order so runs are reproducible.
test_files = sorted(
    os.path.join(TEST_DIR, file_name)
    for file_name in os.listdir(TEST_DIR)
    if os.path.isfile(os.path.join(TEST_DIR, file_name))
)

n_classes = loaded_model.action_space.n

for test_file in test_files:
    df_test = pd.read_csv(test_file, sep="\\s+", names=["time", "pressure"])
    pressure = df_test["pressure"].to_numpy(dtype=np.float32)

    # "+ 1" includes the last full window; shorter files produce no windows.
    n_windows = max(len(pressure) - SEQUENCE_LENGTH + 1, 0)

    # votes[t, c] = number of windows covering time step t that were classified as c.
    votes = np.zeros((len(df_test), n_classes), dtype=np.int64)

    for batch_start in range(0, n_windows, BATCH_SIZE):
        batch_stop = min(batch_start + BATCH_SIZE, n_windows)
        # Shape (n, SEQUENCE_LENGTH, 1): a batch of observations matching the
        # (SEQUENCE_LENGTH, 1) observation space the model was trained on.
        batch_obs = np.stack(
            [pressure[start: start + SEQUENCE_LENGTH] for start in range(batch_start, batch_stop)]
        )[..., np.newaxis]

        # deterministic=True: take the most likely class instead of sampling,
        # so repeated runs give the same predictions.
        window_classes, _ = loaded_model.predict(batch_obs, deterministic=True)

        for offset, window_class in enumerate(window_classes):
            start = batch_start + offset
            votes[start: start + SEQUENCE_LENGTH, int(window_class)] += 1

    # Majority vote per time step. Rows without any vote (file shorter than
    # one window) resolve to class 0. Kept as float to match the column dtype
    # this cell produced before.
    final_classes = votes.argmax(axis=1).astype(np.float64)

    df_test["prediction"] = final_classes
    predicted_dfs.append(df_test)