In [None]:
import os
import warnings
from collections import Counter

import gym
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.policies import ActorCriticPolicy
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from transformers import BertModel

In [None]:
# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The CUDA housekeeping calls are only valid when a CUDA device exists;
# running them on a CPU-only host raises instead of falling back.
if device == "cuda":
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
    # Cap this process at 75% of the memory of GPU 0.
    torch.cuda.set_per_process_memory_fraction(0.75, device = 0)

# NOTE(review): this silences every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Fixed seeds so that torch and numpy random draws are repeatable.
torch.manual_seed(52)
np.random.seed(52)

In [None]:
# Number of consecutive samples in one sliding window (one observation).
SEQUENCE_LENGTH = 100
# Multiplied by the number of training windows to get the PPO timestep budget.
N_EPISODES = 500
# Learning rate handed to PPO.
LEARNING_RATE = 3e-4
# Directories holding the whitespace-separated training / test files.
TRAIN_DIR = "../src/train_denoised/"
TEST_DIR = "../src/test_denoised/"

# Where the trained PPO model is saved to and later loaded from.
MODEL_SAVE_PATH = "../models/ppo_transformer_model.pkl"

In [None]:
class TransformerFeatureExtractor(nn.Module):
    """Encode a sequence with pretrained BERT and summarise it as one vector.

    Args:
        input_dim: Number of features per time step of a continuous input.
        hidden_dim: Size of the vector returned by ``forward``.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.transformer = BertModel.from_pretrained("bert-base-uncased")
        # Width of the encoder's hidden states, read from its own config so
        # the layers below always line up with the loaded checkpoint.
        encoder_dim = self.transformer.config.hidden_size
        # Lifts continuous per-step features into the encoder's embedding
        # space; float inputs cannot be looked up as token ids.
        self.input_proj = nn.Linear(input_dim, encoder_dim)
        # The head consumes encoder hidden states, so its input width must be
        # the encoder width (it used to be ``hidden_dim``, a shape mismatch).
        self.fc = nn.Linear(encoder_dim, hidden_dim)

    def forward(self, x):
        """Return a ``(batch, hidden_dim)`` summary of ``x``.

        Floating-point ``x`` is treated as ``(batch, seq, input_dim)`` values
        and fed to BERT through ``inputs_embeds``; integer ``x`` is still
        passed as ``input_ids`` for backward compatibility.
        """
        if torch.is_floating_point(x):
            encoded = self.transformer(inputs_embeds=self.input_proj(x))
        else:
            encoded = self.transformer(input_ids=x)
        # Use the hidden state of the last sequence position as the summary.
        return self.fc(encoded.last_hidden_state[:, -1, :])


class CustomTransformerPolicy(BaseFeaturesExtractor):
    """Stable-Baselines3 features extractor backed by the BERT encoder above.

    This class is handed to PPO as ``features_extractor_class``, so it must
    follow the features-extractor contract (``__init__(observation_space,
    features_dim)`` and a ``forward`` that maps observations to features).
    It previously subclassed ``ActorCriticPolicy``, which cannot be
    constructed that way and whose ``forward`` has a different contract.

    Args:
        observation_space: Space of the environment observations.
        features_dim: Size of the feature vector given to the policy heads.
    """

    def __init__(self, observation_space, features_dim=128):
        super().__init__(observation_space, features_dim)
        encoder_out_dim = 128
        obs_shape = observation_space.shape
        # Per-step feature count: last axis of a (seq, features) observation,
        # or 1 when the observation is a flat sequence.
        step_dim = obs_shape[-1] if len(obs_shape) > 1 else 1
        self.feature_extractor = TransformerFeatureExtractor(step_dim, encoder_out_dim)
        self.fc = nn.Linear(encoder_out_dim, features_dim)

    def forward(self, features):
        """Map a batch of observations to ``(batch, features_dim)`` features."""
        # A flat (batch, seq) batch gets an explicit per-step feature axis.
        if features.dim() == 2:
            features = features.unsqueeze(-1)
        transformer_out = self.feature_extractor(features)
        return self.fc(transformer_out)

In [None]:
class TimeSeriesEnv(gym.Env):
    """Environment that walks through labelled windows one step at a time.

    Each observation is one window from ``data``; the action is one class
    (0, 1 or 2) per time step of that window. The reward is +1 for every
    position whose action equals the label and -1 otherwise.

    Uses the classic gym API: ``reset()`` returns the observation only and
    ``step()`` returns ``(obs, reward, done, info)``.

    NOTE(review): the observation space is declared as [0, 1]; confirm the
    input data is scaled accordingly.
    NOTE(review): the episode ends once the last window becomes the current
    observation, so that window is never scored — confirm this is intended.

    Args:
        data: Indexable collection of windows of length SEQUENCE_LENGTH.
        labels: Indexable collection of per-step label windows, aligned
            with ``data``.
    """

    def __init__(self, data, labels):
        super().__init__()
        self.data = data
        self.labels = labels
        self.current_step = 0
        self.observation_space = spaces.Box(low=0.0, high=1.0, shape=(SEQUENCE_LENGTH, 1), dtype=np.float32)
        self.action_space = spaces.MultiDiscrete([3] * SEQUENCE_LENGTH)

    def _observation(self, index):
        """Return window ``index`` shaped like ``observation_space``.

        The index is clamped to the last window so that building the
        terminal observation never reads past the end of ``data``.
        """
        last_index = len(self.data) - 1
        window = np.asarray(self.data[min(index, last_index)], dtype=np.float32)
        # Windows are stored flat; the declared space has a trailing axis.
        return window.reshape(self.observation_space.shape)

    def reset(self):
        """Rewind to the first window and return it as the observation."""
        self.current_step = 0
        return self._observation(self.current_step)

    def step(self, actions):
        """Score ``actions`` against the current labels and advance one window.

        Returns:
            ``(obs, reward, done, info)`` where ``reward`` is an int in
            ``[-SEQUENCE_LENGTH, SEQUENCE_LENGTH]`` and ``info`` is empty.

        Raises:
            IndexError: If ``actions`` or the current labels hold fewer than
                SEQUENCE_LENGTH entries.
        """
        expected = np.asarray(self.labels[self.current_step]).reshape(-1)
        chosen = np.asarray(actions).reshape(-1)
        if len(expected) < SEQUENCE_LENGTH or len(chosen) < SEQUENCE_LENGTH:
            raise IndexError("actions and labels must hold at least SEQUENCE_LENGTH entries")

        hits = int(np.count_nonzero(chosen[:SEQUENCE_LENGTH] == expected[:SEQUENCE_LENGTH]))
        # +1 per hit and -1 per miss  ==  hits - (SEQUENCE_LENGTH - hits).
        reward = 2 * hits - SEQUENCE_LENGTH

        self.current_step += 1
        done = self.current_step >= len(self.data) - 1
        return self._observation(self.current_step), reward, done, {}

In [None]:
def load_data_from_dir(directory, sequence_length=None):
    """Build sliding windows of pressure values and labels from a directory.

    Every regular file in ``directory`` is read as whitespace-separated
    text with the columns ``time``, ``pressure`` and ``label``. Each file
    contributes all of its full-length windows, including the one that ends
    on the last row. Files are visited in sorted order so the result is
    reproducible; files shorter than one window are skipped.

    Args:
        directory: Path of the directory to scan.
        sequence_length: Window length; defaults to the module-level
            ``SEQUENCE_LENGTH`` when omitted.

    Returns:
        ``(sequences, labels)``: two arrays of shape
        ``(n_windows, sequence_length)``. Both have zero rows when no
        window could be built.

    Raises:
        ValueError: If the window length is not positive.
    """
    window = SEQUENCE_LENGTH if sequence_length is None else int(sequence_length)
    if window <= 0:
        raise ValueError("sequence_length must be a positive integer")

    make_windows = np.lib.stride_tricks.sliding_window_view
    sequence_chunks, label_chunks = [], []

    # os.listdir returns entries in arbitrary order; sort for determinism.
    for file_name in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, file_name)
        if not os.path.isfile(file_path):
            continue  # sub-directories cannot be parsed as data files

        df = pd.read_csv(file_path, sep="\\s+", names=["time", "pressure", "label"])
        if len(df) < window:
            continue  # empty or too short for a single window

        # Yields len(df) - window + 1 windows, so the final rows are kept.
        sequence_chunks.append(make_windows(df["pressure"].to_numpy(), window))
        label_chunks.append(make_windows(df["label"].to_numpy(), window))

    if not sequence_chunks:
        return np.empty((0, window)), np.empty((0, window))

    # concatenate copies the read-only window views into ordinary arrays.
    return np.concatenate(sequence_chunks), np.concatenate(label_chunks)

In [None]:
# Build the training windows and fail early if nothing could be loaded,
# instead of failing later inside the environment or PPO.
train_data, train_labels = load_data_from_dir(TRAIN_DIR)
if len(train_data) == 0:
    raise ValueError(f"No training windows could be built from {TRAIN_DIR!r}")

# Sorted so inference visits the files in a reproducible order; only regular
# files are kept because each path is later parsed with read_csv.
test_files = sorted(
    os.path.join(TEST_DIR, file_name)
    for file_name in os.listdir(TEST_DIR)
    if os.path.isfile(os.path.join(TEST_DIR, file_name))
)

train_env = TimeSeriesEnv(train_data, train_labels)

# Extra policy arguments for PPO: the features extractor class and the size
# of the feature vector it should produce.
policy_kwargs = dict(
    features_extractor_class=CustomTransformerPolicy,
    features_extractor_kwargs=dict(features_dim=128),
)

In [None]:
# Assemble the PPO agent around the training environment.
model = PPO(
    "MlpPolicy",
    train_env,
    policy_kwargs=policy_kwargs,
    learning_rate=LEARNING_RATE,
    device=device,
    verbose=1,
)
# Training budget: N_EPISODES timesteps per training window.
model.learn(total_timesteps=len(train_data) * N_EPISODES)

In [None]:
# Persist the trained agent so the inference cells can reload it.
model.save(path=MODEL_SAVE_PATH)

# Инференс

In [None]:
# Reload the saved agent on the selected device for inference.
# (Counter is imported in the notebook's top import cell.)
loaded_model = PPO.load(MODEL_SAVE_PATH, device=device)

In [None]:
def _vote_row_labels(model, pressure, sequence_length):
    """Return one class per row by majority vote over overlapping windows.

    Every full-length window of ``pressure`` (including the one that ends on
    the last row) is labelled by ``model``; each row collects the class
    predicted for it by every window that covers it, and the most common
    class wins. A row without any votes copies the previous row's class,
    which only happens when ``pressure`` is shorter than one window.
    """
    n_rows = len(pressure)
    votes = [[] for _ in range(n_rows)]
    obs_shape = model.observation_space.shape

    for start in range(n_rows - sequence_length + 1):
        window = np.asarray(pressure[start: start + sequence_length], dtype=np.float32)
        # Match the shape the model was trained on, and take the most likely
        # action instead of sampling so repeated runs give the same labels.
        pred_classes, _ = model.predict(window.reshape(obs_shape), deterministic=True)
        pred_classes = np.asarray(pred_classes).reshape(-1)
        for offset in range(sequence_length):
            votes[start + offset].append(int(pred_classes[offset]))

    final_classes = np.zeros(n_rows)
    for row, row_votes in enumerate(votes):
        if row_votes:
            final_classes[row] = Counter(row_votes).most_common(1)[0][0]
        elif row > 0:
            final_classes[row] = final_classes[row - 1]
    return final_classes


# One frame per test file, each with an added "prediction" column.
predicted_dfs = []

for test_file in test_files:
    df_test = pd.read_csv(test_file, sep="\\s+", names=["time", "pressure"])
    df_test["prediction"] = _vote_row_labels(
        loaded_model, df_test["pressure"].to_numpy(), SEQUENCE_LENGTH
    )
    predicted_dfs.append(df_test)