In [8]:
import os
import torch
import warnings
import pandas as pd
import numpy as np
import gym
import torch.nn as nn
from gym import spaces
from torch.utils.data import DataLoader, Dataset
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.vec_env import VecNormalize
from stable_baselines3.common.callbacks import StopTrainingOnMaxEpisodes

In [9]:
# Select the compute device. When CUDA is present, also drop cached allocator
# blocks, collect CUDA IPC memory, and cap this process at 75% of GPU 0.
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
    torch.cuda.set_per_process_memory_fraction(0.75, device=0)

# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Seed the global torch and NumPy generators.
torch.manual_seed(52)
np.random.seed(52)

In [10]:
# Number of consecutive pressure samples in one sliding window / observation.
SEQUENCE_LENGTH = 100
# Mini-batch size handed to the training DataLoader.
BATCH_SIZE = 64
# Multiplied by len(train_dataloader) to form PPO's total_timesteps budget.
N_EPISODES = 500
# NOTE(review): not referenced by any visible cell; the PPO constructor is given
# a literal learning rate instead — confirm which value is intended.
LEARNING_RATE = 3e-4
# Directory of training series files.
TRAIN_DIR = "../src/train_denoised/"
# Directory of test series files.
TEST_DIR = "../src/test_denoised/"
# Destination passed to model.save() after training.
MODEL_SAVE_PATH = "../models/ppo_rnn_model.pkl"

# Структура модели & обучение

In [11]:
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset over the pressure column of every file in a directory.

    Each regular file is parsed as header-less, whitespace-separated text with
    two columns (``time``, ``pressure``). Every run of ``sequence_length``
    consecutive pressure samples becomes one item, including the window that
    ends on the last sample of the file.

    Args:
        directory: Folder whose regular files are read as time series.
        sequence_length: Window length in samples. ``None`` (default) uses the
            module-level ``SEQUENCE_LENGTH``.

    Attributes:
        files: Sorted paths of the regular files found in ``directory``.
        sequence_length: Window length actually used.
        data: List of 1-D ``float32`` arrays of length ``sequence_length``.
            Windows of one file are views into a single shared array (this
            avoids a ~``sequence_length``-fold memory blow-up), so treat them
            as read-only.
    """

    def __init__(self, directory, sequence_length=None):
        if sequence_length is None:
            sequence_length = SEQUENCE_LENGTH
        self.sequence_length = sequence_length

        # Sorted so item order does not depend on the OS directory listing;
        # sub-directories (e.g. .ipynb_checkpoints) are skipped because
        # read_csv cannot open them.
        candidates = (os.path.join(directory, name) for name in os.listdir(directory))
        self.files = sorted(path for path in candidates if os.path.isfile(path))
        self.data = []

        for file_path in self.files:
            try:
                df = pd.read_csv(file_path, sep="\\s+", names=["time", "pressure"])
            except pd.errors.EmptyDataError:
                # A zero-byte file raises instead of yielding an empty frame.
                continue
            if df.empty or "pressure" not in df:
                continue

            # Convert once per file; each window below is a slice (view) of it.
            pressure = df["pressure"].values.astype(np.float32)
            # +1 so the window ending on the final sample is not dropped.
            # A file shorter than one window gives a non-positive count and
            # therefore contributes nothing.
            n_windows = len(pressure) - sequence_length + 1
            self.data.extend(pressure[i: i + sequence_length] for i in range(n_windows))

    def __len__(self):
        """Return the total number of windows across all files."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``idx``-th window as a 1-D ``float32`` array."""
        return self.data[idx]

In [13]:
# Materialise every training window, then serve them in shuffled mini-batches.
train_dataset = TimeSeriesDataset(directory=TRAIN_DIR)
train_dataloader = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=BATCH_SIZE,
)

In [17]:
class RNNFeatureExtractor(BaseFeaturesExtractor):
    """LSTM features extractor for a Stable-Baselines3 policy.

    A single-layer LSTM reads the observation sequence one scalar per time
    step; the LSTM output at the last time step is passed through a linear
    layer to produce the feature vector.

    Args:
        observation_space: Observation space forwarded to the base class.
        hidden_dim: LSTM hidden size, which is also the size of the returned
            feature vector (``features_dim``).
    """

    def __init__(self, observation_space: spaces.Box, hidden_dim=128):
        super().__init__(observation_space, features_dim=hidden_dim)
        # batch_first=True: inputs are laid out as (batch, time, 1).
        self.lstm = nn.LSTM(batch_first=True, input_size=1, hidden_size=hidden_dim)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)

    def forward(self, x):
        """Encode a batch of sequences into ``hidden_dim``-sized features."""
        sequence_out, _ = self.lstm(x)
        # Only the output at the final time step summarises the whole window.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

class TimeSeriesEnv(gym.Env):
    """Gym environment that replays DataLoader batches, one batch per episode.

    ``reset()`` moves to the next batch and returns its first window; each
    ``step()`` advances one window inside the current batch and reports
    ``done`` once the batch is exhausted. When the loader runs out it is
    restarted from the loader passed to the constructor.

    NOTE(review): the reward is drawn uniformly from [-1, 1] and does not
    depend on ``action`` or on the observation, so a policy trained on this
    environment receives no learning signal. A label-based reward is needed
    before the trained model can be meaningful.

    NOTE(review): ``observation_space`` declares bounds of [-1, 1]; nothing
    here enforces that range — confirm the input data is scaled accordingly.

    Args:
        dataloader: Iterable of batches; each batch must support ``len()`` and
            integer indexing, and each item must offer ``reshape``/``cpu``/
            ``numpy`` (presumably torch tensors produced by a DataLoader).
    """

    def __init__(self, dataloader):
        super(TimeSeriesEnv, self).__init__()
        # Keep the loader itself so reset() can restart *this* loader rather
        # than a module-level global.
        self._loader = dataloader
        self.dataloader = iter(dataloader)
        self.current_batch = next(self.dataloader)
        # The batch fetched above has not been served yet; the first reset()
        # uses it instead of discarding it.
        self._batch_unused = True
        self.batch_idx = 0
        self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(SEQUENCE_LENGTH, 1), dtype=np.float32)
        self.action_space = spaces.Discrete(3)

    def _observation(self, idx):
        """Return window ``idx`` of the current batch as a (SEQUENCE_LENGTH, 1) array."""
        return self.current_batch[idx].reshape(SEQUENCE_LENGTH, 1).cpu().numpy()

    def reset(self):
        """Start a new episode on the next batch and return its first observation."""
        if self._batch_unused:
            self._batch_unused = False
        else:
            try:
                self.current_batch = next(self.dataloader)
            except StopIteration:
                # Loader exhausted: start another pass over the same loader.
                self.dataloader = iter(self._loader)
                self.current_batch = next(self.dataloader)

        self.batch_idx = 0
        return self._observation(0)

    def step(self, action):
        """Advance one window; returns ``(obs, reward, done, info)``.

        ``action`` is currently ignored (see the class-level note on rewards).
        """
        reward = np.random.uniform(-1, 1)
        self.batch_idx += 1

        # Use the real batch length: the final batch of a pass may be shorter
        # than BATCH_SIZE, which previously caused an IndexError.
        batch_len = len(self.current_batch)
        done = self.batch_idx >= batch_len

        # On the terminal step return the last valid window instead of calling
        # reset() here: the SB3 VecEnv wrapper resets a finished env itself, so
        # resetting in step() as well skipped every other batch.
        obs = self._observation(batch_len - 1 if done else self.batch_idx)
        return obs, reward, done, {}

In [18]:
# Single-environment vectorised wrapper with running normalisation of both
# observations and rewards (observations clipped to +/-10 after normalising).
train_env = VecNormalize(
    DummyVecEnv([lambda: TimeSeriesEnv(train_dataloader)]),
    norm_obs=True,
    norm_reward=True,
    clip_obs=10.0,
)

# Plug the LSTM extractor in front of the policy/value heads.
policy_kwargs = {
    "features_extractor_class": RNNFeatureExtractor,
    "features_extractor_kwargs": {"hidden_dim": 128},
}

# NOTE(review): the learning rate is a literal here; the LEARNING_RATE constant
# defined earlier in the notebook is not used — confirm which one is intended.
model = PPO(
    policy="MlpPolicy",
    env=train_env,
    policy_kwargs=policy_kwargs,
    device=device,
    verbose=1,
    learning_rate=2.5e-4,
    n_epochs=3,
    clip_range=0.15,
    target_kl=0.005,
    ent_coef=0.01,
)

# NOTE(review): the episode cap is hard-coded to 1000 while N_EPISODES is only
# used for the timestep budget below — confirm the two are meant to differ.
callback = StopTrainingOnMaxEpisodes(max_episodes=1000, verbose=1)

# Training ends at whichever limit is hit first: the timestep budget or the
# episode cap enforced by the callback.
model.learn(
    total_timesteps=N_EPISODES * len(train_dataloader),
    callback=callback,
)

Using cpu device
-----------------------------
| time/              |      |
|    fps             | 430  |
|    iterations      | 1    |
|    time_elapsed    | 4    |
|    total_timesteps | 2048 |
-----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 324           |
|    iterations           | 2             |
|    time_elapsed         | 12            |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 2.5591842e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.15          |
|    entropy_loss         | -1.1          |
|    explained_variance   | -0.00586      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.24          |
|    n_updates            | 3             |
|    policy_gradient_loss | 7.58e-05      |
|    value_loss           | 0.65          |
-------------------------

KeyboardInterrupt: 

In [None]:
# Persist the trained PPO model to MODEL_SAVE_PATH.
# NOTE(review): train_env is a VecNormalize wrapper and nothing in this cell
# saves its running statistics — confirm whether inference needs them.
model.save(MODEL_SAVE_PATH)

# Инференс

## Предсказание, формирование submit

In [None]:
from collections import Counter

In [None]:
# Template listing the test files to predict, and where the filled-in
# submission is written.
SUBMIT_INPUT_PATH = "../src/raw_data/submit.csv"
SUBMIT_OUTPUT_PATH = "../src/submit.csv"
# Must be the file the training section wrote with model.save(). The previous
# value ("ppo_transformer_model.pkl") pointed at a model this notebook never
# produces, so inference loaded a different (or missing) checkpoint.
MODEL_SAVE_PATH = "../models/ppo_rnn_model.pkl"
# Directory of test series files.
TEST_DIR = "../src/test_denoised/"
# Window length in samples; must equal the length the model was trained with.
SEQUENCE_LENGTH = 100

In [None]:
# Load the saved PPO model from MODEL_SAVE_PATH onto the selected device.
loaded_model = PPO.load(MODEL_SAVE_PATH, device = device)

In [None]:
def extract_intervals(predictions, time_series):
    """Return ``[start_time, end_time]`` for every run of a constant, truthy prediction.

    A run is a stretch of at least two consecutive samples whose predictions
    are equal to each other *and* truthy. Runs of falsy values (``False`` /
    ``0``) are ignored: with a boolean mask such as ``classes == 2`` only the
    stretches where the mask is True produce intervals. Previously the False
    stretches were returned as intervals too, and an all-False mask yielded
    one interval spanning the whole series.

    Isolated single-sample runs are not reported (they would be zero-length
    intervals).

    Args:
        predictions: Sequence of per-sample predictions (boolean mask or
            integer labels), indexable by position.
        time_series: Sequence of time stamps aligned with ``predictions``.

    Returns:
        List of ``[start, end]`` pairs taken from ``time_series``, in order of
        appearance. ``end`` is the time stamp of the last sample in the run.
    """
    intervals = []
    start = None

    for i in range(len(predictions) - 1):
        continues_run = predictions[i] == predictions[i + 1]
        if predictions[i] and continues_run:
            # Inside a truthy run: open an interval if none is open yet.
            if start is None:
                start = time_series[i]
        elif start is not None:
            # The run ends at sample i (the next sample differs).
            intervals.append([start, time_series[i]])
            start = None

    # A run that is still open reaches the final sample.
    if start is not None:
        intervals.append([start, time_series[-1]])

    return intervals

submit_df = pd.read_csv(SUBMIT_INPUT_PATH, names=["file", "recovery", "drop"])

# Number of windows classified per predict() call; batching avoids one model
# call per window.
PREDICT_BATCH_SIZE = 1024
# Assumes a Discrete action space (the training environment uses Discrete(3)).
n_classes = int(loaded_model.action_space.n)
# Offsets 0..SEQUENCE_LENGTH-1, added to window starts to gather each window.
window_offsets = np.arange(SEQUENCE_LENGTH)

predicted_dfs = []
results = []

# NOTE(review): training wrapped the environment in VecNormalize(norm_obs=True)
# but no normalisation statistics are restored here, so the model is fed raw
# pressure values — confirm and apply the saved statistics if they exist.
for _, row in submit_df.iterrows():
    file_name = row["file"]
    file_path = os.path.join(TEST_DIR, file_name)

    if not os.path.exists(file_path):
        print(f"File {file_name} not found")
        results.append([file_name, [], []])
        continue

    df_test = pd.read_csv(file_path, sep="\\s+", names=["time", "pressure"])

    pressure = df_test["pressure"].to_numpy(dtype=np.float32)
    n_samples = len(pressure)
    # +1 so the window ending on the last sample is included; otherwise the
    # final sample never receives a vote.
    n_windows = max(0, n_samples - SEQUENCE_LENGTH + 1)

    # Difference array: a window starting at s with class c adds +1 at row s
    # and -1 at row s + SEQUENCE_LENGTH of column c. The cumulative sum over
    # rows then gives, per sample, how many covering windows voted for c.
    vote_diff = np.zeros((n_samples + 1, n_classes), dtype=np.int64)

    for chunk_start in range(0, n_windows, PREDICT_BATCH_SIZE):
        chunk_end = min(chunk_start + PREDICT_BATCH_SIZE, n_windows)
        starts = np.arange(chunk_start, chunk_end)
        # Shape (windows_in_chunk, SEQUENCE_LENGTH, 1).
        obs = pressure[starts[:, None] + window_offsets][:, :, None]
        pred_classes, _ = loaded_model.predict(obs, deterministic=True)
        # predict() returns an ndarray; flatten it to one integer label per
        # window (the raw arrays are unhashable and broke the old Counter vote).
        pred_classes = np.asarray(pred_classes).reshape(-1).astype(np.int64)
        # Starts are unique within a chunk, so plain fancy-index += is safe.
        vote_diff[starts, pred_classes] += 1
        vote_diff[starts + SEQUENCE_LENGTH, pred_classes] -= 1

    votes = np.cumsum(vote_diff, axis=0)[:n_samples]
    # Majority vote per sample; ties resolve to the lowest class index. Samples
    # with no votes (series shorter than one window) stay class 0. Kept as
    # float to match the dtype of the previous np.zeros-based column.
    final_classes = votes.argmax(axis=1).astype(np.float64)

    df_test["prediction"] = final_classes
    predicted_dfs.append(df_test)

    recovery_intervals = extract_intervals(final_classes == 2, df_test["time"].values)
    drop_intervals = extract_intervals(final_classes == 1, df_test["time"].values)

    results.append([file_name, recovery_intervals, drop_intervals])

In [None]:
# One row per requested file: its name plus the predicted recovery and drop intervals.
submit_columns = ["file", "recovery", "drop"]
submit_output_df = pd.DataFrame(data=results, columns=submit_columns)
submit_output_df.to_csv(SUBMIT_OUTPUT_PATH, index=False)

## Оценка предсказаний

**TODO:** дальше нужно проанализировать `predicted_dfs`.