In [1]:
import numpy as np
import cv2
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt

# Parsuj dane i podziel na zbiór treningowy i testowy

In [2]:
# Set to True to rebuild the merged train/test files from the raw CSV exports.
# When False, only the previously written HDF files are loaded at the end of the cell.
process_data = False

# (raw-data sub-directory, split name) pairs.
main_keys = [('2021-05-08', 'train'), ('2021-05-27', 'test')]

# File-name prefixes of the four raw CSV groups looked up in each sub-directory.
keys = ['manipulowane', 'straty', 'zaklocajace', 'zaklocane']

# Variable dictionary: the spreadsheet columns are renamed to a/b/c and 'a' is discarded.
data = pd.read_excel('./data/zmienne.xlsx', names=['a', 'b', 'c']).drop(columns=['a'])
# Lookup from column 'c' values to column 'b' values, used below to rename the merged
# columns (presumably raw signal tag -> readable name; verify against the spreadsheet).
mappsy = {'001NIR0SZR0.daca.pv': 'Straty łącznie'}
for ind, row in data.iterrows():
    mappsy[row.c] = row.b

if process_data:
    for dataset, tset in main_keys:
        # Step 1: concatenate all raw CSV parts of each group into one CSV per split.
        for k in keys:
            pattern = f'data/dane/{dataset}/{k}*'
            # A dedicated name keeps the `data` DataFrame above from being overwritten.
            csv_paths = sorted(glob(pattern))
            if not csv_paths:
                # pd.concat([]) would fail with a message that does not name the path.
                raise FileNotFoundError(f'No raw CSV files match {pattern!r}')
            merged_df = pd.concat([pd.read_csv(path) for path in csv_paths])
            # The index is written as an unnamed first column; it is read back as
            # 'Unnamed: 0' and dropped in step 2.
            merged_df.to_csv(f'data/{tset}_{k}.csv')

        # Step 2: reload each group indexed by its timestamp column.
        # NOTE(review): 'Unnamed: 5' presumably comes from a trailing separator in the
        # raw exports of two of the groups -- confirm against the files.
        df_manipulowane = pd.read_csv(f'data/{tset}_manipulowane.csv', index_col='Czas', parse_dates=True)
        df_manipulowane = df_manipulowane.drop(columns=['Unnamed: 0', 'Unnamed: 5'])

        # The loss file spells its timestamp column in lower case ('czas').
        df_straty = pd.read_csv(f'data/{tset}_straty.csv', index_col='czas', parse_dates=True)
        df_straty = df_straty.drop(columns=['Unnamed: 0'])

        df_zaklocajace = pd.read_csv(f'data/{tset}_zaklocajace.csv', index_col='Czas', parse_dates=True)
        df_zaklocajace = df_zaklocajace.drop(columns=['Unnamed: 0'])

        df_zaklocane = pd.read_csv(f'data/{tset}_zaklocane.csv', index_col='Czas', parse_dates=True)
        df_zaklocane = df_zaklocane.drop(columns=['Unnamed: 0', 'Unnamed: 5'])

        # Step 3: inner-join the four groups on their timestamp index. The join order
        # fixes the column order that later cells rely on positionally.
        m = pd.merge(df_manipulowane, df_straty, left_index=True, right_index=True)
        m = pd.merge(m, df_zaklocajace, left_index=True, right_index=True)
        m = pd.merge(m, df_zaklocane, left_index=True, right_index=True)

        # Rename columns through the lookup; labels missing from `mappsy` become NaN.
        m.columns = m.columns.map(mappsy)

        # `key` is passed by keyword: positional use is deprecated in recent pandas.
        m.to_hdf(f'data/{tset}_merged.hdf', key='kej')

df_train = pd.read_hdf('data/train_merged.hdf')
df_test = pd.read_hdf('data/test_merged.hdf')

In [3]:
# Column groups of the merged frame, selected purely by position:
# 0-3 are the manipulated setpoints, 4-9 the loss signals, 10 onwards the rest.
zadane_columns = df_train.columns[list(range(0, 4))]
straty_columns = df_train.columns[list(range(4, 10))]
zaklucane_columns = df_train.columns[10:]

# Trenowanie modelu świata

In [4]:
# Targets are the loss ("straty") columns; every remaining column is a model input.
X_train, y_train = df_train.drop(columns=straty_columns), df_train[straty_columns]
X_test, y_test = df_test.drop(columns=straty_columns), df_test[straty_columns]

In [5]:
from lightgbm import LGBMRegressor
from sklearn.multioutput import MultiOutputRegressor
import joblib

# Set to True to refit the world model and overwrite 'world.pkl'.
train_world = False

# Keep every 10th row, then pair the inputs of subsampled row i with the losses of
# subsampled row i + 1, so the model predicts the next subsampled loss values.
# (Presumably rows are in time order -- verify against the data.)
train_inputs, train_targets = X_train.values[::10][:-1], y_train.values[::10][1:]
test_inputs, test_targets = X_test.values[::10][:-1], y_test.values[::10][1:]

if train_world:
    # One independent LightGBM regressor per loss column.
    world = MultiOutputRegressor(LGBMRegressor())
    world.fit(train_inputs, train_targets)
    joblib.dump(world, 'world.pkl')

# Always reload from disk so the cell behaves the same whether or not it retrained.
world = joblib.load('world.pkl')

# score() is the estimator's default metric on the shifted pairs.
print('Train: ', world.score(train_inputs, train_targets))
print('Test: ', world.score(test_inputs, test_targets))

Train:  0.9541211486219704
Test:  -0.03679137153041164


# Regulator z ograniczeniami

In [6]:
class Regulator:
    """Constrain requested setpoint changes for the four manipulated variables.

    For each variable (air flow, oxygen content, blast, dust) the regulator stores an
    allowed ``(low, high)`` range, a maximum change per call and a minimum time between
    changes. Calling the instance applies the per-call step limit and then the range
    limit. The ``*_min_time`` values are stored but not enforced by ``__call__``.
    """

    def __init__(self):
        # air flow
        self.air_max_regulation = (1900, 3500)
        self.air_max_step = 800
        self.air_min_time = 10 # sec

        # oxygen content
        self.oxy_max_regulation = (65, 81)
        self.oxy_max_step = 2
        self.oxy_min_time = 150

        # blast
        self.puff_max_regulation = (40, 70)
        self.puff_max_step = 10
        self.puff_min_time = 5

        # dust
        self.dust_max_regulation = (13, 27)
        self.dust_max_step = 13
        self.dust_min_time = 5*60

    @staticmethod
    def _limit_step(delta, max_step):
        """Return ``delta`` with its magnitude capped at ``max_step``, sign preserved."""
        return np.min([np.abs(delta), max_step]) * np.sign(delta)

    def __call__(self, prev, new):
        """Apply the step and range limits to one set of requested changes.

        Args:
            prev: current setpoints as a 4-item sequence ``(air, oxy, puff, dust)``.
            new: requested *changes* in the same order; each is added to the
                matching ``prev`` value after its magnitude is capped.

        Returns:
            Tuple ``(air, oxy, puff, dust)`` of the resulting setpoints, each clipped
            into its allowed range.
        """
        prev_air, prev_oxy, prev_puff, prev_dust = prev
        new_air, new_oxy, new_puff, new_dust = new

        # Every variable is capped by its own *_max_step. The blast and dust lines
        # previously used the *_min_time values here, which are durations, not steps.
        dest_air = prev_air + self._limit_step(new_air, self.air_max_step)
        dest_oxy = prev_oxy + self._limit_step(new_oxy, self.oxy_max_step)
        dest_puff = prev_puff + self._limit_step(new_puff, self.puff_max_step)
        dest_dust = prev_dust + self._limit_step(new_dust, self.dust_max_step)

        # Clipping also pulls an out-of-range `prev` value back into the allowed range.
        dest_air = np.clip(dest_air, *self.air_max_regulation)
        dest_oxy = np.clip(dest_oxy, *self.oxy_max_regulation)
        dest_puff = np.clip(dest_puff, *self.puff_max_regulation)
        dest_dust = np.clip(dest_dust, *self.dust_max_regulation)

        return dest_air, dest_oxy, dest_puff, dest_dust

# Shared regulator instance; the training loop further down calls it as `reg(...)`.
reg = Regulator()
# Smoke check: first list is the previous (air, oxy, puff, dust) setpoints, second the
# requested changes. The returned tuple is shown as the cell output.
reg([1900,0,0, 23], [900,0,0,13])

(2700, 65, 40, 27)

# Samooptymalizujący się model

In [63]:
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count
from PIL import Image
from tqdm import trange

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [70]:
# class DQN(nn.Module):

#     def __init__(self):
#         super(DQN, self).__init__()
#         self.head = nn.Linear(22, 4)

#     def forward(self, x):
#         x = torch.tensor(x, dtype=torch.float32)
#         return self.head(x)
# network = DQN()

# Policy network: one linear layer mapping 22 input features to 4 outputs.
# The training loop feeds it a state row without the 4 setpoint columns and treats
# the 4 outputs as requested setpoint changes.
policy_layer = nn.Linear(in_features=22, out_features=4)
network = nn.Sequential(policy_layer)

In [71]:
# RMSprop over the policy network's parameters, using the optimizer's default settings.
optimizer = optim.RMSprop(network.parameters())

# TODO
# Smooth L1 is the loss currently used by the training loop; the surrounding TODO
# markers flag the choice as unfinished.
criterion = nn.SmoothL1Loss()
# TODO

In [72]:
# Same positional column groups as defined earlier in the notebook, repeated in this
# cell: 0-3 manipulated setpoints, 4-9 loss signals, 10 onwards the rest.
zadane_columns = df_train.columns[list(range(0, 4))]
straty_columns = df_train.columns[list(range(4, 10))]
zaklucane_columns = df_train.columns[10:]

In [78]:
steps = 1000
epochs = 400

# Start from the first training row; every later state is drawn at random (end of loop).
state = df_train.iloc[0:1].copy()

for e in range(epochs):
    t = trange(steps, desc=f'{e+1}/{epochs} - Loss: ', leave=True)
    losses = []

    for i in t:
        # The network sees the state without the current setpoints; the setpoints are
        # kept aside as the regulator's starting point.
        without_zadane = state.drop(columns=zadane_columns)
        same_zadane = state[zadane_columns]

        mani_out = network(torch.tensor(without_zadane.values, dtype=torch.float32))

        # Detached NumPy copy: the regulator and the world model work outside autograd.
        mani_out_cpu = mani_out.detach().clone().numpy()
        mani_out_cpu = reg(same_zadane.values[0], mani_out_cpu[0])
        # Reuse the state's index. Assigning a DataFrame into columns aligns on index
        # labels, so a default 0-based index would not match the row label taken from
        # df_train and the setpoint columns would silently be filled with NaN.
        mani_out_cpu = pd.DataFrame([mani_out_cpu], columns=zadane_columns, index=state.index)

        new_state = state.copy()
        new_state[zadane_columns] = mani_out_cpu

        # Pass a plain array, matching how the world model is fitted and scored.
        new_straty = world.predict(new_state.drop(columns=straty_columns).values)
        new_straty = pd.DataFrame(new_straty, columns=straty_columns)

        # TODO
        # NOTE(review): both tensors are built from NumPy arrays, so this loss has no
        # gradient path back to `network`. The objective is still to be designed.
        loss = criterion(torch.from_numpy(new_state[straty_columns].values), torch.from_numpy(new_straty.values))
        # TODO

        # .item() also works once the loss requires grad, where .numpy() would raise.
        losses.append(loss.item())

        t.set_description(f'{e+1}/{epochs} - Loss: {np.mean(losses)}')
        t.refresh() # to show immediately the update

        optimizer.zero_grad()

        # TODO
    #     loss.backward()
    #     optimizer.step()
        # TODO

        # Draw the next state uniformly at random from the training rows.
        k = np.random.choice(df_train.shape[0])
        state = df_train.iloc[k:k+1].copy()

1/400 - Loss: 0.12714003552980827: 100%|██████████| 1000/1000 [00:12<00:00, 81.14it/s]
2/400 - Loss: 0.1367276575654923: 100%|██████████| 1000/1000 [00:11<00:00, 86.57it/s]
3/400 - Loss: 0.1307675565763089: 100%|██████████| 1000/1000 [00:12<00:00, 82.38it/s]
4/400 - Loss: 0.12739864967608464:  16%|█▌        | 156/1000 [00:01<00:10, 80.72it/s]


KeyboardInterrupt: 