In [12]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from torch import optim
from ucimlrepo import fetch_ucirepo
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import r2_score, mean_squared_error, root_mean_squared_error
import warnings
# NOTE(review): with no category/module argument this suppresses *every* warning
# for the rest of the kernel session (deprecations, pandas chained-assignment
# warnings, sklearn data-conversion warnings, ...). Consider narrowing the filter
# to the specific categories that are known to be noise.
warnings.filterwarnings("ignore")

## Подготовка датасета

### Кодирование строковых признаков

In [13]:
steel_industry_energy_consumption = fetch_ucirepo(id=851)

# data (as pandas dataframes)
# Work on copies so the in-place column assignments below do not write into the
# frames owned by the fetched object (and do not trigger chained-assignment
# warnings if those frames are slices of a larger table).
X = steel_industry_energy_consumption.data.features.copy()
y = steel_industry_energy_consumption.data.targets.copy()

# Replace each string-valued column with integer class codes.
# LabelEncoder expects a 1-D input, so the Series is passed directly, and a
# single fit_transform call both learns the classes and produces the codes.
for frame, column in ((X, 'WeekStatus'), (X, 'Day_of_week'), (y, 'Load_Type')):
    frame[column] = LabelEncoder().fit_transform(frame[column])

# Assemble the full table as a separate frame so that X keeps only the feature
# columns instead of silently gaining 'Load_Type' through an alias.
dataset = X.copy()
dataset['Load_Type'] = y['Load_Type']

# Persist the encoded but still unscaled table in case scaled values later need
# to be mapped back to the original ones.
dataset.to_csv('kw_dataset.csv', index=False)

### Нормализация данных

In [15]:
# Reload the encoded (unscaled) table written by the encoding step.
dataset = pd.read_csv('kw_dataset.csv')

# Scale every column independently with its own MinMaxScaler (default range
# [0, 1]). Each scaler is fitted exactly once and kept in `scalers`, so scaled
# values -- e.g. predictions of 'Usage_kWh' -- can be mapped back with
# scalers[column].inverse_transform(...).
# NOTE(review): the scalers are fitted on all rows, i.e. before the train/test
# split performed further down, so test-set min/max leak into the training data.
# Consider fitting on the training portion only.
scalers = {}
for column in dataset.columns:
    scaler = MinMaxScaler()
    # fit_transform returns an (n_rows, 1) array; flatten it for column assignment.
    dataset[column] = scaler.fit_transform(dataset[[column]]).ravel()
    scalers[column] = scaler

dataset.head()

### Подготовка обучающей и тестовой выборок

In [16]:
# 'Usage_kWh' is the regression target; every other column is a predictor.
y = dataset['Usage_kWh'].values
X = dataset.drop(columns=['Usage_kWh'])

# Insert a length-1 axis at position 1:
#   X: (rows, features) -> (rows, 1, features)
#   y: (rows,)          -> (rows, 1)
X = X.to_numpy().astype(float)[:, np.newaxis, :]
y = y.astype(float)[:, np.newaxis]

# Row index at which the data is cut: the first 80% of the rows (in stored
# order, no shuffling) form the training set, the remainder the test set.
split_ratio = int(0.8 * y.shape[0])

X_train, X_test = X[:split_ratio].astype(float), X[split_ratio:].astype(float)
y_train, y_test = y[:split_ratio].astype(float), y[split_ratio:].astype(float)

print(f'X_train.shape={X_train.shape}')
print(f'X_test.shape={X_test.shape}')
print(f'y_train.shape={y_train.shape}')
print(f'y_test.shape={y_test.shape}')

### Подготовка обучающего датасета и лоадера

In [17]:
# Convert the training arrays to torch tensors.
train_features = torch.Tensor(X_train)
train_targets = torch.Tensor(y_train)

# NOTE(review): this rebinds `dataset`, which previously held the scaled
# DataFrame, so the split cell cannot be re-run after this one without first
# re-running the normalization cell.
dataset = TensorDataset(train_features, train_targets)

# Batches of 100 samples, served in stored order (no shuffling).
dataloader = DataLoader(dataset=dataset, batch_size=100, shuffle=False)
print('Loader created')

## Реализация моделей - RNN, GRU, LSTM

In [19]:
class RNNModel(nn.Module):
    """ReLU RNN followed by a linear head applied to the last time step.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the recurrent hidden state.
        layer_dim: number of stacked recurrent layers.
        output_dim: number of values predicted per input sequence.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='relu')
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the RNN over ``x`` and project the last time step's output.

        Args:
            x: batch-first input tensor, indexed as (batch, seq_len, input_dim).

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # Allocate the zero initial state directly on the input's device/dtype
        # instead of relying on a notebook-level `device` global (which is only
        # defined in a later cell) and the deprecated `Variable` wrapper.
        h0 = torch.zeros(
            self.layer_dim, x.size(0), self.hidden_dim,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)
        # Only the output at the final time step feeds the regression head.
        return self.fc(out[:, -1, :])

class GRU(nn.Module):
    """GRU followed by a linear head applied to the last time step.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the recurrent hidden state.
        layer_dim: number of stacked GRU layers.
        output_dim: number of values predicted per input sequence.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(GRU, self).__init__()
        self.hidden_size = hidden_dim
        self.num_layers = layer_dim
        self.gru = nn.GRU(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc1 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the GRU over ``x`` and project the last time step's output.

        Args:
            x: batch-first input tensor, indexed as (batch, seq_len, input_dim).

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # Zero initial state created on the input's device/dtype, so the model
        # does not depend on a notebook-level `device` global.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.gru(x, h0)
        # Take the last time step, as RNNModel does. Flattening all time steps
        # (the previous behaviour) yields seq_len * hidden_dim features and
        # breaks fc1 for any seq_len > 1; for seq_len == 1 both are identical.
        out = out[:, -1, :]
        return self.fc1(out)

class LSTM(nn.Module):
    """LSTM followed by a linear head applied to the last time step.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the hidden and cell states.
        layer_dim: number of stacked LSTM layers.
        output_dim: number of values predicted per input sequence.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_dim
        self.num_layers = layer_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc1 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the LSTM over ``x`` and project the last time step's output.

        Args:
            x: batch-first input tensor, indexed as (batch, seq_len, input_dim).

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # Zero hidden and cell states created on the input's device/dtype, so
        # the model does not depend on a notebook-level `device` global.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        out, _ = self.lstm(x, (h0, c0))
        # Take the last time step, as RNNModel does. Flattening all time steps
        # (the previous behaviour) yields seq_len * hidden_dim features and
        # breaks fc1 for any seq_len > 1; for seq_len == 1 both are identical.
        out = out[:, -1, :]
        return self.fc1(out)

### Инициализация параметров моделей

In [21]:
input_dim = 9    # input dimension (features per time step)
hidden_dim = 50  # hidden layer dimension
layer_dim = 1    # number of stacked recurrent layers
output_dim = 1   # output dimension
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'