In [None]:
import pandas as pd
import yaml
import os


# Locate params.yaml in the current working directory.
home_path = os.getcwd()
params_file = 'params.yaml'
#initial_data_file = 'initial_data'
params_path = os.path.join(home_path, params_file)
#initial_data_path = os.path.join(home_path, initial_data_file)

# Read the hyper-parameters inside a context manager so the file handle is
# closed as soon as parsing finishes. yaml.safe_load(stream) is the shorthand
# for yaml.load(stream, Loader=yaml.SafeLoader).
with open(params_path, 'r') as params_stream:
    params = yaml.safe_load(params_stream)

# Unpack the individual settings used by the cells below.
csv_train_data = params['csv_train_data']
lstm0_units = params['lstm0_units']
lstm1_units = params['lstm1_units']
lstm2_units = params['lstm2_units']
lag = params['lag']
activation = params['activation']
optimizer = params['optimizer']
dense_units = params['dense_units']
dropout_count = params['dropout_count']
epochs = params['epochs']
col_for_train = params['col_for_train']
# batch_size = params['batch_size']

In [None]:
# Read the training CSV, zero the seconds of every timestamp, order the rows
# by time, turn the timestamps back into '%Y-%m-%d %H:%M:%S' strings and
# rename the load column to 'P_l'.
df_all_data = pd.read_csv(csv_train_data)
df_all_data = (
    df_all_data
    .assign(time=lambda frame: pd.to_datetime(frame['time']).map(lambda ts: ts.replace(second=0)))
    .sort_values(by='time')
    .assign(time=lambda frame: frame['time'].dt.strftime('%Y-%m-%d %H:%M:%S'))
    .rename(columns={'load_consumption': 'P_l'})
)

In [None]:
import importlib.util

# Path to the Python file that contains the normalization helper
path_to_file = '/content/norm_new.py'

# Build an import spec for that file, create the module object and execute it
spec = importlib.util.spec_from_file_location(name="norm_new", location=path_to_file)
norm_new = importlib.util.module_from_spec(spec)
spec.loader.exec_module(norm_new)

# The module is loaded, so its TimeNormalization class can be instantiated
tn = norm_new.TimeNormalization()


In [None]:
# Run the prepared frame through the TimeNormalization helper and print the result.
# NOTE(review): this prints the whole frame; .head() would keep the output short.
df_all_data_norm = tn.df_normalize_with_meta(df_all_data)
print(df_all_data_norm)

             P_l   year      week  day_of_week      hour    minute  second  \
0       0.239276  0.750  0.686275     0.166667  0.652174  0.542373     0.0   
1       0.268970  0.750  0.686275     0.166667  0.652174  0.627119     0.0   
2       0.276861  0.750  0.686275     0.166667  0.652174  0.711864     0.0   
3       0.283909  0.750  0.686275     0.166667  0.652174  0.796610     0.0   
4       0.258117  0.750  0.686275     0.166667  0.652174  0.881356     0.0   
...          ...    ...       ...          ...       ...       ...     ...   
134770  0.522720  0.875  0.980392     0.000000  0.391304  0.016949     0.0   
134771  0.542458  0.875  0.980392     0.000000  0.391304  0.101695     0.0   
134772  0.528427  0.875  0.980392     0.000000  0.391304  0.186441     0.0   
134773  0.521900  0.875  0.980392     0.000000  0.391304  0.271186     0.0   
134774  0.524155  0.875  0.980392     0.000000  0.391304  0.355932     0.0   

        hour_sin  hour_cos  day_of_week_sin  day_of_week_cos  w

In [None]:
# Weather table, published as CSV from a Google Sheets document
df = pd.read_csv('https://docs.google.com/spreadsheets/d/e/2PACX-1vRzjnptk4SENCQOEH3cpi2MzpGlYen1v4b8xtE9ENs97_ObR0h2Kk7CZSZoGdNHy9PuVhSjYjTbQ_5I/pub?gid=419625624&single=true&output=csv')

# Copy the weather columns into the normalized frame.
# NOTE(review): the values are taken as-is (no normalization is applied here)
# and both frames are presumably expected to share the same row index — confirm.
weather_columns = ['temperature', 'pressure',
                   'dew_point', 'heat_index', 'humidity', 'solar_irradiance', 'uv_index',
                   'wind_chill']
df_all_data_norm[weather_columns] = df[weather_columns]

In [None]:
# Day whose start marks the end of the data kept for this experiment.
date_for_test = '2023-12-10'

# Rows whose 'time' string falls in the first five minutes of that day
# (the comparison is done on the string values of the column).
date_1 = date_for_test + ' 00:00:00'
date_2 = date_for_test + ' 00:05:00'
start_day = df_all_data[(df_all_data['time'] >= date_1) & (df_all_data['time'] <= date_2)]

# Fail with a readable message instead of an opaque
# "index 0 is out of bounds" when the requested day is not in the data.
if start_day.empty:
    raise ValueError(f'No rows in df_all_data with time between {date_1} and {date_2}')
start_day_index = start_day.index[0]

# Number of rows kept counting from the first matching row.
# NOTE(review): presumably 288 five-minute samples of one day plus one — confirm.
rows_kept_from_start = 289

# NOTE(review): start_day_index is an index *label* of df_all_data, while the
# slice below is positional on df_all_data_norm; the two only agree when both
# frames have the same row order and a 0..n-1 index — confirm.
df_all_data_norm = df_all_data_norm[:start_day_index + rows_kept_from_start]

In [None]:
# Column labels of the normalized frame; printed for reference
# (the message text is Russian for "All available columns").
all_col = df_all_data_norm.columns
print(f'Все доступные колонки - {all_col}')

Все доступные колонки - Index(['P_l', 'year', 'week', 'day_of_week', 'hour', 'minute', 'second',
       'hour_sin', 'hour_cos', 'day_of_week_sin', 'day_of_week_cos',
       'week_sin', 'week_cos', 'is_holiday', 'temperature', 'pressure',
       'dew_point', 'heat_index', 'humidity', 'solar_irradiance', 'uv_index',
       'wind_chill'],
      dtype='object')


In [None]:
# Columns of the normalized frame that are not listed in col_for_train
diff_cols = all_col.difference(col_for_train)

In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional, TimeDistributed, Conv1D, MaxPooling1D, Flatten, Dropout
import tensorflow as tf
from tensorflow.keras.callbacks import Callback

class SaveBestWeights(Callback):
    """Keras callback that keeps the weights of the epoch with the lowest training loss.

    After ``fit`` finishes, ``best_weights`` holds the value returned by
    ``model.get_weights()`` at the best epoch (or ``None`` if no epoch
    reported a loss lower than infinity) and ``best_loss`` holds that loss.
    """

    def __init__(self):
        super().__init__()
        self._reset_best()

    def _reset_best(self):
        """Forget any previously recorded best loss and weights."""
        self.best_weights = None
        self.best_loss = float('inf')

    def on_train_begin(self, logs=None):
        """Start every ``fit`` call from a clean state.

        Without this, an instance reused for a second training run would keep
        comparing against (and possibly hand back) the weights of the first run.
        """
        self._reset_best()

    def on_epoch_end(self, epoch, logs=None):
        """Record the current weights when this epoch's 'loss' is the lowest so far."""
        # `logs` defaults to None in the signature, so guard before `.get`.
        current_loss = (logs or {}).get('loss')
        if current_loss is None:
            return
        if current_loss < self.best_loss:
            self.best_loss = current_loss
            self.best_weights = self.model.get_weights()

# Callback instance that is passed to model.fit further down; after training
# its best_weights attribute is loaded back into the model.
save_best_weights_callback = SaveBestWeights()

# Split a multivariate sequence
def split_sequence(sequence, n_steps):
    """Cut a 2-D array into sliding windows and their next-step targets.

    Every sample consists of ``n_steps`` consecutive rows (all columns); its
    target is column 0 of the row that directly follows the window.

    Args:
        sequence: 2-D array indexed as ``[row, column]``.
        n_steps: Number of rows per window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays built from the collected windows and
        targets. Both are empty when no full window with a following row fits.
    """
    # Pair each window start with the index of the row that follows the window;
    # zip stops as soon as either range is exhausted.
    spans = list(zip(range(len(sequence)), range(n_steps, len(sequence))))
    windows = [sequence[start:stop, :] for start, stop in spans]
    targets = [sequence[stop, 0] for _, stop in spans]
    return np.array(windows), np.array(targets)

def create_x_input(df_train, n_steps):
    """Return the last ``n_steps`` rows of ``df_train`` as a NumPy array.

    Args:
        df_train: DataFrame to take the trailing rows from.
        n_steps: Number of trailing rows to return.

    Returns:
        ``df_train.iloc[-n_steps:]`` as an array of shape ``(n_steps, n_columns)``.

    Raises:
        ValueError: If ``n_steps`` exceeds the number of rows. Without this
            check the slice start becomes negative and a window of the wrong
            length is returned silently.
    """
    n_rows = len(df_train)
    if n_steps > n_rows:
        raise ValueError(
            f'n_steps ({n_steps}) is larger than the number of available rows ({n_rows})'
        )
    return df_train.iloc[n_rows - n_steps:].values

def make_predictions(x_input, x_future):
    """Forecast step by step, feeding every prediction back into the input window.

    Uses the notebook-level names ``model``, ``lag`` and ``n_features``.

    Args:
        x_input: Most recent input window; must be reshapeable to
            ``(1, lag, n_features)``.
        x_future: 2-D array-like with one row per step to forecast. Column 0
            of each row is ignored and replaced by the model's prediction;
            the remaining columns are used as the features of that step.
            The argument itself is not modified.

    Returns:
        List containing the raw ``model.predict`` output of every step.
    """
    predict_values = []
    # Work on a float copy so the window keeps a numeric dtype even when
    # x_future is an object array (e.g. because its target column was blanked).
    window = np.asarray(x_input, dtype=float).reshape((1, lag, n_features))
    for future_row in x_future:
        x_input_tensor = tf.convert_to_tensor(window, dtype=tf.float32)
        y_predict = model.predict(x_input_tensor, verbose=1)
        predict_values.append(y_predict)

        # Build the next time step in a fresh array: the prediction goes into
        # column 0 as a scalar, the known features fill the other columns.
        next_step = np.empty(n_features, dtype=float)
        next_step[1:] = future_row[1:]
        next_step[0] = np.asarray(y_predict).reshape(-1)[0]

        # Slide the window: drop the oldest step, append the new one.
        window = np.concatenate([window[:, 1:, :], next_step.reshape(1, 1, -1)], axis=1)
    return predict_values

columns = col_for_train
print(columns)

# Train/test split on the truncated, normalized frame.
# NOTE(review): .loc slices by label and includes the end label, so the train
# part ends at label `train_index` and the test part starts at
# `train_index + 1`; with a 0..n-1 index that leaves 287 test rows, not 288 —
# confirm this is intended.
df = df_all_data_norm
train_index = len(df) - 288
df_train_all_col = df.loc[:train_index]
df_test_all_col = df.loc[train_index+1:]
df_true_all_col = df_test_all_col.copy()

df = df_all_data_norm[col_for_train]
df_train = df.loc[:train_index]
# Explicit copy: a column is assigned below, and assigning into a slice of
# another frame triggers pandas' SettingWithCopyWarning.
df_test = df.loc[train_index+1:].copy()
df_true = df_test.copy()

# Blank the target of the forecast horizon. NaN (rather than None) keeps the
# column numeric, so df_test.values stays a float array.
df_test['P_l'] = np.nan
df_forecast = df_test.copy()

values = df_train[columns].values

x_input = create_x_input(df_train, lag)
x_future = df_test.values

# Supervised samples: windows of `lag` rows -> column 0 of the following row.
# X and y are passed to the model as-is; no NaN/Inf cleaning is applied.
X, y = split_sequence(values, lag)
n_features = values.shape[1]

# Model architecture: three stacked bidirectional LSTM layers, each followed
# by dropout, then a dense head with a single output.
# NOTE(review): Dropout(0.01) and Dense(100) are hard-coded although
# `dropout_count` and `dense_units` are read from params.yaml — confirm.
model = Sequential()

model.add(Bidirectional(LSTM(lstm0_units, activation=activation, return_sequences=True), input_shape=(lag, n_features)))
model.add(Dropout(0.01))
model.add(Bidirectional(LSTM(lstm1_units, activation=activation, return_sequences=True)))
model.add(Dropout(0.01))
model.add(Bidirectional(LSTM(lstm2_units, activation=activation)))
model.add(Dropout(0.01))
model.add(Dense(100, activation='relu'))
model.add(Dense(1))

# Final layers
model.compile(
    optimizer=optimizer,
    loss='mse',
    metrics=['mse', 'mae']
)

# Show the sample shapes instead of dumping the full arrays.
print(X.shape)
print(y.shape)

# Train the model
history = model.fit(X, y, epochs=epochs, verbose=1, callbacks=[save_best_weights_callback])
# Restore the lowest-loss weights; best_weights stays None when no epoch
# reported a loss below infinity (e.g. the loss was NaN throughout).
if save_best_weights_callback.best_weights is not None:
    model.set_weights(save_best_weights_callback.best_weights)

# Add the leading batch dimension expected by make_predictions.
x_input = x_input.reshape((1, lag, n_features))


['P_l', 'year', 'week', 'day_of_week', 'hour', 'minute', 'hour_sin', 'hour_cos', 'day_of_week_sin', 'day_of_week_cos', 'week_sin', 'week_cos']




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



[[[ 0.2392758   0.75        0.68627451 ...  0.6234898  -0.93501624
   -0.35460489]
  [ 0.26897013  0.75        0.68627451 ...  0.6234898  -0.93501624
   -0.35460489]
  [ 0.27686104  0.75        0.68627451 ...  0.6234898  -0.93501624
   -0.35460489]
  [ 0.28390916  0.75        0.68627451 ...  0.6234898  -0.93501624
   -0.35460489]
  [ 0.25811702  0.75        0.68627451 ...  0.6234898  -0.93501624
   -0.35460489]
  [ 0.25153711  0.75        0.68627451 ...  0.6234898  -0.93501624
   -0.35460489]]

 [[ 0.26897013  0.75        0.68627451 ...  0.6234898  -0.93501624
   -0.35460489]
  [ 0.27686104  0.75        0.68627451 ...  0.6234898  -0.93501624
   -0.35460489]
  [ 0.28390916  0.75        0.68627451 ...  0.6234898  -0.93501624
   -0.35460489]
  [ 0.25811702  0.75        0.68627451 ...  0.6234898  -0.93501624
   -0.35460489]
  [ 0.25153711  0.75        0.68627451 ...  0.6234898  -0.93501624
   -0.35460489]
  [ 0.24753179  0.75        0.68627451 ...  0.6234898  -0.93501624
   -0.35460489]]



KeyboardInterrupt: 

In [None]:
# Run the step-by-step forecast and store the flattened predictions as the
# target column of the forecast frame.
predict_values = np.array(make_predictions(x_input, x_future)).flatten()
df_forecast['P_l'] = predict_values

# Copy over the columns that were not used for training; the loop simply
# does nothing when there are none.
for col in diff_cols:
    df_forecast[col] = df_true_all_col[col]



In [None]:
# Pass the forecast frame and the held-out frame through the normalizer's
# df_denormalize_with_meta. The former leading `df_forecast[col] = ...` line
# was dropped: it relied on a loop variable left over from the previous cell
# (NameError when there are no extra columns, a repeated assignment otherwise).
df_comparative = tn.df_denormalize_with_meta(df_forecast)
df_true = tn.df_denormalize_with_meta(df_true_all_col)

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd


# Single-panel figure comparing the real load with the forecast.
fig_p_l = make_subplots(rows=1, cols=1, subplot_titles=['P_l_real vs P_l_predict'])

fig_p_l.add_trace(go.Scatter(x=df_true['time'], y=df_true['P_l'], mode='lines', name='P_l_real', line=dict(color='blue')), row=1, col=1)
fig_p_l.add_trace(go.Scatter(x=df_comparative['time'], y=df_comparative['P_l'], mode='lines', name='P_l_predict', line=dict(color='orange')), row=1, col=1)

# Apply the chosen template to the figure; previously the name was assigned
# but never passed to the layout, so it had no effect.
template = "presentation"
fig_p_l.update_layout(template=template)

fig_p_l.show()

In [None]:
import importlib.util


# Exact path to the calculate_metrics.py file
path_to_file = '/content/calculate_metrics.py'

# Load the module from that file
spec = importlib.util.spec_from_file_location(name="calculate_metrics", location=path_to_file)
calculate_metrics = importlib.util.module_from_spec(spec)
spec.loader.exec_module(calculate_metrics)

# Target column of the ground-truth frame versus the forecast frame
y_true = df_true['P_l']
y_pred = df_comparative['P_l']

rmse, r2, mae, mape, wmape = calculate_metrics.calculate_metrics(y_true=y_true, y_pred=y_pred)

# Print every metric as "<label> = <value>", one per line
for metric_label, metric_value in (
    ('RMSE', rmse),
    ('R-squared', r2),
    ('MAE', mae),
    ('MAPE', mape),
    ('WMAPE', wmape),
):
    print(f'{metric_label} = {metric_value}')

RMSE = 6851.031562003017
R-squared = 0.41713729400074384
MAE = 5293.090503637843
MAPE = 14.138223873119928
WMAPE = 16.01671867528488


In [None]:
import plotly.graph_objects as go
import os
import yaml


# One directory per hyper-parameter combination under ./models.
# NOTE(review): lstm2_units is not part of the directory name, so two runs
# that differ only in that value write to the same directory — confirm.
home_path_models = os.path.abspath('models')
name_model_dir = f'l0{lstm0_units}_l1{lstm1_units}_dc{dropout_count}_du{dense_units}_a_{activation}_o_{optimizer}_e{epochs}'
model_dir_path = os.path.join(home_path_models, name_model_dir)
os.makedirs(model_dir_path, exist_ok=True)
model_name = f'{name_model_dir}.h5'
model.save(os.path.join(model_dir_path, model_name))

# Store the parameters next to the model. The handle gets its own name so the
# notebook-level `params_file` string ('params.yaml') is not overwritten with
# a closed file object.
params_file_path = os.path.join(model_dir_path, 'params.yaml')
with open(params_file_path, 'w') as params_out:
    yaml.dump(params, params_out, default_flow_style=False)

# Training-loss curve (one point per epoch), saved as a standalone HTML file.
fig = go.Figure()
fig.add_trace(go.Scatter(x=list(range(1, len(history.history['loss']) + 1)), y=history.history['loss'], mode='lines'))
fig.update_layout(title='Model Training Loss (Interactive)',
                  xaxis_title='Epoch',
                  yaxis_title='Loss',
                  template='plotly_dark',
                  hovermode='x')

html_file_path = os.path.join(model_dir_path, 'training_loss_plot_interactive.html')
fig.write_html(html_file_path)
print(name_model_dir)