# In [1]:  (Jupyter notebook cell marker kept from the export; not code)
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error, mean_squared_error

# 1. Load and prepare the data
print("1. Загрузка и подготовка данных...")
data = pd.read_csv("CupIT_Sber_data.csv", delimiter=";")

# Amounts that cannot be parsed as numbers become NaN and are dropped below,
# together with rows that have no service date.
data['service_amount_net'] = pd.to_numeric(data['service_amount_net'], errors='coerce')
data['service_date'] = pd.to_datetime(data['service_date'])
data = data.dropna(subset=['service_date', 'service_amount_net'])

# 2. Monthly aggregation and log transform
print("\n2. Агрегация данных по месяцам...")
monthly_data = (
    data.resample('M', on='service_date')['service_amount_net']
    .sum()
    .to_frame('total_payments')
)
# The logarithm is only defined for positive totals, so other months are removed.
monthly_data = monthly_data[monthly_data['total_payments'] > 0]
monthly_data['log_payments'] = np.log(monthly_data['total_payments'])

# 3. Scaling
# Fit the scaler on the training prefix only, so that test-period values do not
# influence the min/max used for scaling (data leakage). The 0.8 share mirrors
# the train/test split ratio used further down; keep the two in sync.
# Test-period values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler_fit_rows = int(len(monthly_data) * 0.8)
scaler.fit(monthly_data[['log_payments']].iloc[:scaler_fit_rows])
monthly_data['scaled_payments'] = scaler.transform(monthly_data[['log_payments']]).ravel()

# 4. Подготовка данных для LSTM
def create_sequences(data, seq_length=12):
    """Build sliding windows and their next-step targets from a series.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``.

    Args:
        data: Array-like series (first axis is time).
        seq_length: Number of consecutive points in each input window.

    Returns:
        A tuple ``(sequences, targets)`` of NumPy arrays. ``sequences`` has
        shape ``(n, seq_length, ...)`` and ``targets`` has shape ``(n, ...)``
        where ``n = max(len(data) - seq_length, 0)``. When the series is too
        short to form a single window, correctly shaped empty arrays are
        returned so that downstream reshaping still sees the window axis.
    """
    values = np.asarray(data)
    n_windows = len(values) - seq_length

    if n_windows <= 0:
        # Keep the window dimension even when there is nothing to return.
        trailing_shape = values.shape[1:]
        empty_sequences = np.empty((0, seq_length) + trailing_shape, dtype=values.dtype)
        empty_targets = np.empty((0,) + trailing_shape, dtype=values.dtype)
        return empty_sequences, empty_targets

    sequences = np.stack([values[i:i + seq_length] for i in range(n_windows)])
    # Copy so the returned targets never alias the caller's array.
    targets = values[seq_length:].copy()
    return sequences, targets

seq_length = 12
# Chronological split: the first 80% of months train the model, the rest test it.
train_size = int(len(monthly_data) * 0.8)
scaled_values = monthly_data['scaled_payments'].values
train_data, test_data = scaled_values[:train_size], scaled_values[train_size:]

X_train, y_train = create_sequences(train_data, seq_length)
X_test, y_test = create_sequences(test_data, seq_length)

# Each split needs more than seq_length points to yield at least one window;
# fail early with a clear message instead of a shape error inside Keras.
if len(X_train) == 0 or len(X_test) == 0:
    raise ValueError(
        f"Not enough monthly data: each split needs more than seq_length={seq_length} "
        f"points, got {len(train_data)} train and {len(test_data)} test."
    )

# Add a trailing feature axis: the LSTM input is (samples, timesteps, features).
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)
print("X_train shape:", X_train.shape)  # expected (N, seq_length, 1)
print("y_train shape:", y_train.shape)  # expected (N,)
print("X_test shape:", X_test.shape)  # expected (M, seq_length, 1)
print("y_test shape:", y_test.shape)  # expected (M,)

# 5. Build and train the LSTM model
print("\n5. Обучение LSTM модели...")

# Two stacked LSTM layers followed by a small dense head with a single output.
model = Sequential()
# The first LSTM returns the full sequence so the second LSTM can consume it.
model.add(LSTM(50, return_sequences=True, input_shape=(seq_length, 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25, activation='relu'))
model.add(Dense(1))

optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mse')

# The test windows are passed as validation data, so their loss is reported each epoch.
model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=16,
    validation_data=(X_test, y_test),
    verbose=1,
)

# 6. Predict on the test windows and undo the min-max scaling (still log scale)
predictions = scaler.inverse_transform(model.predict(X_test))
actual = scaler.inverse_transform(y_test.reshape(-1, 1))

# Undo the log transform once and reuse the results for the plot and the metrics.
actual_payments = np.exp(actual)
predicted_payments = np.exp(predictions)
# The first seq_length test months only serve as input context, so the
# targets start seq_length positions after the split point.
test_dates = monthly_data.index[train_size + seq_length:]

# 7. Plot actual vs. predicted values
plt.figure(figsize=(14, 7))
plt.plot(test_dates, actual_payments, label='Фактические значения', color='green', linewidth=2)
plt.plot(test_dates, predicted_payments, label='Прогноз', color='red', linestyle='--', linewidth=2)
plt.title('Прогноз месячных выплат (после обратного преобразования)', fontsize=14)
plt.xlabel('Дата', fontsize=12)
plt.ylabel('Сумма выплат', fontsize=12)
plt.legend()
plt.grid(True, linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

# 8. Quality metrics on the original (non-log) scale
mae = mean_absolute_error(actual_payments, predicted_payments)
rmse = np.sqrt(mean_squared_error(actual_payments, predicted_payments))
relative_errors = np.abs((actual_payments - predicted_payments) / actual_payments)
mape = np.mean(relative_errors) * 100

print("\nМетрики качества на тестовой выборке:")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"MAPE: {mape:.2f}%")

# 9. Forecast future periods
future_steps = 12
future_predictions = []
# Seed the recursive forecast with the latest seq_length scaled observations of
# the whole series. This equals test_data[-seq_length:] when the test split is
# long enough, and still provides a full window when it is shorter.
current_input = monthly_data['scaled_payments'].values[-seq_length:].tolist()

for _ in range(future_steps):
    # Feed the most recent window; each prediction is appended and becomes
    # part of the input for the next step.
    input_seq = np.array(current_input[-seq_length:]).reshape(1, seq_length, 1)
    next_pred = model.predict(input_seq)[0, 0]
    future_predictions.append(next_pred)
    current_input.append(next_pred)

# Undo the min-max scaling (log scale), then the log transform.
future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))
future_values = np.exp(future_predictions)

# Month-end dates following the last observed month; the first generated date
# is the last historical one, so it is dropped.
future_dates = pd.date_range(start=monthly_data.index[-1], periods=future_steps+1, freq='M')[1:]

plt.figure(figsize=(14, 7))
plt.plot(monthly_data.index, np.exp(monthly_data['log_payments']), label='Исторические данные', linewidth=2)
plt.plot(future_dates, future_values, label='Прогноз', color='red', linewidth=2)
plt.title(f'Прогноз на {future_steps} месяцев вперед', fontsize=14)
plt.xlabel('Дата', fontsize=12)
plt.ylabel('Сумма выплат', fontsize=12)
plt.legend()
plt.grid(True, linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

# 10. Save the forecast
forecast_df = pd.DataFrame({
    'date': future_dates,
    'forecast': future_values.flatten()
})
forecast_df.to_csv('lstm_monthly_forecast.csv', index=False)

# Notebook output: KeyboardInterrupt (the cell run was interrupted; captured output, not code)