LSTM CASHFLOW PREDICTOR

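This is a first version of a cash flow predictor. It shows the complete workflow: loading the transactions, aggregating them into weekly totals, training an LSTM on those totals, and forecasting the cash flow for the next 10 weeks.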

In [None]:
# Installing libraries (run if needed)
!pip install pandas
!pip install numpy

In [None]:
#importing packages
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

In [None]:
#loading data + checking headers/info
# '<filename>.csv' is a placeholder: replace it with the path of the CSV to analyse.
# The cells below read the columns 'Valutadatum' and 'Bedrag' from this frame.
data = pd.read_csv('<filename>.csv')
data.info()  # prints the column dtypes and non-null counts

In [None]:
# Preview the first rows of the loaded frame.
data.head()

In [None]:
# Summary statistics of the loaded frame.
data.describe()

In [None]:
#show the column names for viewing
# The cells below expect 'Valutadatum' and 'Bedrag' to be among them.
print('Columns in the dataset:', data.columns)

In [None]:
#Data checking for debugging:
# Shows how the raw 'Bedrag' values are formatted before they are cleaned further down.
print(data['Bedrag'].head(20)) #checking 20 rows for data-setup

In [None]:
#Preparing: parse the 'Valutadatum' strings into datetimes
# Sorting and aggregating happen in later cells. With the explicit
# day-month-year format, pandas raises on values written in any other layout.
data['Valutadatum'] = pd.to_datetime(data['Valutadatum'], format='%d-%m-%Y')

In [None]:
#Checking NaN-Values:
# Prints every row whose 'Bedrag' is missing.
print(data[data['Bedrag'].isna()])

In [None]:
#Cleaning column Bedrag
# Remember which amounts were empty in the raw data: those rows are dropped
# below, whereas a non-empty value that cannot be parsed is an error.
bedrag_missing = data['Bedrag'].isna()
bedrag_text = data['Bedrag'].astype(str)

# When a value holds both '.' and ',' (e.g. '1.234,56' or '1,234.56') the
# separator that occurs last is the decimal mark and the other one is a
# thousands separator that has to be removed first. Values with a single
# kind of separator are handled exactly as before.
has_both = bedrag_text.str.contains(',', regex=False) & bedrag_text.str.contains('.', regex=False)
comma_is_decimal = bedrag_text.str.rfind(',') > bedrag_text.str.rfind('.')
bedrag_text = bedrag_text.mask(has_both & comma_is_decimal, bedrag_text.str.replace('.', '', regex=False))
bedrag_text = bedrag_text.mask(has_both & ~comma_is_decimal, bedrag_text.str.replace(',', '', regex=False))

# Decimal comma -> decimal point, then strip everything that is not part of a number.
bedrag_text = bedrag_text.str.replace(',', '.', regex=False).str.replace(r'[^0-9.-]', '', regex=True)
data['Bedrag'] = pd.to_numeric(bedrag_text, errors='coerce')

# Only values that were present but could not be parsed count as failures.
unparseable = data['Bedrag'].isna() & ~bedrag_missing
if unparseable.any():
    raise ValueError(
        "Some values in 'Bedrag' could not be converted to numeric. Please inspect these rows. "
        f"Row labels (first 10): {unparseable[unparseable].index[:10].tolist()}"
    )
data = data.dropna(subset=['Valutadatum', 'Bedrag']) #dropping rows with NaN values in valutadatum or bedrag
data = data.sort_values('Valutadatum')

In [None]:
#Aggregate weekly cashflow
# Index by date only while 'Valutadatum' is still a column, so re-running this
# cell does not fail with a KeyError after the index has already been set.
if 'Valutadatum' in data.columns:
    data = data.set_index('Valutadatum')
# 'W' groups the rows into weeks ending on Sunday and sums the amounts per week;
# a week without any transaction gets a total of 0.
weekly_data = data['Bedrag'].resample('W').sum()

#checking weekly_data:
print(weekly_data.head(10))

In [None]:
#scaling and converting
# Scale the weekly totals to the range [0, 1]. The reshape turns the 1-D series
# into a single-column 2-D array, which is the layout the scaler is fitted on.
# NOTE(review): the scaler is fitted on the whole series, i.e. also on the weeks
# that end up in the test split further down, so the test-set min/max influence
# the training inputs. Consider fitting on the training weeks only — TODO confirm.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(weekly_data.values.reshape(-1, 1))

def create_sequences(data, sequence_length):
    """Cut a series into overlapping input windows and their next-step targets.

    Parameters
    ----------
    data : array-like
        Observations ordered in time along the first axis.
    sequence_length : int
        Number of consecutive observations per input window; must be >= 1.

    Returns
    -------
    X : np.ndarray
        ``len(data) - sequence_length`` windows, where ``X[i]`` equals
        ``data[i:i + sequence_length]``.
    y : np.ndarray
        The observation that directly follows each window,
        ``data[i + sequence_length]``.

    When ``data`` is not longer than ``sequence_length`` both arrays are empty
    but keep their usual number of dimensions, so shape lookups such as
    ``X.shape[1]`` still work.

    Raises
    ------
    ValueError
        If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError(f"sequence_length must be at least 1, got {sequence_length}")

    values = np.asarray(data)
    n_windows = len(values) - sequence_length
    trailing_shape = values.shape[1:]

    if n_windows <= 0:
        # Too little data for even one window: return empty arrays of the right rank.
        empty_X = np.empty((0, sequence_length) + trailing_shape, dtype=values.dtype)
        empty_y = np.empty((0,) + trailing_shape, dtype=values.dtype)
        return empty_X, empty_y

    X = np.stack([values[start:start + sequence_length] for start in range(n_windows)])
    # The targets are simply the series shifted by one window length.
    y = values[sequence_length:].copy()
    return X, y

In [None]:
# setting weeks looking back
sequence_length = 10  # number of past weeks in each input window
X, y = create_sequences(scaled_data, sequence_length)

In [None]:
#splitting into test/training
# Order is kept: the first 80% of the windows train the model, the rest test it
split = int(len(X) * 0.8)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

In [None]:
#setting up the model
# Two stacked LSTM layers followed by a single-unit output layer
model = Sequential()
model.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))

model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))

In [None]:
#Training
# NOTE(review): the test split is passed as validation data, so val_loss is
# measured on the same windows the test RMSE is reported on further down.
history = model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), verbose=1)

In [None]:
#Setting up predictions
# Predict the test windows, then map predictions and targets back to the original scale
predictions = scaler.inverse_transform(model.predict(X_test))
actual = scaler.inverse_transform(y_test)

In [None]:
#Prediction next 10 weeks
future_predictions = []
# Seed the forecast with the most recent `sequence_length` observed weeks.
# X_test[-1] must not be used here: it is the window whose target is the last
# observed week, so it stops one week short of the end of the series and every
# "future" value would end up one week too late on the time axis.
current_sequence = scaled_data[-sequence_length:]

for _ in range(10):  # Predict for 10 weeks
    next_pred = model.predict(current_sequence[np.newaxis, :, :])[0, 0]
    future_predictions.append(next_pred)
    # Slide the window: drop the oldest week and append the new prediction
    current_sequence = np.append(current_sequence[1:], next_pred).reshape(-1, 1)

# Rescale future predictions back to original scale
future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))

In [None]:
#plotting results
# The test targets are the last len(y_test) weeks of the series
test_dates = weekly_data.index[-len(y_test):]
plt.figure(figsize=(14, 7))
plt.plot(test_dates, actual, label='Actual')
plt.plot(test_dates, predictions, label='Predicted')
plt.title('LSTM Model Predictions')
plt.xlabel('Week')
plt.ylabel('Weekly cashflow')
plt.legend()
plt.show()


In [None]:
#Plotting future cashflow
# One date per forecast value: the range starts at the last observed week, which
# [1:] removes again, so the axis always matches the number of predictions.
n_future_weeks = len(future_predictions)
future_dates = pd.date_range(start=weekly_data.index[-1], periods=n_future_weeks + 1, freq='W')[1:]
plt.figure(figsize=(14, 7))
plt.plot(future_dates, future_predictions, marker='o', label='Future Predictions')
plt.title(f'Future Cashflow (Next {n_future_weeks} Weeks)')
plt.xlabel('Week')
plt.ylabel('Predicted weekly cashflow')
plt.legend()
plt.show()

In [None]:
#Check overfitting/underfitting
#importing packages
from sklearn.metrics import mean_squared_error

In [None]:
#RMSE as indicator for performance, calculating RMSE
# Both arrays were mapped back with scaler.inverse_transform, so the error is
# expressed on the original scale of the weekly totals.
test_rmse = np.sqrt(mean_squared_error(actual, predictions))
print(f"Test RMSE: {test_rmse:.2f}")

In [None]:
#training set predictions:
# Same evaluation as for the test split, this time on the windows the model was fitted on
train_actual = scaler.inverse_transform(y_train)
train_predictions = scaler.inverse_transform(model.predict(X_train))

train_rmse = np.sqrt(mean_squared_error(train_actual, train_predictions))
print(f"Train RMSE: {train_rmse:.2f}")

In [None]:
#Plotting training data vs validation loss:
# One curve per entry recorded by model.fit
fig, ax = plt.subplots(figsize=(14, 7))
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_title('Training vs Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
#RMSE comparison
# Side-by-side bars for the train and test error
labels = ['Train RMSE', 'Test RMSE']
values = [train_rmse, test_rmse]
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(labels, values, color=['blue', 'orange'])
ax.set_title('RMSE Comparison')
ax.set_ylabel('RMSE')
plt.show()