# Predicting Capital Calls & Distributions in Commitment-Based Funds using LSTM
This notebook trains an LSTM that reads a fund's previous 8 quarters of data and produces a multi-step forecast of its capital calls and distributions for the next 3 quarters.

In [None]:
# Install dependencies (only required for local/Jupyter)
# !pip install pandas numpy scikit-learn matplotlib torch
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Load the cash-flow CSV: upload it when running in Google Colab, otherwise read it from disk.
import io

csv_name = 'synthetic_fund_cashflows.csv'
try:
    from google.colab import files
except ImportError:
    # Outside Colab there is no upload widget; expect the CSV in the working directory.
    df = pd.read_csv(csv_name)
else:
    uploaded = files.upload()
    if not uploaded:
        raise FileNotFoundError(f"No file was uploaded; expected '{csv_name}'")
    # Colab can store a re-uploaded file under a renamed key such as "name (1).csv",
    # so fall back to the first uploaded file when the exact name is missing.
    upload_key = csv_name if csv_name in uploaded else next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[upload_key]))

# Scale the continuous columns with MinMaxScaler (default range [0, 1]).
# NOTE: the scaler is fitted on every row, including rows later used for evaluation.
# Keep `scaler` around: it is needed to map predictions back to original units.
features = ['Capital_Called', 'Distribution', 'NAV', 'Fund_Age_Qtrs', 'Committed_Capital']
scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])

# One-hot encode the categoricals as floats. pandas >= 2.0 emits bool dummies by default,
# which makes the mixed frame's `.values` an object array that torch.tensor cannot convert.
df = pd.get_dummies(df, columns=['Strategy', 'Geography'], dtype=float)

In [None]:
# Sequence Preparation
# Slide a window over each fund's history (ordered by 'Quarter'): `sequence_length`
# consecutive rows of all model inputs form one sample, and the next `forecast_horizon`
# rows of the two cash-flow columns, flattened row by row, form its target.
sequence_length = 8
forecast_horizon = 3
target_columns = ['Capital_Called', 'Distribution']
fund_ids = df['Fund_ID'].unique()
sequences, targets = [], []
for fund in fund_ids:
    fund_data = df[df['Fund_ID'] == fund].sort_values(by='Quarter')
    fund_inputs = fund_data.drop(columns=['Fund_ID', 'Quarter', 'Vintage_Year'])
    # Look the targets up by name rather than assuming they are the first two columns.
    target_idx = [fund_inputs.columns.get_loc(col) for col in target_columns]
    # Force a float array: bool one-hot columns mixed with floats would otherwise give an
    # object array, which torch.tensor(..., dtype=torch.float32) rejects.
    fund_values = fund_inputs.to_numpy(dtype=np.float32)
    # +1 so the window whose target ends exactly on the fund's last row is included.
    # Funds shorter than one full window yield an empty range and are skipped.
    n_windows = len(fund_values) - sequence_length - forecast_horizon + 1
    for i in range(n_windows):
        target_start = i + sequence_length
        target_end = target_start + forecast_horizon
        seq = fund_values[i:target_start]
        target = fund_values[target_start:target_end][:, target_idx].flatten()
        sequences.append(seq)
        targets.append(target)
if not sequences:
    raise ValueError(
        "No training windows were produced: every fund has fewer than "
        f"{sequence_length + forecast_horizon} rows"
    )
X = np.array(sequences)  # (n_samples, sequence_length, n_inputs)
y = np.array(targets)    # (n_samples, forecast_horizon * len(target_columns))

In [None]:
# Dataset & Model
class FundDataset(Dataset):
    """Map-style dataset pairing each input window with its forecast target.

    Both arrays are copied into float32 tensors once, at construction time.
    Indexing returns an ``(inputs, target)`` tuple for the requested sample.
    """

    def __init__(self, X, y):
        # Convert the inputs first, then the targets, both as float32 tensors.
        self.X, self.y = (
            torch.tensor(array, dtype=torch.float32) for array in (X, y)
        )

    def __len__(self):
        """Number of samples, i.e. the length of the first axis of ``X``."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(inputs, target)`` pair stored at ``idx``."""
        inputs = self.X[idx]
        target = self.y[idx]
        return inputs, target

class LSTMModel(nn.Module):
    """Single-layer LSTM followed by a linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.linear = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Map a batch-first ``(batch, time, features)`` tensor to ``(batch, output_size)``."""
        hidden_states, _ = self.lstm(x)
        # Only the hidden state of the final time step feeds the linear head.
        last_step = hidden_states[:, -1, :]
        return self.linear(last_step)

In [None]:
# Train Model
# Seed torch so weight initialisation and batch shuffling are the same on every run.
torch.manual_seed(42)
dataset = FundDataset(X, y)
loader = DataLoader(dataset, batch_size=16, shuffle=True)
model = LSTMModel(input_size=X.shape[2], hidden_size=64, output_size=y.shape[1])
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 30
model.train()  # make training mode explicit in case the cell is re-run after evaluation
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_X, batch_y in loader:
        optimizer.zero_grad()
        output = model(batch_X)
        loss = loss_fn(output, batch_y)
        loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch does not skew the epoch average.
        running_loss += loss.item() * batch_X.size(0)
    epoch_loss = running_loss / len(dataset)
    if (epoch + 1) % 5 == 0 or epoch == 0:
        print(f"Epoch {epoch + 1:>2}/{num_epochs} - train MSE: {epoch_loss:.6f}")

In [None]:
# Evaluate
# NOTE: X is the same array the model was trained on, so these are in-sample errors and
# say nothing about generalisation. They are also in the units of `y`, which come from the
# MinMax-scaled columns rather than from currency amounts.
model.eval()
with torch.no_grad():
    X_tensor = torch.tensor(X, dtype=torch.float32)
    predictions = model(X_tensor).numpy()
mae = mean_absolute_error(y, predictions)
# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6. Taking the square
# root of the per-output MSE and averaging gives the same multi-output RMSE on any version.
rmse = float(np.sqrt(mean_squared_error(y, predictions, multioutput='raw_values')).mean())
print(f"MAE: {mae:.4f}, RMSE: {rmse:.4f}")