<a href="https://colab.research.google.com/github/ManjuRama/FinMath/blob/main/fx.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from statsmodels.tsa.api import VAR
from sklearn.metrics import mean_absolute_percentage_error
import matplotlib.pyplot as plt

# Load one CSV per FX pair (replace the file names with your actual paths).
# All three files are read with the same options: the 'timestamp' column is
# parsed as datetimes and becomes the DataFrame index.
_fx_csv_options = {'parse_dates': ['timestamp'], 'index_col': 'timestamp'}
data_usdchf = pd.read_csv('USDCHF_data.csv', **_fx_csv_options)
data_gbpusd = pd.read_csv('GBPUSD_data.csv', **_fx_csv_options)
data_eurusd = pd.read_csv('EURUSD_data.csv', **_fx_csv_options)

# Function to preprocess and prepare the data for VAR model
def prepare_data_for_VAR(data, target_col, lag_hours=1):
    """
    Add lagged feature columns and split the result into train/test sets.

    For every lag ``i`` in ``1 .. lag_hours * 60`` three columns are appended,
    in this order: ``volume_lag_i``, ``spread_lag_i`` and ``midquote_lag_i``.
    They hold ``volume_last_minute``, ``spread`` and ``mid_quote`` shifted by
    ``i`` rows (one row is presumably one minute -- confirm against the data).
    Rows containing any NaN are then dropped, which removes at least the
    first ``lag_hours * 60`` rows because their lags are incomplete.

    - data: DataFrame holding ``volume_last_minute``, ``spread``,
      ``mid_quote`` and ``target_col``. It is not modified.
    - target_col: name of the column returned as ``y``. It is used as-is;
      this function does not shift or otherwise derive it.
    - lag_hours: number of hours of lags to create (default is 1), i.e.
      ``lag_hours * 60`` lag steps per source column.

    Returns ``(X_train, y_train, X_test, y_test)``: the first 80% of the
    remaining rows (in their existing order) form the train set and the rest
    the test set; ``X_*`` are all columns except ``target_col``.

    Raises KeyError if ``target_col`` is missing, or if a source column is
    missing while at least one lag is requested.
    """
    # (prefix used in the lag column name, source column it is taken from)
    lag_sources = (
        ('volume', 'volume_last_minute'),
        ('spread', 'spread'),
        ('midquote', 'mid_quote'),
    )

    # Collect every lagged column first and attach them with ONE concat.
    # Inserting hundreds of columns one by one fragments the DataFrame and
    # makes pandas emit a PerformanceWarning.
    lagged_columns = []
    for i in range(1, lag_hours * 60 + 1):
        for prefix, source in lag_sources:
            lagged_columns.append(data[source].shift(i).rename(f'{prefix}_lag_{i}'))

    # concat builds a new frame, so the caller's `data` is left untouched.
    # dropna removes every row that has a NaN in any column.
    data_lagged = pd.concat([data, *lagged_columns], axis=1).dropna()

    # Split by position (no shuffling): first 80% train, remainder test
    train_size = int(0.8 * len(data_lagged))
    train_data = data_lagged[:train_size]
    test_data = data_lagged[train_size:]

    # Features are every column except the target
    X_train = train_data.drop(columns=[target_col])
    y_train = train_data[target_col]
    X_test = test_data.drop(columns=[target_col])
    y_test = test_data[target_col]

    return X_train, y_train, X_test, y_test

# Function to fit VAR model and predict next hour volume
def fit_VAR_and_predict(X_train, y_train, X_test, y_test):
    """
    Fit a VAR on the training data and forecast the target over the test span.

    - X_train, y_train: training features and target; they are joined
      column-wise (target last) and all columns are modelled jointly.
    - X_test: only its length (forecast horizon) and index (labels for the
      forecast rows) are used.
    - y_test: actual target values; its name selects the forecast column and
      its values are scored against the forecast.

    Returns (forecast of the target as a Series indexed like X_test, MAPE).
    """
    # The VAR is fitted on features and target together
    endog = pd.concat([X_train, y_train], axis=1)

    # Lag order is chosen by AIC, searching up to 60 lags
    results = VAR(endog).fit(maxlags=60, ic='aic')

    # Seed the forecast with the last k_ar training rows
    n_lags = results.k_ar
    seed_rows = endog.values[-n_lags:]

    # NOTE(review): this is one multi-step forecast covering the whole test
    # span, not a rolling one-step-ahead forecast -- confirm that is intended.
    horizon = len(X_test)
    raw_forecast = results.forecast(y=seed_rows, steps=horizon)

    # Label the forecast with the test index and keep only the target column
    forecast_frame = pd.DataFrame(raw_forecast, index=X_test.index, columns=endog.columns)
    target_forecast = forecast_frame[y_test.name]

    # Mean absolute percentage error of the forecast against the actuals
    mape = mean_absolute_percentage_error(y_test, target_forecast)

    return target_forecast, mape

# Run the whole pipeline (prepare -> fit/forecast -> report -> plot) per FX pair
fx_frames = {'USDCHF': data_usdchf, 'GBPUSD': data_gbpusd, 'EURUSD': data_eurusd}
for pair, data in fx_frames.items():
    print(f'Predicting for {pair}...')

    # Build lag features and the train/test split for this pair
    X_train, y_train, X_test, y_test = prepare_data_for_VAR(data, target_col='volume_last_hour')

    # Forecast the target over the test span and score it
    predicted_volume, mape = fit_VAR_and_predict(X_train, y_train, X_test, y_test)

    print(f'{pair} - MAPE: {mape:.4f}')

    # Optional: overlay the forecast (dashed) on the actual series
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(y_test.index, y_test, label='Actual Volume')
    ax.plot(predicted_volume.index, predicted_volume, label='Predicted Volume', linestyle='--')
    ax.set_title(f'{pair} - Actual vs Predicted Next Hour Volume')
    ax.legend()
    plt.show()
