<a href="https://colab.research.google.com/github/ManjuRama/MSFE/blob/main/VarModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.api import VAR
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Example DataFrame (ensure your real dataset is properly loaded here)
# Assuming you have a dataset with columns: 'volumelastminute', 'volumelasthour', 'spread', 'midprice', 'nexthourvolume'
# Load your dataset as a pandas DataFrame
# df = pd.read_csv('your_dataset.csv')

# For example, generating synthetic data for demonstration purposes:
# Seed NumPy's global RNG so the synthetic demo data is reproducible.
np.random.seed(42)
n = 1000  # number of time steps
data = {
    'volumelastminute': np.random.rand(n) * 100,
    'volumelasthour': np.random.rand(n) * 500,
    'spread': np.random.rand(n) * 5,
    'midprice': np.random.rand(n) * 1000,
    'nexthourvolume': np.random.rand(n) * 600,
}
df = pd.DataFrame(data)

# Lag order of the VAR model (VAR(1) here). Tune it in this one place: both
# the fit and the forecast seed below follow it.
lag_order = 1
# Column whose forecast is evaluated and plotted.
target_column = 'nexthourvolume'

# NOTE: the frame is deliberately NOT shifted by hand. VAR builds its lagged
# regressors internally from `maxlags`; shifting every column by the same
# amount would only relabel the rows and throw away one observation.

# Chronological split: first 80% for training, remaining 20% for testing.
train_size = int(len(df) * 0.8)
train_data = df.iloc[:train_size]
test_data = df.iloc[train_size:]

# Fit the VAR model on the training window only.
model = VAR(train_data)
var_model = model.fit(maxlags=lag_order)

# Seed the forecast with the last `k_ar` training observations (the number of
# lags the fitted model actually uses) instead of a hard-coded single row, so
# this keeps working when `lag_order` is changed.
# This is a single multi-step-ahead forecast over the whole test horizon: the
# test observations themselves are never fed back into the model.
forecast_seed = train_data.values[-var_model.k_ar:]
predictions = var_model.forecast(forecast_seed, steps=len(test_data))

# Convert predictions into a DataFrame aligned with the test rows.
predicted_df = pd.DataFrame(predictions, index=test_data.index, columns=train_data.columns)

# Compare the predicted target series with the actual values.
actual_volume = test_data[target_column].values
predicted_volume = predicted_df[target_column].values

# Mean Squared Error of the target forecast over the test window.
mse = mean_squared_error(actual_volume, predicted_volume)
print(f"Mean Squared Error: {mse}")

# Plot actual vs predicted 'nexthourvolume'
plt.figure(figsize=(10, 6))
plt.plot(test_data.index, actual_volume, label='Actual nexthourvolume', color='blue')
plt.plot(predicted_df.index, predicted_volume, label='Predicted nexthourvolume', color='red')
plt.xlabel('Time')
plt.ylabel('nexthourvolume')
plt.legend()
plt.title('Actual vs Predicted nexthourvolume')
plt.show()
