<a href="https://colab.research.google.com/github/Sruthij93/Market-Anomaly-Detection/blob/main/Market_Anomaly_Milestone2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import pickle
import shap
import pickle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import IsolationForest
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

In [3]:
from sklearn.metrics import roc_auc_score, roc_curve, auc
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [4]:
# Fetch the project repository; the CSV and the pickled scaler/PCA/model files loaded below are read from this clone.
!git clone https://github.com/Sruthij93/Market-Anomaly-Detection

Cloning into 'Market-Anomaly-Detection'...
remote: Enumerating objects: 82, done.[K
remote: Counting objects: 100% (82/82), done.[K
remote: Compressing objects: 100% (62/62), done.[K
remote: Total 82 (delta 39), reused 48 (delta 18), pack-reused 0 (from 0)[K
Receiving objects: 100% (82/82), 10.35 MiB | 13.14 MiB/s, done.
Resolving deltas: 100% (39/39), done.


In [5]:
# Switch the working directory to the cloned repository (the loads below use absolute paths regardless).
%cd /content/Market-Anomaly-Detection

/content/Market-Anomaly-Detection


In [6]:
# Load the simulated financial data from the cloned repo; this frame is the input to every model run below.
sim_data = pd.read_csv('/content/Market-Anomaly-Detection/sim_financial_data.csv')

In [15]:
# Clean the test data and conduct preprocessing

def data_cleaning(test_df):
  """Select the model's input features and append moving-average columns.

  Only the 14 market features listed below are kept, in that fixed order.
  Features absent from the input are created and filled with 0; every other
  column (for example 'Date') is dropped. Five derived columns are then
  appended. The input frame itself is never modified.

  Args:
    test_df: DataFrame of raw market data.

  Returns:
    A new DataFrame holding the 14 model features followed by 'VIX_EMA',
    'BDIY_EMA', 'DXY_EMA', 'VIX_MA' and 'BDIY_MA'.
  """
  model_features = ['XAU BGNL', 'BDIY', 'CRY', 'DXY', 'VIX', 'USGG30YR', 'GT10', 'EONIA', 'GTITL30YR', 'GTJPY30YR', 'LP01TREU', 'MXUS', 'MXEU', 'MXIN']

  # Span of the exponential averages and window of the simple averages.
  window = 50

  # reindex does the whole selection in one step: extra columns are dropped,
  # missing ones are created with 0, and the order follows model_features.
  # It returns a new frame, so the caller's data is left alone and no
  # column is ever assigned onto a slice of the input.
  features = test_df.reindex(columns=model_features, fill_value=0)

  def exponential_average(column):
    return features[column].ewm(span=window, adjust=False).mean()

  def simple_average(column):
    # The first window-1 rows have no complete window and come out as NaN;
    # fall back to the raw value there.
    return features[column].rolling(window=window).mean().fillna(features[column])

  return features.assign(
      VIX_EMA=exponential_average('VIX'),
      BDIY_EMA=exponential_average('BDIY'),
      DXY_EMA=exponential_average('DXY'),
      VIX_MA=simple_average('VIX'),
      BDIY_MA=simple_average('BDIY'),
  )


def data_preprocessing(test_df, model, artifacts_dir='/content/Market-Anomaly-Detection'):
  """Turn raw market data into the feature matrix the saved models consume.

  The frame is cleaned with data_cleaning, scaled with the pickled scaler,
  projected with the pickled PCA, and finally the PCA output is
  concatenated column-wise with six of the scaled columns.

  Args:
    test_df: DataFrame of raw market data.
    model: Name of the target model. 'isolation_forest' selects
      'pca_isoforest.pkl'; any other value selects 'pca.pkl'.
    artifacts_dir: Directory holding 'scaler.pkl' and the PCA pickles.
      Defaults to the Colab clone location.

  Returns:
    A 2-D NumPy array: the PCA output columns followed by the scaled 'VIX',
    'VIX_EMA', 'BDIY_EMA', 'DXY_EMA', 'VIX_MA' and 'BDIY_MA' columns.
  """

  def load_artifact(filename):
    # NOTE: pickle.load can execute arbitrary code while unpickling; only
    # point artifacts_dir at files from a trusted source.
    with open(f'{artifacts_dir}/{filename}', 'rb') as file:
      return pickle.load(file)

  test_df = data_cleaning(test_df)

  # Use the statistics stored in the pickled scaler (presumably fitted on the
  # training data - confirm). Calling fit_transform here would refit on the
  # data being scored and throw those statistics away; a single-row input
  # would then scale to all zeros.
  scaler = load_artifact('scaler.pkl')
  test_df_scaled = scaler.transform(test_df)

  # The Isolation Forest has its own PCA file; every other model shares one.
  pca_filename = 'pca_isoforest.pkl' if model == 'isolation_forest' else 'pca.pkl'
  pca = load_artifact(pca_filename)
  test_df_pca = pca.transform(test_df_scaled)

  # Adding raw features(scaled) for better performance
  selected_columns = ['VIX', 'VIX_EMA', 'BDIY_EMA', 'DXY_EMA', 'VIX_MA', 'BDIY_MA']
  selected_columns_indices = [test_df.columns.get_loc(col) for col in selected_columns]

  return np.hstack((test_df_pca, test_df_scaled[:, selected_columns_indices]))




In [10]:
# Test the Isolation Forest model
test_df = data_preprocessing(sim_data, 'isolation_forest')

# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# model files that come from a trusted source.
with open('/content/Market-Anomaly-Detection/isoforest_model.pkl', 'rb') as file:
  isoforest_model = pickle.load(file)

# Hard labels per row (scikit-learn's IsolationForest returns -1 for outliers
# and 1 for inliers).
isoforest_predictions = isoforest_model.predict(test_df)

# Continuous anomaly score per row (for IsolationForest, lower means more abnormal).
isoforest_scores = isoforest_model.score_samples(test_df)

# Attach both results to a copy of the input so the raw data stays untouched.
# Labels and scores get separate, correctly named columns; previously the
# scores were stored under the 'isoforest_predictions' name and the labels
# were dropped.
data_copy = sim_data.copy()
data_copy['isoforest_predictions'] = isoforest_predictions
data_copy['isoforest_scores'] = isoforest_scores


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [12]:
# Run the saved SVM model on the simulated data.
# Step 1: build the model's input matrix from the raw frame.
test_df = data_preprocessing(sim_data, 'svm')

# Step 2: restore the pickled model from the cloned repo.
with open('/content/Market-Anomaly-Detection/svm_model.pkl', 'rb') as file:
  svm_model = pickle.load(file)

# Step 3: predict one value per input row.
svm_predictions = svm_model.predict(test_df)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [16]:
# Run the saved voting classifier on the simulated data.
# Step 1: build the model's input matrix from the raw frame.
test_df = data_preprocessing(sim_data, 'voting')

# Step 2: restore the pickled ensemble from the cloned repo.
with open('/content/Market-Anomaly-Detection/voting_clf_model.pkl', 'rb') as file:
  voting_clf = pickle.load(file)

# Step 3: predict one value per input row.
voting_predictions = voting_clf.predict(test_df)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [14]:
# Reshape the voting predictions into a 2-D array with a single column (one row per prediction).
voting_predictions= voting_predictions.reshape(-1,1)