# Introduction

This notebook aims to detect anomalies in financial markets using the Isolation Forest model, alongside a supervised XGBoost classifier trained on the labelled `Y` column.

In [139]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import silhouette_score
from sklearn.neighbors import LocalOutlierFactor
from sklearn.metrics import classification_report, confusion_matrix

# Data Cleaning

In [175]:
# Load the 'EWS' sheet of the market-data workbook.
dataset = pd.read_excel('../data/FinancialMarketData.xlsx', sheet_name='EWS')

# Moving averages over a 7-row window (leading rows without a full window are NaN).
dataset['VIX_moving_average'] = dataset['VIX'].rolling(window=7).mean()
dataset['BDIY_moving_average'] = dataset['BDIY'].rolling(window=7).mean()

# Ratios
dataset['DXY_to_VIX'] = dataset['DXY'] / dataset['VIX']

# Differences (row-over-row change; the first row is NaN)
dataset['VIX_diff'] = dataset['VIX'].diff()

# Correlate numeric/boolean columns only. The frame still holds the
# datetime-like 'Data' column (its .dt accessor is used further down), and
# pandas >= 2.0 no longer drops non-numeric columns implicitly in corr()
# (numeric_only defaults to False). Selecting by dtype reproduces the older
# implicit behaviour and works on every pandas version.
correlation_matrix = dataset.select_dtypes(include=['number', 'bool']).corr()

# Columns whose absolute correlation with the target 'Y' is below 0.1.
low_cor_cols = correlation_matrix.index[correlation_matrix['Y'].abs() < 0.1]

# Cleaning data: drop the weakly correlated columns into a new frame.
dataset_cleaned = dataset.drop(columns=low_cor_cols)


# Calendar features derived from the datetime-like 'Data' column.
# NOTE(review): these columns are written to `dataset` after `dataset_cleaned`
# was already derived from it, so they are not part of `dataset_cleaned` --
# confirm whether they were meant to reach the model.
date_parts = dataset['Data'].dt
dataset['Année'] = date_parts.year
dataset['Mois'] = date_parts.month
dataset['Jour'] = date_parts.day
dataset['Jour_semaine'] = date_parts.dayofweek
# 1 when the day-of-week index is 5 or higher (Saturday/Sunday in pandas'
# Monday=0 convention), otherwise 0.
dataset['Est_weekend'] = (dataset['Jour_semaine'] >= 5).astype('int64')

# Remove the 'Data' column from the cleaned frame if the correlation filter
# left it in; a missing column is silently tolerated.
dataset_cleaned = dataset_cleaned.drop(columns=['Data'], errors='ignore')


# Data Preparation

In [176]:
# Target vector: the 'Y' column; feature matrix: every remaining column.
y = dataset_cleaned['Y']
X = dataset_cleaned.drop(columns='Y')

# Split the data: hold out 30% of the rows for testing; the fixed seed keeps
# the split reproducible across runs.
split_parts = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts


# Model Training

In [192]:
# XGBoost classifier with fixed hyperparameters and seed.
xgb_params = {'n_estimators': 50, 'learning_rate': 0.21, 'random_state': 55}
model = XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

# Predict the held-out rows and print the per-class precision/recall/F1 report.
y_pred = model.predict(X_test)
test_report = classification_report(y_test, y_pred)
print(test_report)

              precision    recall  f1-score   support

           0       0.91      0.96      0.93       266
           1       0.79      0.62      0.69        68

    accuracy                           0.89       334
   macro avg       0.85      0.79      0.81       334
weighted avg       0.88      0.89      0.88       334

