# Anomaly Detection

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

# fix_yahoo_finance is used to fetch data 
import fix_yahoo_finance as yf
yf.pdr_override()

In [2]:
# input
symbol = 'AMD'
start = '2007-01-01'
end = '2018-12-28'

# Read data 
dataset = yf.download(symbol,start,end)

# View Columns
dataset.head()

[*********************100%***********************]  1 of 1 downloaded


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2007-01-03,20.08,20.4,19.35,19.52,19.52,28350300
2007-01-04,19.66,19.860001,19.32,19.790001,19.790001,23652500
2007-01-05,19.540001,19.91,19.540001,19.709999,19.709999,15902400
2007-01-08,19.709999,19.860001,19.370001,19.469999,19.469999,15814800
2007-01-09,19.450001,19.709999,19.370001,19.65,19.65,14494200


In [3]:
# Create more data
dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)
dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)
dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)
dataset['Return'] = dataset['Adj Close'].pct_change()
dataset = dataset.dropna()
dataset.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Increase_Decrease,Buy_Sell_on_Open,Buy_Sell,Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007-01-04,19.66,19.860001,19.32,19.790001,19.790001,23652500,0,0,0,0.013832
2007-01-05,19.540001,19.91,19.540001,19.709999,19.709999,15902400,0,1,0,-0.004043
2007-01-08,19.709999,19.860001,19.370001,19.469999,19.469999,15814800,0,0,1,-0.012177
2007-01-09,19.450001,19.709999,19.370001,19.65,19.65,14494200,1,1,1,0.009245
2007-01-10,19.639999,20.02,19.5,20.01,20.01,19783200,1,1,1,0.018321


In [4]:
dataset.shape

(3018, 10)

In [5]:
X = dataset[['Open', 'High', 'Low', 'Volume']].values
y = dataset['Buy_Sell'].values

In [6]:
from sklearn import metrics
from sklearn.model_selection import train_test_split

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [8]:
from sklearn.ensemble import IsolationForest

model = IsolationForest()

In [9]:
model.fit(X_test)

IsolationForest(behaviour='old', bootstrap=False, contamination='legacy',
        max_features=1.0, max_samples='auto', n_estimators=100,
        n_jobs=None, random_state=None, verbose=0)

In [12]:
y_pred = model.predict(X_test)



In [16]:
print('Anomaly Detection Score:') 
sklearn_score_anomalies = model.decision_function(X_test)
score = [-1*s + 0.5 for s in sklearn_score_anomalies]
print(score)

Anomaly Detection Score:
[0.40935756963442016, 0.47633790271693449, 0.54017926018857121, 0.40286363968056177, 0.41055012534261981, 0.40697620584274263, 0.43998745594148353, 0.40414322049217466, 0.60454525308124951, 0.40520002530887039, 0.42985011238567411, 0.40149384952479478, 0.46866133486550315, 0.41730542643852797, 0.44244457125503217, 0.43928238500507749, 0.42985990133572388, 0.45600750455687289, 0.43747670691472018, 0.39548025781347573, 0.44505215271777238, 0.44388462839343779, 0.40011541446463422, 0.47787917886727521, 0.50645834661067324, 0.45120429521981081, 0.4066452924526337, 0.58080502470275097, 0.40260378682464781, 0.48914833638008282, 0.42528830717543875, 0.43790513871272863, 0.41168201469525506, 0.45679745920987203, 0.40520297829482244, 0.43817234615483186, 0.45714766697880421, 0.51106329395066508, 0.43619350203472396, 0.43587790406452809, 0.52938376181757563, 0.39571686807824025, 0.41185493292256442, 0.47645591448176822, 0.63426024197245368, 0.41723601930750381, 0.4347649