In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in file_names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

<div class="text_cell_render border-box-sizing rendered_html">
<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:crimson;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">
<h1 style="text-align: center;
           padding: 10px;
              color:white">
Simple Anomaly Detection using Unsupervised KNN
</h1>
</div>
</div>

<img src="https://ei.marketwatch.com/Multimedia/2017/05/22/Photos/ZH/MW-FM980_netfli_20170522210252_ZH.jpg?uuid=8bee2706-3f53-11e7-8476-9c8e992d421e" alt="">

<div class="text_cell_render border-box-sizing rendered_html">
<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:crimson;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">
<h1 style="text-align: center;
           padding: 10px;
              color:white">
Import Libraries
</h1>
</div>
</div>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors

<div class="text_cell_render border-box-sizing rendered_html">
<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:crimson;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">
<h1 style="text-align: center;
           padding: 10px;
              color:white">
Read Data
</h1>
</div>
</div>

In [None]:
# Load the full price history, then keep only the rows from position 4000 onward.
# The original row labels are kept (the index is not reset).
df = pd.read_csv('/kaggle/input/updated-netflix-stock-price-all-time/netflix.csv')
# .copy() turns the slice into an independent frame, so the column assignment
# below writes to it directly instead of to a slice of the full table (which
# makes pandas emit SettingWithCopyWarning).
df = df.iloc[4000:, :].copy()
# Parse the 'Date' column into datetime values so it can be used as a time axis.
df['Date'] = pd.to_datetime(df['Date'])
df

In [None]:
# Line chart of the opening price over time, drawn through the Axes object.
fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(df["Date"], df["Open"], color='Red')
ax.set_title('Netflix Stock Prices (April 13th 2018 ~)')
ax.set_xlabel('Date', fontsize='11')
ax.set_ylabel('Opening Price in USD', fontsize='11')
ax.grid()
plt.show()

<div class="text_cell_render border-box-sizing rendered_html">
<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:crimson;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">
<h1 style="text-align: center;
           padding: 10px;
              color:white">
Data Pre-processing
</h1>
</div>
</div>

In [None]:
# Remove the price/volume columns that the rest of the notebook does not use.
unused_columns = ['High', 'Low', 'Close', 'Volume', 'Adj Close']
df = df.drop(columns=unused_columns)

In [None]:
# Replace each timestamp in 'Date' with its 0-based row position (0, 1, 2, ...),
# giving the column a plain integer dtype.
# A single whole-column assignment is used instead of a per-row chained
# assignment (`df['Date'][label] = i`): chained assignment is not guaranteed to
# write back to `df` (it is a no-op under pandas copy-on-write), and it relied on
# the row labels starting at exactly 4000.
df['Date'] = np.arange(len(df))
df

<div class="text_cell_render border-box-sizing rendered_html">
<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:crimson;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">
<h1 style="text-align: center;
           padding: 10px;
              color:white">
Unsupervised Learning : KNN
</h1>
</div>
</div>

<img src="https://miro.medium.com/max/778/1*z8PiWePLH9P82hJ2P0N0Jg.png" alt="">

**KNN is usually introduced as a supervised learning algorithm.  
However, its distance calculation can also be used on its own as an unsupervised learning method.**  

**In this work, we will use Scikit-Learn's NearestNeighbors, which can be used for unsupervised learning.**

### sklearn.neighbors.NearestNeighbors
```
class sklearn.neighbors.NearestNeighbors(*, n_neighbors=5, radius=1.0, algorithm='auto', leaf_size=30, metric='minkowski', p=2, metric_params=None, n_jobs=None)
```
- n_neighbors : int, default=5 (Number of neighbors to use by default for kneighbors queries.)
- radius : float, default=1.0 (Range of parameter space to use by default for radius_neighbors queries.)
- algorithm : {‘auto’, ‘ball_tree’, ‘kd_tree’, ‘brute’}, default=’auto’ (Algorithm used to compute the nearest neighbors)
- metric : str or callable, default=’minkowski’ (the distance metric to use for the tree.)
- p : int, default=2 (Parameter for the Minkowski metric from sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2.)

In [None]:
# Configure the unsupervised nearest-neighbour model: 10 neighbours per query.
knn_settings = {"n_neighbors": 10}
nbrs = NearestNeighbors(**knn_settings)
# Fit on every column currently present in df.
nbrs.fit(X=df)

In [None]:
# Query the fitted model with the same rows it was trained on: `distances` holds
# the distances to each row's k nearest neighbours, `indexes` their positions.
# NOTE(review): because the query points are the training points, each row's
# nearest neighbour is presumably itself at distance 0 - confirm this is intended.
distances, indexes = nbrs.kneighbors(X=df)
# Plot the mean neighbour distance of every row, in row order.
plt.figure(figsize=(15, 7))
row_mean_distance = distances.mean(axis=1)
plt.plot(row_mean_distance)

In [None]:
# Wrap the distance matrix in a DataFrame and average across its columns,
# producing one mean-distance score per row.
distances = pd.DataFrame(data=distances)
distances_mean = distances.mean(axis="columns")
distances_mean

<div class="text_cell_render border-box-sizing rendered_html">
<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:crimson;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">
<h1 style="text-align: center;
           padding: 10px;
              color:white">
Get Abnormal Points
</h1>
</div>
</div>

**Set thresholds with reference to statistics.**

In [None]:
# Summary statistics of the per-row mean distance, used to choose the threshold.
distance_summary = distances_mean.describe()
distance_summary

Since the 75th percentile is 8.35, we set the threshold to 9.0.

In [None]:
# Rows whose mean neighbour distance exceeds this cut-off are flagged as outliers.
th = 9.0
exceeds_threshold = distances_mean > th
# Positions of the flagged rows, as a 1-tuple holding an integer array
# (the same shape of result that single-argument np.where returns).
outlier_index = exceeds_threshold.to_numpy().nonzero()
outlier_index

In [None]:
# outlier_index is a 1-tuple holding the row positions; unpack it and select
# those rows from df by position.
(outlier_rows,) = outlier_index
outlier_values = df.iloc[outlier_rows]
outlier_values

<div class="text_cell_render border-box-sizing rendered_html">
<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:crimson;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">
<h1 style="text-align: center;
           padding: 10px;
              color:white">
Visualize Abnormal Points
</h1>
</div>
</div>

In [None]:
# Draw the full opening-price series as a blue line ...
outlier_fig, outlier_ax = plt.subplots(figsize=(20, 7))
outlier_ax.plot(df["Date"], df["Open"], color="b")
# ... and mark the rows flagged as outliers with red dots on top of it.
outlier_ax.scatter(outlier_values["Date"], outlier_values["Open"], color="r")

**It can be seen that our KNN algorithm captures the soaring and plunging points of the stock graph quite well! 🙌**

## If you like this notebook then please give an upvote 👍