### Dependencies

In [1]:
import sqlite3 # Database library.
import os # Folder management library.
import pickle # Serializing module.
import numpy as np # Scientific computing library.
import matplotlib.pyplot as plt # Plotting library.
from mpl_toolkits.mplot3d import Axes3D # 3D plotting tool.
from sklearn.neighbors import LocalOutlierFactor # Outlier Dection library.

### Retrieve Examples from Database

In [2]:
# Load up to MAX_EXAMPLES_PER_CLASS examples per star class from the SQLite
# database. After this cell:
#   X[i]     -> np.array of the deserialized feature entries of class i
#   Y[i]     -> np.array of shape (n_i, 1) filled with the integer label i
#   names[i] -> list of star names, index-aligned with X[i]
MAX_EXAMPLES_PER_CLASS = 600

# One table per class; the position in this list is the integer class label.
classes = ['cep_1o', 'cep_f', 'dsct','eb_ec', 'eb_ed', 'eb_esd', 'lpv_mira_agb_c', 'lpv_mira_agb_o', 'lpv_osarg_agb',
           'lpv_osarg_rgb', 'lpv_srv_agb_c', 'lpv_srv_agb_o', 'rrab', 'rrc', 'rrd', 'rre', 't2cep']

X, Y, names = [], [], []

# Initializing database and cursor.
star_data_db = sqlite3.connect('star_data.db')
try:
    star_data_cursor = star_data_db.cursor()

    for label, classv in enumerate(classes):
        # Name and features are read in ONE query so that names[i][j] is
        # guaranteed to describe X[i][j]; two separate unordered SELECTs give
        # no such guarantee. Table names cannot be bound as SQL parameters,
        # so the name is concatenated -- safe here because `classes` is the
        # fixed list defined above.
        star_data_cursor.execute('SELECT star_name, star_features FROM ' + classv)

        # fetchmany() stops after the requested number of rows instead of
        # materializing the whole table and slicing it.
        rows = star_data_cursor.fetchmany(MAX_EXAMPLES_PER_CLASS)

        names.append([row[0] for row in rows])
        # NOTE: pickle.loads executes arbitrary code embedded in the payload;
        # only use this with a database file from a trusted source.
        X.append(np.array([pickle.loads(row[1]) for row in rows]))
        Y.append(np.array([[label]] * len(rows)))

    # Close cursor (note the call parentheses).
    star_data_cursor.close()
finally:
    # Always release the database, even if a query or unpickling failed.
    star_data_db.close()

print('done')

done


### Unsupervised Outlier Detection using Local Outlier Factor (LOF)
The LOF score of an observation is equal to the ratio of the average local density of its k-nearest neighbors, and its own local density: a normal instance is expected to have a local density similar to that of its neighbors, while abnormal data are expected to have much smaller local density. LOF can detect both local and global outliers.
<img src="img/lof.png" width="400" alt="Illustration of the Local Outlier Factor">


### 10 Best Outlier Candidates

In [5]:
# Rank the most anomalous 't2cep' stars by repeatedly fitting LOF, taking the
# single most outlying sample, and removing it before the next fit.
N_BEST_OUTLIERS = 10   # how many outliers to extract
N_NEIGHBORS = 20       # k used by the Local Outlier Factor model

# Constructing data. The class index is looked up by name rather than
# hard-coded, so it stays correct if `classes` is reordered.
t2cep_label = classes.index('t2cep')
names_t2cep = np.array(names[t2cep_label])
X_t2cep = X[t2cep_label]

best_outliers = []

# Iteratively applying the LOF algorithm to obtain the best outliers.
for _ in range(N_BEST_OUTLIERS):

    # LOF compares a sample with its neighbours, so stop once there are
    # too few samples left to compare.
    if len(X_t2cep) < 2:
        break

    # Fitting the Local Outlier Factor model to the current data set.
    lof = LocalOutlierFactor(n_neighbors=N_NEIGHBORS)
    lof.fit(X_t2cep)

    # The lowest negative_outlier_factor_ marks the most abnormal sample.
    # Taking the argmin always selects exactly one sample, whereas thresholding
    # with contamination=1/n can flag none when the lowest scores tie.
    outlier_ind = int(np.argmin(lof.negative_outlier_factor_))

    # Record the name as a plain str so the printed list does not depend on
    # NumPy's scalar repr.
    best_outliers.append(str(names_t2cep[outlier_ind]))

    # Remove that outlier from the working copies; rebinding to the results of
    # np.delete leaves X and names untouched.
    X_t2cep = np.delete(X_t2cep, outlier_ind, 0)
    names_t2cep = np.delete(names_t2cep, outlier_ind, 0)

print('The 10 best outliers are: ', best_outliers)



The 10 best outliers are:  ['OGLE-LMC-T2CEP-113.dat', 'OGLE-BLG-T2CEP-177.dat', 'OGLE-LMC-T2CEP-115.dat', 'OGLE-BLG-T2CEP-351.dat', 'OGLE-BLG-T2CEP-345.dat', 'OGLE-BLG-T2CEP-354.dat', 'OGLE-LMC-T2CEP-200.dat', 'OGLE-BLG-T2CEP-350.dat', 'OGLE-BLG-T2CEP-352.dat', 'OGLE-LMC-T2CEP-061.dat']
