In [None]:
import pyisc;
import numpy as np
from scipy.stats import poisson, norm, multivariate_normal
%matplotlib inline
from pylab import plot, figure

In [None]:
class_num = 3       # number of classes
data_len = 10000    # normal rows generated per class
anomaly_len = 15    # anomalous rows generated per class

# Seed numpy's global random state (scipy's rvs() draws from it by default)
# so that Restart & Run All reproduces the same data set and scores.
np.random.seed(42)


def sample_rows(freq1_rate, freq2_rate, measure_mean, measure_std, class_label, n_rows):
    """Draw n_rows synthetic rows for one class.

    Column layout of the returned (n_rows, 5) float array:
      0: constant 1
      1: Poisson(freq1_rate) count
      2: Poisson(freq2_rate) count
      3: Normal(measure_mean, measure_std) measure
      4: class_label
    """
    return np.column_stack(
        [
            np.ones(n_rows),
            poisson(freq1_rate).rvs(n_rows),
            poisson(freq2_rate).rvs(n_rows),
            norm(measure_mean, measure_std).rvs(n_rows),
            np.full(n_rows, class_label),
        ]
    )


# Normal rows for every class come first ...
parts = [sample_rows(10 + i, 2 + i, 1 + i, 12, i, data_len) for i in range(class_num)]
# ... followed by the anomalous rows (higher Poisson rates, wider Gaussian),
# so the last class_num * anomaly_len rows of `data` are the anomalies.
parts += [sample_rows(25 + i, 3 + i, 2 + i, 30, i, anomaly_len) for i in range(class_num)]

# Stack once instead of growing the array inside the loops;
# stays None when there are no classes, as before.
data = np.vstack(parts) if parts else None

In [None]:
# One component model per monitored column of `data`.
# The second argument of the Poisson models is column 0 (the constant-1 column);
# presumably this is the period/exposure column — confirm against the pyisc docs.
component_models = [
    pyisc.P_PoissonOnesided(1, 0),  # Freq1 (column 1): one-sided Poisson
    pyisc.P_Poisson(2, 0),          # Freq2 (column 2): two-sided Poisson
    pyisc.P_Gaussian(3),            # Measure (column 3): Gaussian
]

anomaly_detector = pyisc.AnomalyDetector(
    component_models=component_models,
    output_combination_rule=pyisc.cr_max,  # combined score = maximum of the component scores
)

In [None]:
# Fit the detector on the full data set (normal rows plus the injected anomalies).
# Column 4 of `data` holds the class label; y=4 presumably tells pyisc to treat
# that column as the class — confirm against the pyisc documentation.
anomaly_detector.fit(data, y=4) #Fit the data into the model

In [None]:
# Score the same rows the detector was fitted on. `scores` is later sliced in
# parallel with `data`, i.e. it is treated as one score per row, in row order.
scores = anomaly_detector.anomaly_score(data, y=4) #Get the anomaly scores

In [None]:
# Preview the first rows of the data set (normal rows of class 0) together
# with their overall anomaly scores.
from pandas import DataFrame

n_preview = 15
# Labels follow the order in which the columns were stacked when `data` was built:
# constant period, two Poisson counts, Gaussian measure, class label.
df = DataFrame(data[:n_preview], columns=['#Days', 'Freq1', 'Freq2', 'Measure', 'Class'])
df['Anomaly Score'] = scores[:n_preview]
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(df.to_string())

In [None]:
# Show the injected anomalies with their overall anomaly scores.
# The anomalous rows were appended last, anomaly_len per class, so they are
# the final class_num * anomaly_len rows (45 with the default settings).
n_anomalies = class_num * anomaly_len
df = DataFrame(data[-n_anomalies:], columns=['#Days', 'Freq1', 'Freq2', 'Measure', 'Class'])
df['Anomaly Score'] = scores[-n_anomalies:]
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(df.to_string())

In [None]:
# Plot every row's anomaly score against its row index, drawn as points ('.').
# The anomalous rows were appended last, so they sit at the far right of the plot.
# The trailing ';' keeps the notebook from echoing the return value of plot().
plot(scores, '.');

In [None]:
# Break the anomaly score of each injected anomaly down per component model.
# As indexed here, detail[0] is the combined score and detail[2] holds the
# per-component scores in the order the component models were declared
# (Freq1, Freq2, Measure) — confirm the tuple layout against the pyisc docs.
score_details = anomaly_detector.anomaly_score_details(data, y=4)

# The anomalies are the last class_num * anomaly_len rows of `data`.
n_anomalies = class_num * anomaly_len
tail_details = score_details[-n_anomalies:]  # slice once, reuse for every column

df = DataFrame(data[-n_anomalies:], columns=['#Days', 'Freq1', 'Freq2', 'Measure', 'Class'])
df['Anomaly:Freq1'] = [detail[2][0] for detail in tail_details]    # Anomaly score of Freq1
df['Anomaly:Freq2'] = [detail[2][1] for detail in tail_details]    # Anomaly score of Freq2
df['Anomaly:Measure'] = [detail[2][2] for detail in tail_details]  # Anomaly score of Measure
df['Anomaly Score'] = [detail[0] for detail in tail_details]       # Combined anomaly score
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(df)

In [None]:
# Build an evaluation set for classification: `length` rows per class, drawn
# from the same "normal" distributions used for the training data, but with
# the class column left as None. The true labels are kept in `true_classes`.
length = 1000
true_classes = []
class_blocks = []

# Iterate over class_num, the class count the training data was built with
# (the name n_classes is not defined anywhere in this notebook).
for i in range(class_num):
    class_blocks.append(
        np.column_stack(
            [
                [1] * length,
                poisson(10 + i).rvs(length),
                poisson(2 + i).rvs(length),
                norm(1 + i, 12).rvs(length),
                [None] * length,  # class is unknown to the classifier
            ]
        )
    )
    true_classes += [i] * length

# Stack once instead of growing the array inside the loop;
# stays None when there are no classes, as before.
data2 = np.vstack(class_blocks) if class_blocks else None

In [None]:
# Wrap the already-fitted anomaly detector as a classifier, predict the class
# of every row in data2, and record the accuracy as row 0 of the results table.
from sklearn.metrics import accuracy_score
from pandas import DataFrame

clf = pyisc.SklearnClassifier.clf(anomaly_detector)
predicted_classes = clf.predict(data2)
acc = accuracy_score(true_classes, predicted_classes)

# One row per algorithm; further rows are added to this table later.
result = DataFrame(columns=['Algorithm','Accuracy'])
result.loc[0] = ['pyISC classifier', acc]

In [None]:
# Compare against three scikit-learn classifiers (Gaussian naive Bayes,
# k-nearest neighbours, random forest), configured with the initial parameter
# values mentioned in the paper. Each is trained on `data` and evaluated on
# `data2`, and its accuracy is added to the `result` table next to pyISC's.
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

# The last column is the class label; everything before it is a feature.
X = data[:, :-1]
y = data[:, -1]
X_test = data2[:, :-1]  # loop-invariant, so slice it once up front

classifiers = [
    ('GaussianNB', GaussianNB()),
    ('KNeighborsClassifier', KNeighborsClassifier(n_neighbors=1000, weights='distance')),
    ('RandomForestClassifier', RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)),
]

# Row 0 of `result` is the pyISC classifier, so these rows start at 1.
for count, (name, clf) in enumerate(classifiers, start=1):
    clf.fit(X, y)
    predicted_classes_SK = clf.predict(X_test)
    acc = accuracy_score(true_classes, predicted_classes_SK)
    result.loc[count] = [name, acc]

result