In [None]:
import numpy as np

from kafka_infra.MongoDbClient import MongoDbClient

# Client used by every query in this notebook.
# NOTE(review): "autoencoder_recognized_samples_signal" is presumably the name of
# the MongoDB collection holding the autoencoder's predictions — confirm against
# MongoDbClient's constructor.
client = MongoDbClient("autoencoder_recognized_samples_signal")

In [None]:
# Candidate thresholds for `predicted_value`, swept upwards from 0.600 in steps
# of 0.0005. With a floating-point step, np.arange's length depends on rounding,
# so the stop value (0.8) may or may not be part of the result.
THRESHOLD_START, THRESHOLD_STOP, THRESHOLD_STEP = 0.600, 0.8, 0.0005
domain = np.arange(THRESHOLD_START, THRESHOLD_STOP, THRESHOLD_STEP)

Create the MongoDB aggregation pipeline that returns, for a given threshold, how many normal and anomalous samples the model recognized correctly

In [None]:
def define_pipeline(x):
    """Build the aggregation pipeline that counts recognized samples at threshold ``x``.

    The pipeline joins every document with ``generated_samples_signal`` on
    ``sample_id``, carries over the joined ``normal_data`` flag and then uses a
    single ``$facet`` stage to count four groups:

    * ``normal_data == 1``: all of them, and those with ``predicted_value <= x``
    * ``normal_data == 0``: all of them, and those with ``predicted_value > x``

    Args:
        x: Threshold that ``predicted_value`` is compared against.

    Returns:
        list: The aggregation stages in execution order. The final stage
        projects ``normal_data_count``, ``correct_recognized_normal_data``,
        ``anomalous_data_count`` and ``correct_recognized_anomalous_data``.
    """
    def count_where(criteria):
        # One facet's sub-pipeline: keep the matching documents, then count them.
        return [{"$match": criteria}, {"$count": "count"}]

    def first_count(facet_name):
        # Pull the `count` field out of the first element of the facet's output array.
        return {"$arrayElemAt": ["$" + facet_name + ".count", 0]}

    join_generated_sample = {
        "$lookup": {
            "from": "generated_samples_signal",
            "localField": "sample_id",
            "foreignField": "sample_id",
            "as": "generated_sample_data"
        }
    }

    keep_relevant_fields = {
        "$project": {
            "sample_id": 1,
            "predicted_value": 1,
            "duration": 1,
            "normal_data": "$generated_sample_data.normal_data"
        }
    }

    facets = {
        "normal_data_total": count_where({"normal_data": 1}),
        "normal_data_recognized": count_where(
            {"normal_data": 1, "predicted_value": {"$lte": x}}
        ),
        "anomalous_data_total": count_where({"normal_data": 0}),
        "anomalous_data_recognized": count_where(
            {"normal_data": 0, "predicted_value": {"$gt": x}}
        ),
    }

    summary = {
        "normal_data_count": first_count("normal_data_total"),
        "correct_recognized_normal_data": first_count("normal_data_recognized"),
        "anomalous_data_count": first_count("anomalous_data_total"),
        "correct_recognized_anomalous_data": first_count("anomalous_data_recognized"),
    }

    return [
        join_generated_sample,
        {"$unwind": "$generated_sample_data"},
        keep_relevant_fields,
        {"$facet": facets},
        {"$project": summary},
    ]

In [None]:
# Accumulators filled by the threshold sweep; -1 marks a class total that has
# not been read from a result document yet.
sample_details = list()
normal_data_count = anomalous_data_count = -1

In [None]:
# Run the aggregation once per candidate threshold and record how many samples
# of each class were recognized correctly at that threshold.
#
# The list is rebuilt from scratch so that re-running this cell does not append
# a second copy of every threshold to `sample_details`.
sample_details = []

for x in domain:
    result = client.aggregate(define_pipeline(x))
    for doc in result:
        # The class totals are read only once, while the -1 sentinel is still in place.
        if normal_data_count == -1:
            normal_data_count = doc['normal_data_count']
        if anomalous_data_count == -1:
            anomalous_data_count = doc['anomalous_data_count']
        # A "recognized" key can be missing from the result document (presumably
        # when the corresponding $count stage matched nothing); count that as 0.
        sample_details.append({
            'factor': x,
            'correct_recognized_normal_data': doc.get('correct_recognized_normal_data', 0),
            'correct_recognized_anomalous_data': doc.get('correct_recognized_anomalous_data', 0)
        })

In [None]:
# Fraction of correctly recognized samples at every swept threshold: per class,
# and over both classes together. Each list has one entry per `sample_details`
# item, in the same order.
correct_recognized_normal_samples_ratio = [
    sample['correct_recognized_normal_data'] / normal_data_count
    for sample in sample_details
]
correct_recognized_anomalous_samples_ratio = [
    sample['correct_recognized_anomalous_data'] / anomalous_data_count
    for sample in sample_details
]
correct_recognized_samples_ratio = [
    (sample['correct_recognized_normal_data'] + sample['correct_recognized_anomalous_data'])
    / (anomalous_data_count + normal_data_count)
    for sample in sample_details
]

In [None]:
import matplotlib.pyplot as plt

# Recognition ratio of each class as a function of the threshold.
fig, ax = plt.subplots()
for ratios, series_label in (
    (correct_recognized_normal_samples_ratio, 'Próbki normalne'),
    (correct_recognized_anomalous_samples_ratio, 'Anomalie'),
):
    ax.plot(domain, ratios, label=series_label)
ax.set_xlabel("Wartość podziału")
ax.set_ylabel("Procent poprawnie rozpoznanych próbek")
# The lower limit sits below zero, leaving room under the curves for the legend.
ax.set_ylim(-0.19, 1.05)
ax.legend(loc='lower left')
plt.show()

Find the threshold at which the recognition ratios of normal and anomalous samples are closest to each other

In [None]:
# Locate the swept threshold at which the normal and anomalous recognition
# ratios are closest to each other (the first such threshold on ties).
ratio_pairs = list(zip(correct_recognized_normal_samples_ratio,
                       correct_recognized_anomalous_samples_ratio))
if not ratio_pairs:
    # Without this guard an empty sweep ends in a NameError on `index`, or
    # silently reports a stale `index` left over from an earlier run.
    raise ValueError("No recognition ratios available; run the aggregation cells first.")

# min() returns the first minimal element, matching a strict `<` scan.
index, closest_pair = min(
    enumerate(ratio_pairs),
    key=lambda entry: abs(entry[1][0] - entry[1][1]),
)
min_difference = abs(closest_pair[0] - closest_pair[1])

print(index)
# Mean of the two ratios at that threshold.
print(np.mean(closest_pair))

In [None]:
# Show the sweep entry at hard-coded position 296.
# NOTE(review): 296 is presumably the `index` printed by the closest-ratio
# search on an earlier run — confirm, or index with `index` directly.
print(sample_details[296])

In [None]:
# Class totals read during the sweep: normal first, anomalous second.
print(normal_data_count, anomalous_data_count, sep='\n')

In [None]:
# Hand-entered ratio; evaluates to 0.675.
# NOTE(review): presumably (recognized normal + recognized anomalous) / total
# samples, i.e. overall accuracy at one threshold — the numbers are hard-coded,
# confirm against `sample_details`.
print((810+135)/1400)

In [None]:
# Class totals minus two hard-coded counts.
# NOTE(review): 741 and 169 are presumably the correctly recognized normal and
# anomalous counts at one threshold, which would make these the misrecognized
# counts per class — confirm against `sample_details`.
print(normal_data_count - 741)
print(anomalous_data_count - 169)

Swoistość (specificity)

In [None]:
# Hand-entered ratio of two hard-coded counts.
# NOTE(review): presumably the specificity named in the preceding markdown cell;
# the origin of 789 and 1141 is not visible in this notebook — confirm.
print(789/1141)

In [None]:
# Show the sweep entry at hard-coded position 37.
# NOTE(review): why position 37 was chosen is not visible here — confirm.
print(sample_details[37])

In [None]:
# Mean of three hard-coded scores; the same three numbers appear as the
# Autoenkoder 'recall' entries in the Data Visualisation cell.
np.mean([0.675, 0.702, 0.668])

## Data Visualisation

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Scores per model ('Produkt') and data stream ('Rok'), entered by hand.
# NOTE(review): every ANN and SVM entry is 0. — presumably placeholders until
# those models are evaluated; confirm.
data = {
    'Rok': [1,1,1,2,2,2,3,3,3],
    'Produkt': ['ANN', 'Autoenkoder', 'SVM', 'ANN', 'Autoenkoder', 'SVM','ANN', 'Autoenkoder', 'SVM'],
    'acc': [0.,0.675,0., 0., 0.681,0., 0.,0.652, 0.],
    'recall': [0.,0.675,0.,0., 0.702,0.,0., 0.668, 0.]
}
df = pd.DataFrame(data)

produkty = df['Produkt'].unique()
years = df['Rok'].unique()
bar_width = 0.25
index = np.arange(len(produkty))

# Grouped bar chart: one group per model, one bar per stream inside each group.
fig, ax = plt.subplots()
for position, year in enumerate(years):
    # Accuracy of every model on this stream, in the same order as `produkty`.
    heights = [
        df[(df['Produkt'] == produkt) & (df['Rok'] == year)]['acc'].values[0]
        for produkt in produkty
    ]
    ax.bar(index + position * bar_width, heights, bar_width, label='Strumień ' + str(year))

ax.set_ylim(0, 1)
ax.set_xlabel('Model')
ax.set_ylabel('Współczynnik dokładności')
# Centre each model label under its group: a group holds len(years) bars, so its
# midpoint lies (len(years) - 1) / 2 bar widths to the right of the first bar.
ax.set_xticks(index + bar_width * (len(years) - 1) / 2)
ax.set_xticklabels(produkty)
ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Fetch the documents whose `duration` feeds the processing-time histogram.
# NOTE(review): {'duration': 1, '_id': 0} has the shape of a projection (include
# `duration`, drop `_id`). If MongoDbClient.find forwards its first argument as a
# query filter, this would instead match only documents with duration == 1 and
# _id == 0 — confirm against MongoDbClient.find.
durations = client.find({'duration': 1, '_id': 0})

In [None]:
# Collect the `duration` of every fetched document into a plain list.
# NOTE(review): if `durations` is a one-shot cursor, re-running this cell without
# re-running the query cell yields an empty list — confirm.
values = [doc['duration'] for doc in durations]

# One summary line instead of printing every duration, which floods the output.
print(f"Loaded {len(values)} durations")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Histogram of the collected durations in 10-wide bins covering 0-150.
# NOTE(review): np.histogram does not count values outside the bin edges, so any
# duration above 150 would be left out of the chart — confirm the range.
bins = list(range(0, 151, 10))

hist, edges = np.histogram(values, bins=bins)

fig, ax = plt.subplots(figsize=(15, 6))
# Bars are centred on each bin's left edge; width 9 leaves a gap between bars.
ax.bar(edges[:-1], hist, width=9)

ax.set_xlabel("Czas przetwarzania")
ax.set_ylabel("Liczba obserwacji")
ax.set_title('Autoenkoder')
# One tick per bar. `edges` has one more entry than there are bars, so the ticks
# must use edges[:-1] to match the number of labels.
ax.set_xticks(edges[:-1])
ax.set_xticklabels([f"{int(edge)}-{int(edge + 9)}" for edge in edges[:-1]])

ax.grid(axis='y')
fig.tight_layout()
plt.show()

In [None]:
# Mean of the collected `duration` values (displayed as the cell's output).
np.mean(values)