In [None]:
# BLOK 1

# importovanie potrebnych kniznic

from keras.models import Sequential
from keras.layers import Convolution1D
from keras.layers import MaxPooling1D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from keras.utils import np_utils
from keras.models import load_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
import json
import pickle
# Seed NumPy's global random generator with a fixed value (1234).
np.random.seed(1234)

In [None]:
# BLOCK 2
#
# Load the observational data and the target attribute.
# Select the 'flux' attribute (the light-curve brightness), min-max scale each
# curve on its own, and stack the curves into one NumPy array in which every
# point is wrapped in its own length-1 axis.
# Encode the target attribute: over-contact - 0, semi-detached - 1, detached - 2.

# Read the pickle once and take both columns from the same frame.
# NOTE(security): unpickling can execute arbitrary code - only load trusted files.
observation_data = pd.read_pickle('observation_data.pkl')
processed = observation_data['processed_lightcurve']
morphology = observation_data['morphology']

curves = []
for j in processed:
    # NOTE(security): eval() runs whatever expression the stored string holds.
    # It is kept because the serialized format is not visible from this
    # notebook; replace it with a safe parser once the format is confirmed.
    processed_data = eval(j)
    processed_data = np.array(processed_data['flux'])
    # Compute the extrema once per curve instead of once per point.
    flux_min = processed_data.min()
    flux_max = processed_data.max()
    scaled = (processed_data - flux_min) / (flux_max - flux_min)
    # Wrap every point in its own axis, as the nested-list version did.
    curves.append(scaled[:, np.newaxis])
curves = np.array(curves)

MORPHOLOGY_LABELS = {'over-contact': 0, 'semi-detached': 1, 'detached': 2}

# NOTE(review): labels outside the three known classes are skipped silently,
# which would leave `target` shorter than (and misaligned with) `curves`.
# Confirm that the data set contains only these three labels.
target = [MORPHOLOGY_LABELS[label] for label in morphology if label in MORPHOLOGY_LABELS]
# `target` stays a plain list of integer class ids (no one-hot encoding here).

In [None]:
# BLOCK 3
#
# Remove anomalies from the observational data.
# A light curve is treated as an anomaly when its value at phase 100 differs
# from that of most light curves of the same class.
# Accepted brightness intervals at phase 100:
#   over-contact <0; 0.2>, semi-detached <0.35; 0.75>, detached <0; 0.75>


def _curves_in_band(class_id, lower, upper):
    """Return curves labelled `class_id` whose phase-100 value is in [lower, upper]."""
    return [
        curve
        for label, curve in zip(target, curves)
        if label == class_id and lower <= curve[100] <= upper
    ]


oc = _curves_in_band(0, 0, 0.2)       # over-contact
sd = _curves_in_band(1, 0.35, 0.75)   # semi-detached
dt = _curves_in_band(2, 0, 0.75)      # detached


In [None]:
# BLOCK 4
#
# Find the minimum and maximum brightness at each of the 201 phases,
# separately for each of the three classes.


def _phase_envelope(class_curves):
    """Return a list of [minimum, maximum] pairs, one per phase, over `class_curves`.

    The search starts from minimum = 1 and maximum = 0, so a phase at which no
    curve goes below 1 / above 0 keeps those starting values.
    """
    envelope = []
    for phase in range(201):
        lowest, highest = 1, 0
        for curve in class_curves:
            value = curve[phase]
            if value > highest:
                highest = value
            if value < lowest:
                lowest = value
        envelope.append([lowest, highest])
    return envelope


oc_range = _phase_envelope(oc)
sd_range = _phase_envelope(sd)
dt_range = _phase_envelope(dt)


In [None]:
# BLOCK 5
#
# Load the synthetic data.
# Min-max scale the values of every synthetic light curve.


def _scaled_points(parsed_curve):
    """Min-max scale the 'y' column of one parsed curve.

    The first element of `parsed_curve` is read as rows of two columns
    ('x', 'y'). Returns the scaled 'y' values as a list of one-element lists.
    """
    frame = pd.DataFrame(parsed_curve[0], columns=['x', 'y'])
    values = frame['y']
    scaled = (values - values.min()) / (values.max() - values.min())
    return [[point] for point in scaled]


# Each pickle holds a "lightcurve" column of JSON-encoded strings.
mor_dt = pd.read_pickle('morphology_dt.pkl')["lightcurve"]
mor_sd = pd.read_pickle('morphology_sd.pkl')["lightcurve"]
mor_oc = pd.read_pickle('morphology_oc.pkl')["lightcurve"]

data_dt = [json.loads(raw) for raw in mor_dt]
data_sd = [json.loads(raw) for raw in mor_sd]
data_oc = [json.loads(raw) for raw in mor_oc]

data_dt_array = [_scaled_points(parsed) for parsed in data_dt]
data_sd_array = [_scaled_points(parsed) for parsed in data_sd]
data_oc_array = [_scaled_points(parsed) for parsed in data_oc]

# Constant class ids, one per curve: detached = 2, semi-detached = 1, over-contact = 0.
target_dt = [2] * len(data_dt_array)
target_sd = [1] * len(data_sd_array)
target_oc = [0] * len(data_oc_array)


In [None]:
# BLOCK 6
#
# Select synthetic curves using the per-phase range obtained from the
# observational curves.
# The selection tolerance is set per class by the *_TOLERANCE constants below:
# each is the fraction of the range width by which the range is widened in
# both directions.
# 'data_all' collects the selected curves, 'y' collects the target attribute.
# Both are converted to NumPy arrays, split into training and test sets, and
# 'y_train' / 'y_test' are one-hot encoded.

OC_TOLERANCE = 0      # over-contact: no widening
SD_TOLERANCE = 0.25   # semi-detached: widen by 25 % of the range on each side
DT_TOLERANCE = 0      # detached: no widening


def _fits_envelope(curve, envelope, tolerance, n_phases=201):
    """Return True when `curve` stays inside the widened `envelope` at every phase.

    `envelope[phase]` is a [minimum, maximum] pair. The scan stops at the first
    phase that falls outside the range instead of checking the remaining ones.
    """
    for phase in range(n_phases):
        lower, upper = envelope[phase]
        dev = (upper - lower) * tolerance
        if not lower - dev <= curve[phase] <= upper + dev:
            return False
    return True


data_all = []
y = []

oc_syntetic = [c for c in data_oc_array if _fits_envelope(c, oc_range, OC_TOLERANCE)]
y.extend([0] * len(oc_syntetic))
print("Pocet kriviek triedy over-contact po vybere: " + str(len(oc_syntetic)))

sd_syntetic = [c for c in data_sd_array if _fits_envelope(c, sd_range, SD_TOLERANCE)]
y.extend([1] * len(sd_syntetic))
print("Pocet kriviek triedy semi-detached po vybere: " + str(len(sd_syntetic)))

dt_syntetic = [c for c in data_dt_array if _fits_envelope(c, dt_range, DT_TOLERANCE)]
y.extend([2] * len(dt_syntetic))
print("Pocet kriviek triedy detached po vybere: " + str(len(dt_syntetic)))

# Curves are concatenated in the same class order in which labels were added to 'y'.
data_all.extend(oc_syntetic)
data_all.extend(sd_syntetic)
data_all.extend(dt_syntetic)
data_all = np.array(data_all)
print("Pocet vsetkych kriviek po vybere: " + str(len(data_all)))
print("Pocet vsetkych cielovych atributov po vybere: " + str(len(y)))
y = np.array(y)
X_train, X_test, y_train, y_test = train_test_split(data_all, y, test_size=0.2)
y_train = np_utils.to_categorical(y_train, 3)
y_test = np_utils.to_categorical(y_test, 3)


In [None]:
# BLOCK 7
#
# Architecture and training of MODEL 1:
# one convolution + max-pooling stage followed by a dense classifier head.

classifier = Sequential()
classifier.add(Convolution1D(32, 10, activation='relu', input_shape=(201, 1)))
classifier.add(MaxPooling1D(pool_size=2))
classifier.add(Flatten())
classifier.add(Dense(64, activation='relu'))
classifier.add(Dense(3, activation='softmax'))
classifier.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = classifier.fit(X_train, y_train, validation_split=0.2, epochs=25, batch_size=32, verbose=1)
# With metrics=['accuracy'] evaluate() returns [loss, accuracy]; unpack it so
# the value printed under the "accuracy" label really is the accuracy.
loss, accuracy = classifier.evaluate(X_test, y_test, batch_size=32, verbose=1)
print("Celková presnosť modelu 1: " + str(accuracy))


In [None]:
# BLOCK 8
#
# Architecture and training of MODEL 2:
# two convolution + max-pooling stages followed by a dense classifier head.

classifier = Sequential()
classifier.add(Convolution1D(32, 10, activation='relu', input_shape=(201, 1)))
classifier.add(MaxPooling1D(pool_size=2))
classifier.add(Convolution1D(32, 10, activation='relu'))
classifier.add(MaxPooling1D(pool_size=2))
classifier.add(Flatten())
classifier.add(Dense(64, activation='relu'))
classifier.add(Dense(3, activation='softmax'))
classifier.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = classifier.fit(X_train, y_train, validation_split=0.2, epochs=25, batch_size=32, verbose=1)
# With metrics=['accuracy'] evaluate() returns [loss, accuracy]; unpack it so
# the value printed under the "accuracy" label really is the accuracy.
loss, accuracy = classifier.evaluate(X_test, y_test, batch_size=32, verbose=1)
print("Celková presnosť modelu 2: " + str(accuracy))


In [None]:
# BLOCK 9
#
# Architecture and training of MODEL 3:
# two convolution + max-pooling stages (the first one wider), a dense layer
# with dropout, and a softmax output. Trained for 8 epochs.

classifier = Sequential()
classifier.add(Convolution1D(64, 20, activation='relu', input_shape=(201, 1)))
classifier.add(MaxPooling1D(pool_size=2))
classifier.add(Convolution1D(32, 10, activation='relu'))
classifier.add(MaxPooling1D(pool_size=2))
classifier.add(Flatten())
classifier.add(Dense(64, activation='relu'))
classifier.add(Dropout(0.25))
classifier.add(Dense(3, activation='softmax'))
classifier.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = classifier.fit(X_train, y_train, validation_split=0.2, epochs=8, batch_size=32, verbose=1)
# With metrics=['accuracy'] evaluate() returns [loss, accuracy]; unpack it so
# the value printed under the "accuracy" label really is the accuracy.
loss, accuracy = classifier.evaluate(X_test, y_test, batch_size=32, verbose=1)
print("Celková presnosť modelu 3: " + str(accuracy))


In [None]:
# BLOCK 10
#
# Architecture and training of MODEL 4:
# three convolution + max-pooling stages followed by a dense classifier head.

classifier = Sequential()
classifier.add(Convolution1D(64, 20, activation='relu', input_shape=(201, 1)))
classifier.add(MaxPooling1D(pool_size=2))
classifier.add(Convolution1D(32, 10, activation='relu'))
classifier.add(MaxPooling1D(pool_size=2))
classifier.add(Convolution1D(32, 10, activation='relu'))
classifier.add(MaxPooling1D(pool_size=2))
classifier.add(Flatten())
classifier.add(Dense(64, activation='relu'))
classifier.add(Dense(3, activation='softmax'))
classifier.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = classifier.fit(X_train, y_train, validation_split=0.2, epochs=25, batch_size=32, verbose=1)
# With metrics=['accuracy'] evaluate() returns [loss, accuracy]; unpack it so
# the value printed under the "accuracy" label really is the accuracy.
loss, accuracy = classifier.evaluate(X_test, y_test, batch_size=32, verbose=1)
print("Celková presnosť modelu 4: " + str(accuracy))


In [None]:
# BLOCK 11
#
# Load a previously saved model.
# MODEL1:
# MODEL2:
# MODEL3:
# MODEL4:

saved_model_path = 'model3_filteredData.h5'
classifier = load_model(saved_model_path)


In [None]:
# BLOCK 12
#
# Evaluate the model on the synthetic test data with a confusion matrix.
# Compute precision, recall and F1 score per class.

y_pred = classifier.predict(X_test)
# Pick the most probable class of every sample in one vectorized call
# (replaces growing an array with np.append inside a loop).
predicted_labels = np.argmax(y_pred, axis=1)
# Kept one-hot encoded under the same name the cell has always produced.
y_pred2 = np_utils.to_categorical(predicted_labels, 3)
# y_test is one-hot encoded; convert it back to class ids for the metrics.
true_labels = y_test.argmax(axis=1)

cm = confusion_matrix(true_labels, predicted_labels)
print("Kontingencna tablulka: " + str(cm))
prfs = precision_recall_fscore_support(true_labels, predicted_labels, average=None)
print("Presnost, navratnost, fmiera, support: " + str(prfs))


In [None]:
# BLOCK 13
#
# Evaluate the model on the observational data with a confusion matrix.
# Compute precision, recall and F1 score per class.

y_pred = classifier.predict(curves)
# Pick the most probable class of every curve in one vectorized call.
predicted_labels = np.argmax(y_pred, axis=1)
# Kept one-hot encoded under the same name the cell has always produced.
y_pred2 = np_utils.to_categorical(predicted_labels, 3)

# `target` is a plain list of class ids, so calling target.argmax(axis=1)
# raised AttributeError. Convert it to an array, and only undo a one-hot
# encoding if one has actually been applied.
true_labels = np.asarray(target)
if true_labels.ndim > 1:
    true_labels = true_labels.argmax(axis=1)

cm = confusion_matrix(true_labels, predicted_labels)
print("Kontingencna tablulka: " + str(cm))
prfs = precision_recall_fscore_support(true_labels, predicted_labels, average=None)
print("Presnost, navratnost, fmiera, suppert: " + str(prfs))


In [None]:
# BLOCK 14
#
# Evaluate the composite (two-stage) model on the observational data.
# Stage 1 (classifier_all) decides whether a curve is over-contact or not.
# Stage 2 (classifier_sddt) classifies the remaining curves as semi-detached
# or detached.

classifier_all = load_model("model3_OC-SDDT_filter.h5")
classifier_sddt = load_model("model3_SD-DT_filter.h5")

# Stage 1 on all curves in a single batched call instead of one predict()
# call per curve. Class 0 means over-contact.
stage1_labels = np.argmax(classifier_all.predict(curves), axis=1)
predicted_labels = np.zeros(len(curves), dtype=int)

# Stage 2 only on the curves that stage 1 did not assign to class 0.
# Its class ids are shifted by one so they map onto 1 and 2.
not_over_contact = stage1_labels != 0
if not_over_contact.any():
    stage2_labels = np.argmax(classifier_sddt.predict(curves[not_over_contact]), axis=1)
    predicted_labels[not_over_contact] = stage2_labels + 1

# Kept one-hot encoded under the same name the cell has always produced.
y_pred2 = np_utils.to_categorical(predicted_labels, 3)

# `target` is a plain list of class ids, so calling target.argmax(axis=1)
# raised AttributeError. Convert it to an array, and only undo a one-hot
# encoding if one has actually been applied.
true_labels = np.asarray(target)
if true_labels.ndim > 1:
    true_labels = true_labels.argmax(axis=1)

cm = confusion_matrix(true_labels, predicted_labels)
print("Kontingencna tablulka: " + str(cm))
prfs = precision_recall_fscore_support(true_labels, predicted_labels, average=None)
print("Presnost, navratnost, fmiera, suppert: " + str(prfs))
