In [None]:
import pymongo
import pprint
import numpy as np
import matplotlib.pyplot as plt
# !pip install dnspython

%load_ext autoreload
%autoreload 2

In [None]:
import mongoengine as me

import os
from urllib.parse import quote_plus

db_name = "spectre_db" #change db access here
# Read the password from the environment so it never has to be saved in the notebook;
# the empty-string fallback keeps the original "paste it here" workflow available.
root_pwd = os.environ.get("MONGO_ROOT_PWD", "") # add password

# Credentials embedded in a MongoDB URI must be percent-escaped, otherwise characters
# such as '@', ':' or '/' in the password corrupt the connection string.
db_uri = f"mongodb+srv://root:{quote_plus(root_pwd)}@cluster0.sn2un.mongodb.net/{db_name}?retryWrites=true&w=majority"
client = pymongo.MongoClient(db_uri)

# Look the database up by name so `db_name` is the single place to edit.
# Despite its name, `db` is the `reading_set` collection of that database.
db = client[db_name].reading_set



In [None]:
import pandas as pd

# Fetch every document stored for device 'jt_spec_1' and flatten the nested
# records into one DataFrame
datapoints = [doc for doc in db.find({'device_id': 'jt_spec_1'})]
df_all = pd.json_normalize(datapoints)
df_all.tail()

In [None]:
# Columns carried forward from the raw dump
cols = [
    'timestamp',
    'readings',
    'calibration_readings',
    'sample_name',
    'api_type',
    'ref',
]
# reset_index() adds the previous row labels as an 'index' column,
# which proc_readingset reads back out of each row
df = df_all.loc[:, cols].reset_index()
set1 = df['readings'][0]

def proc_readingset(row):
    """Turn one DataFrame row into a plain dict with stacked reading arrays.

    Args:
        row: mapping-like row (e.g. a pandas Series) holding 'readings' and
            'calibration_readings' (iterables of dicts with a 'values' entry)
            plus the 'timestamp', 'sample_name', 'api_type', 'ref' and 'index' fields.

    Returns:
        dict with the five metadata fields, plus 'X' and 'X_cal': numpy arrays
        built from the 'values' of the sample and calibration readings.
    """
    def _stack(field):
        # one array row per reading in the given field
        return np.array([entry['values'] for entry in row[field]])

    X = _stack('readings')
    X_cal = _stack('calibration_readings')

    meta_fields = ['timestamp', 'sample_name', 'api_type', 'ref', 'index']
    out = row[meta_fields].to_dict()
    out['X'] = X
    out['X_cal'] = X_cal
    return out

def lbl_fmt(s):
    """Normalise a label: lower-case, drop trailing spaces, spaces -> underscores."""
    return s.lower().rstrip(' ').replace(' ', '_')

# One processed dict per reading set
df_proc = df.apply(proc_readingset, axis=1)
# Distinct "<index>: <label> - <sample name>" strings, to see which labels exist
{f"{el['index']}: {lbl_fmt(el['api_type'])} - {el['sample_name']}" for el in df_proc}

In [None]:
# Show the selected columns with the row number exposed as an 'index' column
df_all.loc[:, cols].reset_index()

In [None]:
# Label pairs for each two-class experiment; change `active_experiment` to switch.
api_presets = {
    'semillon_chenin': ['sb_semillon', 'chenin'],
    'gin': ['ia', 'got'],
    'whisky': ['dw', 'jam'],
    'vinegar_2': ['a', 'm'],  # apple cider, malt
}
active_experiment = 'vinegar_2'
target_apis = api_presets[active_experiment]

# Other label sets tried previously:
# target_apis = ['o', 's']
# target_apis = ['apple cider vinegar', 'malt vinegar']

def extract_api_data(api, df_proc):
    """Gather all processed reading sets whose normalised label equals `api`.

    Args:
        api: target label, compared against each row's 'api_type' after it is
            lower-cased, stripped of trailing spaces and has spaces replaced by '_'.
        df_proc: iterable of dicts as produced by proc_readingset.

    Returns:
        dict with keys 'X', 'X_cal' (numpy arrays holding the axis-0 mean of each
        matching reading set), 'sample_names', 'refs', 'timestamps' (lists) and
        'X_rel' (element-wise X / X_cal).
    """
    def _normalise(label):
        return label.lower().rstrip(' ').replace(' ', '_')

    matches = [row for row in df_proc if _normalise(row['api_type']) == api]

    data = {
        'X': [row['X'] for row in matches],
        'X_cal': [row['X_cal'] for row in matches],
        'sample_names': [row['sample_name'] for row in matches],
        'refs': [row['ref'] for row in matches],
        'timestamps': [row['timestamp'] for row in matches],
    }

    # Collapse each reading set to its mean and stack the means into one array
    for field in ('X', 'X_cal'):
        data[field] = np.array([np.mean(readings, axis=0) for readings in data[field]])

    # Sample readings relative to the calibration readings
    data['X_rel'] = data['X'] / data['X_cal']
    return data

# One extracted data bundle per target label, keyed by the label itself
data = {api: extract_api_data(api, df_proc) for api in target_apis}

In [None]:
# One panel per target label, sharing the y axis so the classes are comparable.
fig, axes = plt.subplots(1, len(target_apis), figsize=(14,6), sharey=True)
# With a single label plt.subplots returns a bare Axes rather than an array;
# normalise to a 1-D array so the loop works for any number of labels.
axes = np.atleast_1d(axes)
for ax, api in zip(axes, target_apis):
    # transpose so every sample is drawn as its own line
    ax.plot(data[api]['X_rel'].T)
    ax.set_title(api)
#     ax.legend(data[api]['refs'])
    print(data[api]['refs'])

In [None]:
from sklearn.model_selection import train_test_split

def form_dataset(data_map, key='X_rel', n_features=128):
    """Stack per-class arrays into a design matrix and an integer-valued label column.

    Args:
        data_map: mapping of class name -> dict holding an array-like under `key`.
            Classes are labelled 0, 1, 2, ... in the mapping's iteration order.
        key: which entry of each per-class dict to stack.
        n_features: number of columns of the returned matrix (defaults to the
            128 used throughout this notebook).

    Returns:
        (X, y): X has shape (n_samples, n_features); y is a float column vector
        of shape (n_samples, 1) holding the class index of every row of X.

    Raises:
        ValueError: if a class array cannot be reshaped to (-1, n_features).
    """
    X_parts, y_parts = [], []
    for label, class_data in enumerate(data_map.values()):
        # Reshape per class so a class with no samples (a 1-D empty array)
        # becomes a (0, n_features) block instead of breaking the concatenation.
        block = np.asarray(class_data[key]).reshape(-1, n_features)
        X_parts.append(block)
        # Label count follows the reshaped block, so X and y always line up.
        y_parts.append(np.full(block.shape[0], float(label)))

    if not X_parts:  # empty mapping -> empty but correctly shaped outputs
        return np.empty((0, n_features)), np.empty((0, 1))

    X = np.concatenate(X_parts, axis=0)
    y = np.concatenate(y_parts).reshape(-1, 1)

    return X, y


# Stack all target classes into one feature matrix / label column
X, y = form_dataset(data)

# Disabled PCA experiment, kept for reference:
# Xnorm = ((X-np.mean(X))/np.std(X)).T
# C = Xnorm.dot(Xnorm.T)

# lam, A = np.linalg.eig(C)

# Xprime = A[:, 1:4].T.dot(Xnorm)
# Xprime = np.real(Xprime.T)

# Shuffled 60/40 hold-out split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    shuffle=True,
    random_state=42,
)

In [None]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^-x)."""
    return 1/(1+np.e**-x)

# Least-squares weights via the pseudo-inverse of the training matrix
w = np.dot(np.linalg.pinv(X_train), y_train)
y_pred = sigmoid(np.dot(X_test, w))

# Print true labels next to the squashed predictions
y_true_flat = y_test.flatten().T
y_pred_flat = y_pred.round(decimals=2).flatten().T
print(f"y: {y_true_flat}, y pred: {y_pred_flat}")

In [None]:
# `scipy.ndimage.interpolation` is a deprecated private namespace; `shift` is
# part of the public `scipy.ndimage` API.
from scipy.ndimage import shift

def shift_matrix(X, delta):
    """Shift every row of X along the feature axis by `delta` positions.

    Expects X to be a [samples x features] matrix.

    Args:
        X: 2-D numpy array, one sample per row.
        delta: shift passed to scipy.ndimage.shift; positive values move the
            content towards higher feature indices.

    Returns:
        Array with the same shape and dtype as X. Positions shifted in from
        outside the row repeat the nearest edge value (mode='nearest').
    """
    X_shift = np.zeros_like(X)
    for i, row in enumerate(X):
        X_shift[i, :] = shift(row, delta, mode='nearest')
    return X_shift

In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# 5-fold splitter; shuffle=False keeps the samples in their existing order.
# NOTE(review): form_dataset stacks the classes one after another, so unshuffled
# folds may hold out samples of a single class — confirm this is intended.
kf = KFold(n_splits=5, shuffle=False)
kf.get_n_splits(X)  # return value is not used

def sigmoid(x):
    """Logistic function 1 / (1 + e^-x); works on scalars and numpy arrays."""
    decay = np.e**(-x)
    return 1/(1+decay)

# Cross-validate the least-squares classifier on test folds shifted by 4 positions.
max_label = y.max()  # highest valid class index
for train_index, test_index in kf.split(X):
    # print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    X_test = shift_matrix(X_test, 4)

    w = np.linalg.pinv(X_train).dot(y_train)
    y_pred = X_test.dot(w).round(decimals=2)
    # Snap each regression output to the nearest valid class. The upper bound
    # matters as much as the lower one: without it a score >= 1.5 rounds to a
    # class that does not exist and is counted as an error.
    y_pred_bin = np.clip(np.round(y_pred), 0, max_label)
    # accuracy_score expects (y_true, y_pred)
    print(accuracy_score(y_test, y_pred_bin))

In [None]:
# Accuracy of the least-squares classifier as a function of the test-set shift.
# Uses whatever X_train / y_train / X_test / y_test are currently bound to
# (presumably the last fold of the preceding cross-validation loop).
delta_range = list(range(-25, 25))
accs = []

# The fit does not depend on the shift, so compute it once outside the loop.
w = np.linalg.pinv(X_train).dot(y_train)
max_label = y.max()  # highest valid class index

for d in delta_range:
    X_test_sh = shift_matrix(X_test, d)

    y_pred = X_test_sh.dot(w).round(decimals=2)
    # Clip on both sides so out-of-range scores map to the nearest valid class.
    y_pred_bin = np.clip(np.round(y_pred), 0, max_label)

    # accuracy_score expects (y_true, y_pred)
    acc = accuracy_score(y_test, y_pred_bin)
    accs.append(acc)

plt.plot(delta_range, accs)

In [None]:
# Overlay training sample 1 with its 5-position shifted copy
X_tr_shift = shift_matrix(X_train, 5)
shifted_row = np.squeeze(X_tr_shift[1, :]).T
original_row = np.squeeze(X_train[1, :]).T
plt.plot(shifted_row)
plt.plot(original_row)

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D, AveragePooling1D
from keras.utils import to_categorical
from sklearn.model_selection import KFold

# Stand-alone model consisting of a single average-pooling layer
pool_model = Sequential()
pool_model.add(AveragePooling1D(pool_size=2, name='first_pool'))

def create_model(input_shape, n_outputs=1):
    """Build and compile the 1-D convolutional classifier.

    Args:
        input_shape: shape of one input sample, passed to the first layer.
        n_outputs: number of output units. A single unit uses a sigmoid with
            binary cross-entropy; more than one uses softmax with categorical
            cross-entropy.

    Returns:
        A compiled Sequential model (adam optimizer, accuracy metric).
    """
    if n_outputs > 1:
        out_act, loss = 'softmax', 'categorical_crossentropy'
    else:
        out_act, loss = 'sigmoid', 'binary_crossentropy'

    layers = [
        # pooling -> conv stages with progressively smaller kernels
        MaxPooling1D(pool_size=2,  name='first_pool', input_shape=input_shape),
        Conv1D(filters=8, kernel_size=8, padding='same', activation='relu'),
        AveragePooling1D(pool_size=2),
        Conv1D(filters=16, kernel_size=4, padding='same', activation='relu'),
        AveragePooling1D(pool_size=2),
        Conv1D(filters=16, kernel_size=2, padding='same', activation='relu'),
        Dropout(0.2),
        # dense classification head
        Flatten(),
        Dense(20, activation='relu'),
        Dense(n_outputs, activation=out_act),
    ]
    model = Sequential(layers)
    model.compile(loss=loss, optimizer='adam', metrics=['accuracy'])
    return model

In [None]:
# Rebuild the train/test split from `train_index` / `test_index`, which must
# already exist from an earlier kf.split loop.
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]

# Data augmentation: append shifted copies of the training rows, repeating the
# labels so every augmented row keeps its class.
X_blocks, y_blocks = [X_train], [y_train]
for delta in [-4, -2, 2, 4]:
    X_shift = shift_matrix(X_train, delta)
    X_blocks.append(X_shift)
    y_blocks.append(y_train)
X_train_aug = np.concatenate(X_blocks, axis=0)
y_train_aug = np.concatenate(y_blocks, axis=0)

X_train = X_train_aug
y_train = y_train_aug  # keep the labels aligned with the augmented feature matrix
X_test = shift_matrix(X_test, 0)

# Show both shapes; a bare expression is only displayed when it is the last line.
X_train.shape, y_train.shape

In [None]:
# Seed NumPy's global random generator.
# NOTE(review): this does not obviously seed Keras weight initialisation or
# dropout — confirm whether full reproducibility of the training runs is needed.
np.random.seed(0)

In [None]:
# 4-fold cross-validation of the CNN with shift augmentation on the training folds
kf = KFold(n_splits=4, shuffle=True, random_state=1) #random_state=123
kf.get_n_splits(X)

cv_acc = []
histories = []

# Training hyper-parameters (identical for every fold)
epochs = 100
batch_size = 10

np.random.seed(0)

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # data augmentation: original rows followed by their shifted copies
    X_blocks, y_blocks = [X_train], [y_train]
    for delta in [-4, -2, 2, 4]:
        X_shift = shift_matrix(X_train, delta)
        X_blocks.append(X_shift)
        y_blocks.append(y_train)
    X_train_aug = np.concatenate(X_blocks, axis=0)
    y_train_aug = np.concatenate(y_blocks, axis=0)

    # shuffle the augmented matrices with one shared permutation
    rnd_idx = np.random.permutation(X_train_aug.shape[0])
    X_train = X_train_aug[rnd_idx]
    y_train = y_train_aug[rnd_idx]
    X_test_sh = shift_matrix(X_test, 4)

    # model input should be 3D tensor of [samples, time steps, features] (only 1 feature in our case)
    X_train_win = X_train.reshape(X_train.shape[0], -1, 1)
    X_test_win = X_test_sh.reshape(X_test_sh.shape[0], -1, 1)
    y_train_win, y_test_win = y_train, y_test

    model = create_model(X_train_win.shape[1:])
    hist = model.fit(
        X_train_win,
        y_train_win,
        validation_data=(X_test_win, y_test_win),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
    )
    histories.append(hist.history)
    _, acc = model.evaluate(X_test_win, y_test_win, batch_size=batch_size, verbose=1)
    cv_acc.append(acc)

print(f"\n\nAccuracy across folds: {cv_acc}. Average: {np.mean(cv_acc)}")
# training accuracy curve of the last fold only
plt.plot(hist.history['accuracy'])

In [None]:
# One validation-accuracy curve per cross-validation fold
for hist in histories:
    val_curve = hist['val_accuracy']
    plt.plot(val_curve)

In [None]:
# Summarise the architecture using the model left over from the last CV fold.
# Do not rebind `model` here: creating a fresh network would replace the trained
# weights, and the shift-robustness sweep below evaluates `model`.
model.summary()

In [None]:
# Accuracy of `model` on the current test fold for each shift in [-20, 20)
delta_range = list(range(-20, 20))
accs = []
for d in delta_range:
    shifted = shift_matrix(X_test, d)
    # add the trailing feature axis expected by the model
    X_test_sh = shifted.reshape(shifted.shape[0], -1, 1)
    _, acc = model.evaluate(X_test_sh, y_test_win, verbose=0)
    accs.append(acc)

plt.plot(delta_range, accs)