In [5]:
# Move the working directory one level up so that the `spectra_generator`
# import and the relative data paths used in later cells resolve.
# NOTE: not idempotent -- every re-run of this cell climbs one more directory.
%cd ..

/Users/mateoibarguen/Desktop/DATA-Capstone


In [6]:
from spectra_generator import Spectrum, SpectraLoader, SpectraGenerator
import pickle
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [17]:
def spectra_train_test_splitter(spectra_filename, test_size=0.15, random_seed=42):
    """Load spectra from a file and split them into stratified train/test sets.

    The split is stratified on the values returned by
    ``SpectraLoader.get_n()`` (held in ``n_peaks`` -- presumably the number of
    peaks per spectrum; confirm against ``SpectraLoader``), so both partitions
    keep the same proportions of each value.

    Parameters
    ----------
    spectra_filename : str
        Path handed to ``SpectraLoader``.
    test_size : float or int, optional
        Forwarded to ``sklearn.model_selection.train_test_split``.
    random_seed : int, optional
        Forwarded to ``train_test_split`` as ``random_state`` so the split is
        reproducible.

    Returns
    -------
    tuple
        ``(spectra_train, spectra_test)`` as NumPy arrays of the loaded
        spectrum objects.
    """
    spectra_loader = SpectraLoader(spectra_filename)
    spectra = np.array(spectra_loader.spectra)
    n_peaks = np.array(spectra_loader.get_n())
    # Both keyword arguments used to be accepted but silently ignored, which
    # meant the split always used sklearn's default test size and a fresh
    # random state on every call. Forward them explicitly.
    spectra_train, spectra_test = train_test_split(
        spectra,
        test_size=test_size,
        random_state=random_seed,
        stratify=n_peaks,
    )
    return spectra_train, spectra_test

In [18]:
def find_nearest(array, value):
    """Return the index of the entry in ``array`` that lies closest to ``value``.

    ``array`` is converted with ``np.asarray``. The index refers to the
    flattened array, and ties resolve to the first (lowest) index, as with
    ``argmin``.
    """
    distances = np.absolute(np.asarray(array) - value)
    return np.argmin(distances)

In [19]:
class SpectraPreprocessor:
    """Turn the spectra stored in a file into model inputs and targets.

    The file is read through ``SpectraLoader``; ``transform`` produces the
    stacked data matrices (``X``) and, per spectrum, a vector that marks the
    grid positions nearest to its peak locations (``y``).
    """

    def __init__(self, spectra_filename):
        """Remember the file name and load it with ``SpectraLoader``."""
        self.spectra_filename = spectra_filename
        self.spectra_loader = SpectraLoader(self.spectra_filename)

    def get_locations(self, spectrum):
        """Return the peak locations of ``spectrum`` as an iterable.

        When ``spectrum.n == 1`` the ``peak_locations`` attribute is wrapped in
        a one-element list; otherwise its first element is returned.
        NOTE(review): this presumes ``peak_locations`` is nested one level
        deeper when ``n != 1`` -- confirm against ``Spectrum``.
        """
        if spectrum.n == 1:
            return [spectrum.peak_locations]
        return spectrum.peak_locations[0]

    def standardize_locs(self, locs, dimensionality=None):
        """Encode peak locations as a 0/1 vector over an evenly spaced grid.

        Parameters
        ----------
        locs : iterable of float
            Peak locations; each one is snapped to the nearest point of
            ``np.linspace(0.0, 1.0, dimensionality)``.
        dimensionality : int, optional
            Length of the grid. When omitted it is read from the loader as
            ``len(get_dm()[0][0])``. Callers encoding many spectra should pass
            it in so the loader is not queried once per spectrum.

        Returns
        -------
        numpy.ndarray
            Float vector of length ``dimensionality`` holding 1 at every
            snapped location and 0 elsewhere.
        """
        if dimensionality is None:
            dimensionality = len(self.spectra_loader.get_dm()[0][0])
        x_range = np.linspace(0.0, 1.0, dimensionality)
        x_filled = np.zeros(dimensionality)
        for loc in locs:
            x_filled[find_nearest(x_range, loc)] = 1
        return x_filled

    def transform(self):
        """Build the feature array and the encoded targets for all spectra.

        Returns
        -------
        X : numpy.ndarray
            The transposed data matrices from ``get_dm()`` stacked along a new
            leading axis.
        y : pandas.Series
            One encoded location vector (see ``standardize_locs``) per
            spectrum, in loader order.
        """
        locs = pd.Series(self.spectra_loader.spectra).apply(self.get_locations)
        # Query the loader once and reuse the result for both X and the grid
        # length; previously get_dm() was called again for every spectrum.
        data_matrices = self.spectra_loader.get_dm()
        X = np.stack([dm.T for dm in data_matrices], axis=0)
        dimensionality = len(data_matrices[0][0])
        y = locs.apply(
            lambda spectrum_locs: self.standardize_locs(
                spectrum_locs, dimensionality=dimensionality
            )
        )
        return X, y
    
    

In [20]:
# Preprocessor over the complete, unsplit spectra file.
spectra_preprocessor = SpectraPreprocessor(
    spectra_filename='spectra_generator/data/spectra_01.pkl'
)

In [21]:
# Split the full spectra file into train/test partitions.
spectra_train, spectra_test = spectra_train_test_splitter('spectra_generator/data/spectra_01.pkl')

# Serialise every spectrum through its attribute dictionary
# (vars(obj) is obj.__dict__).
spectra_train_json = list(map(vars, spectra_train))
spectra_test_json = list(map(vars, spectra_test))

# Persist each partition to its own pickle file.
SpectraGenerator.save_spectra(spectra_train_json, "spectra_models/data/set_01/train_01.pkl")
SpectraGenerator.save_spectra(spectra_test_json, "spectra_models/data/set_01/test_01.pkl")

In [22]:
# Locations of the train/test partitions written by the previous cell.
train_path = 'spectra_models/data/set_01/train_01.pkl'
test_path = 'spectra_models/data/set_01/test_01.pkl'

# One preprocessor per partition, built in train -> test order.
train_preprocessor, test_preprocessor = (
    SpectraPreprocessor(path) for path in (train_path, test_path)
)

In [23]:
# Row indices of the training subsample used to fit the model below.
# Indices are drawn with replacement, so the subsample may contain duplicates.
# Drawn from a dedicated seeded generator so that the subsample is identical on
# every Restart & Run All.
SUBSAMPLE_POOL_SIZE = 7500  # exclusive upper bound; must not exceed the number of training spectra -- TODO derive from the data
SUBSAMPLE_SIZE = 1000
SUBSAMPLE_SEED = 42
random_sample = np.random.RandomState(SUBSAMPLE_SEED).randint(0, SUBSAMPLE_POOL_SIZE, SUBSAMPLE_SIZE)

In [42]:
# Build the full training arrays, then keep only the sampled rows.
# These two statements were commented out, which left X_train_sub / y_train_sub
# undefined on a fresh kernel (NameError under Restart & Run All).
# NOTE: transform() processes every training spectrum and may be slow.
X_train_full, y_train_full = train_preprocessor.transform()
X_train_sub, y_train_sub = X_train_full[random_sample], y_train_full.iloc[random_sample]
# Append a trailing channel axis for Conv2D; the shape is derived from the data
# instead of being hard-coded to (1000, 1001, 10, 1).
X_train = X_train_sub.reshape(X_train_sub.shape + (1,))
y_train = np.stack(y_train_sub, axis=0)

In [43]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten

# Two stacked 3x3 convolutions over the (1001, 10, 1) input, flattened into a
# 1001-unit sigmoid output layer.
model = Sequential([
    Conv2D(64, kernel_size=3, activation='relu', input_shape=(1001, 10, 1)),
    Conv2D(32, kernel_size=3, activation='relu'),
    Flatten(),
    Dense(1001, activation='sigmoid'),
])

In [50]:
# Configure training: Adam optimiser, mean-squared-error loss, and cosine
# proximity reported as the monitoring metric.
model.compile(
    loss='mse',
    optimizer='adam',
    metrics=['cosine_proximity'],
)

In [None]:
# Train on the subsample for three epochs in mini-batches of three.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=3,
    epochs=3,
)

Epoch 1/3
 150/1000 [===>..........................] - ETA: 10:47 - loss: 0.0121 - cosine_proximity: 0.0101