In [2]:
# Switch the kernel's working directory to the parent folder; presumably so the
# project packages imported below and the relative data paths resolve — confirm.
%cd ..

/Users/mateoibarguen/Desktop/DATA-Capstone


In [3]:
from spectra_generator import Spectrum, SpectraLoader, SpectraGenerator
import pickle
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from spectra_models import BaseModel

In [5]:
def spectra_train_test_splitter(spectra_loader, test_size=0.15, random_seed=42):
    """Split the spectra held by a loader into stratified train and test sets.

    Parameters
    ----------
    spectra_loader : object
        Must expose a ``spectra`` attribute (the items to split) and a
        ``get_n()`` method returning one label per item, used for
        stratification.
    test_size : float or int, default 0.15
        Forwarded to ``train_test_split`` as ``test_size``.
    random_seed : int, default 42
        Forwarded to ``train_test_split`` as ``random_state`` so that the
        split is reproducible.

    Returns
    -------
    (spectra_train, spectra_test) : tuple of np.ndarray
    """
    spectra = np.array(spectra_loader.spectra)
    n_peaks = np.array(spectra_loader.get_n())
    # The labels are only needed for stratification, so only the spectra are split.
    # test_size / random_seed were previously accepted but silently ignored.
    spectra_train, spectra_test = train_test_split(
        spectra,
        test_size=test_size,
        random_state=random_seed,
        stratify=n_peaks,
    )
    return spectra_train, spectra_test

In [6]:
def find_nearest(array, value):
    """Return the index of the entry in ``array`` that is closest to ``value``.

    ``array`` is converted with ``np.asarray``; closeness is the absolute
    difference, and the position is whatever ``argmin`` reports for it.
    """
    distances = np.abs(np.asarray(array) - value)
    return np.argmin(distances)

In [7]:
class SpectraPreprocessor:
    """Load a train and a test spectra file and produce model-ready arrays.

    Parameters
    ----------
    train_filename, test_filename :
        Passed unchanged to ``SpectraLoader`` to build one loader per split.

    Attributes
    ----------
    train_spectra_loader, test_spectra_loader :
        The two ``SpectraLoader`` instances.
    one_hot_encoder :
        ``OneHotEncoder`` fitted on the training labels inside ``transform``.
    """

    def __init__(self, train_filename, test_filename):
        self.train_filename = train_filename
        self.test_filename = test_filename
        self.train_spectra_loader = SpectraLoader(self.train_filename)
        self.test_spectra_loader = SpectraLoader(self.test_filename)
        self.one_hot_encoder = OneHotEncoder(sparse=False, categories='auto')

    def get_data(self, loader):
        """Build the feature tensor and label column for one loader.

        Parameters
        ----------
        loader : object
            Must expose ``get_dm()`` returning a 3-D array-like and
            ``get_n()`` returning one label per sample.

        Returns
        -------
        X : np.ndarray
            ``get_dm()`` with axes 1 and 2 swapped and a trailing axis of
            length 1 appended, i.e. shape ``(d0, d2, d1, 1)``.
        y : np.ndarray
            Labels as a column of shape ``(d0, 1)``.

        Raises
        ------
        ValueError
            If ``get_dm()`` does not yield a 3-D array.
        """
        dm = np.array(loader.get_dm())
        if dm.ndim != 3:
            raise ValueError(
                "expected a 3-D array from loader.get_dm(), got shape %r" % (dm.shape,)
            )
        # Swap axes 1 and 2 with a real transpose. The previous
        # reshape(d0, d2, d1, 1) produced the same shape but only re-chunked
        # the flat buffer, which mixes values from different rows.
        X = np.ascontiguousarray(np.transpose(dm, (0, 2, 1)))[..., np.newaxis]
        y = np.array(loader.get_n())
        y = y.reshape(y.shape[0], 1)
        return X, y

    def transform(self):
        """Return ``(X_train, y_train_enc, X_test, y_test_enc)``.

        The encoder is fitted on the training labels only and then applied to
        the test labels.
        """
        X_train, y_train = self.get_data(self.train_spectra_loader)
        X_test, y_test = self.get_data(self.test_spectra_loader)
        y_train_enc = self.one_hot_encoder.fit_transform(y_train)
        y_test_enc = self.one_hot_encoder.transform(y_test)
        return X_train, y_train_enc, X_test, y_test_enc
        
        

In [8]:
# Locations of the pickled train / test spectra, and the preprocessor built
# from them (arguments given in the constructor's declared order).
train_path = 'spectra_models/data/set_01/train_01.pkl'
test_path = 'spectra_models/data/set_01/test_01.pkl'
spectra_preprocessor = SpectraPreprocessor(train_path, test_path)

In [9]:
# transform() returns features plus encoded labels for both splits, so
# y_train / y_test here are the encoder's output, not the raw labels.
X_train, y_train, X_test, y_test = spectra_preprocessor.transform()

### Train/Test Split

In [27]:
# spectra_train_test_splitter reads `.spectra` and `.get_n()` from its argument,
# so it needs a SpectraLoader rather than the bare filename.
spectra_loader = SpectraLoader('spectra_generator/data/spectra_01.pkl')
spectra_train, spectra_test = spectra_train_test_splitter(spectra_loader)

# Each spectrum is stored as its attribute dict before being handed to save_spectra.
spectra_train_json = [spectrum.__dict__ for spectrum in spectra_train]
spectra_test_json = [spectrum.__dict__ for spectrum in spectra_test]

# NOTE(review): these are the same files the SpectraPreprocessor cell earlier in
# the notebook reads, so on a fresh kernel this cell has to run before that one.
SpectraGenerator.save_spectra(spectra_train_json, "spectra_models/data/set_01/train_01.pkl")
SpectraGenerator.save_spectra(spectra_test_json, "spectra_models/data/set_01/test_01.pkl")

In [42]:
# NOTE(review): SpectraPreprocessor as defined in this notebook requires both
# train_filename and test_filename, so the single-argument calls below raise
# TypeError against that definition. This cell looks left over from an earlier
# version of the class — confirm and remove or update.
train_path = 'spectra_models/data/set_01/train_01.pkl'
test_path = 'spectra_models/data/set_01/test_01.pkl'
train_preprocessor = SpectraPreprocessor(train_path)
test_preprocessor = SpectraPreprocessor(test_path)

## Model Experimentation

In [10]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten


# Baseline CNN: two 3x3 convolutions over (1001, 10, 1) inputs, flattened into
# a 5-unit output layer.
model = Sequential()
model.add(Conv2D(32, kernel_size=3, activation='relu', input_shape=(1001, 10, 1)))
model.add(Conv2D(16, kernel_size=3, activation='relu'))
model.add(Flatten())
# The model is trained with categorical_crossentropy on one-hot targets, which
# expects the outputs to form a probability distribution over the classes.
# softmax provides that; the previous sigmoid scored each unit independently.
model.add(Dense(5, activation='softmax'))

Using TensorFlow backend.


In [14]:
# Compile settings that are later handed to BaseModel.fit as `compile_dict`.
compile_dict = dict(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [15]:
# Wrap the Keras model in the project's BaseModel (imported from spectra_models).
baseline_model = BaseModel(model)

In [16]:
# Train for 3 epochs with batch size 16. compile_dict is passed through to
# BaseModel.fit; presumably the wrapper compiles the model with it and holds out
# a validation split (the recorded log shows 6000 train / 1500 validation) — confirm.
baseline_model.fit(X_train, y_train, epochs=3, batch_size=16, compile_dict=compile_dict)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Train on 6000 samples, validate on 1500 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [124]:
# compile() has no `lr` parameter, so a learning rate placed in the kwargs never
# reaches the optimizer. It is set on the optimizer instance instead.
from keras.optimizers import Adam

compile_dict = {'optimizer': Adam(lr=0.03), 'loss': 'categorical_crossentropy'}
model.compile(**compile_dict)

In [125]:
# Recompile with SGD, replacing whatever compile configuration the model had.
# No metrics are requested here.
model.compile(optimizer='sgd', loss = 'categorical_crossentropy')

In [126]:
# NOTE(review): SpectraPreprocessor.transform as defined in this notebook
# returns four arrays (X_train, y_train_enc, X_test, y_test_enc), so unpacking
# into two names raises ValueError against that definition. This cell appears to
# target an earlier per-file version of the preprocessor — confirm.
X_train, y_train = train_preprocessor.transform()
X_test, y_test = test_preprocessor.transform()

In [127]:
# One-hot encode the labels: fit on the training labels, reuse for the test labels.
# NOTE(review): OneHotEncoder is already imported at the top of the notebook, and
# SpectraPreprocessor.transform as defined above already returns encoded labels;
# this cell presumably expects raw 1-D labels from an earlier version — confirm.
from sklearn.preprocessing import OneHotEncoder
one_hot = OneHotEncoder(sparse=False)
y_train_enc = one_hot.fit_transform(y_train.reshape(y_train.shape[0], 1))
y_test_enc = one_hot.transform(y_test.reshape(y_test.shape[0], 1))

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [None]:
class PeakModel:
    """Bundle a model with the batch size and epoch count used to train it.

    Parameters
    ----------
    model :
        Object exposing ``fit(X, y, batch_size=..., epochs=..., **kwargs)`` and
        ``get_config()``.
    batch_size : int
        Passed to ``model.fit`` on every ``train`` call.
    epochs : int
        Passed to ``model.fit`` on every ``train`` call.
    """

    def __init__(self, model, batch_size, epochs):
        self.model = model
        self.batch_size = batch_size
        self.epochs = epochs

    def train(self, X=None, y=None, **fit_kwargs):
        """Fit the wrapped model on ``X`` / ``y`` with the stored settings.

        The data is passed in explicitly rather than read from a notebook
        global. Extra keyword arguments are forwarded to ``model.fit``.

        Returns
        -------
        Whatever ``model.fit`` returns.

        Raises
        ------
        ValueError
            If ``X`` or ``y`` is not supplied.
        """
        if X is None or y is None:
            raise ValueError("train() requires both X and y")
        return self.model.fit(
            X,
            y,
            batch_size=self.batch_size,
            epochs=self.epochs,
            **fit_kwargs
        )

    def save(self):
        """Return the wrapped model's configuration from ``get_config()``.

        NOTE(review): nothing is written to disk yet despite the method name.
        """
        # Was `get_cofig`, which raises AttributeError.
        return self.model.get_config()
        

In [103]:
# Round-trip the model through its JSON description to build a second model `m`.
# NOTE(review): presumably this carries the architecture only, not the trained
# weights — confirm before using `m` for predictions.
from keras.models import model_from_json
m = model_from_json(model.to_json())

In [128]:
# Train the raw Keras model for one epoch, holding out 20% of the training
# data for validation.
model.fit(X_train, y_train_enc, validation_split=0.20, epochs=1, batch_size=16)

Train on 6000 samples, validate on 1500 samples
Epoch 1/1


<keras.callbacks.callbacks.History at 0x1d091f3e48>

In [129]:
# Evaluate on the held-out test set. The recorded output is a single number,
# consistent with the model having been compiled without metrics (loss only).
# NOTE(review): the recorded 1.606 is close to ln(5) ≈ 1.609, the cross-entropy
# of a uniform guess over 5 classes, which suggests the model has not learned
# anything useful yet — confirm.
model.evaluate(X_test, y_test_enc)



1.60618284740448