# Prototyping an ML Model on Tensorflow Datasets
## Prerequisites

In [37]:
import gc
import glob
import json
import os
import random
import shutil
import time
from typing import Iterable, Callable, Dict, Any, Tuple, Optional, List, Union

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds

from mmproteo.utils import log, utils, visualization
from mmproteo.utils.formats.mz import FilteringProcessor
from mmproteo.utils.formats.tf_dataset import Parquet2DatasetFileProcessor
from mmproteo.utils.processing import ItemProcessor

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 1000)

In [3]:
logger = log.DummyLogger(verbose=False)

INFO: Printing to Stdout


## Configuration

In [4]:
pwd

'/tf/workspace/notebooks'

In [5]:
PROJECT = "PXD010000"
DUMP_PATH = os.path.join("..", "dumps", PROJECT)
TRAINING_COLUMNS_DUMP_PATH = os.path.join(DUMP_PATH, "training_columns")
FILES_PATH = os.path.join(TRAINING_COLUMNS_DUMP_PATH, "*_mzmlid.parquet")
STATISTICS_FILE_PATH = os.path.join(TRAINING_COLUMNS_DUMP_PATH, "statistics.parquet")
DATASET_DUMP_PATH = os.path.join(TRAINING_COLUMNS_DUMP_PATH, "tf_datasets")

In [6]:
MZMLID_FILE_PATHS = glob.glob(FILES_PATH)
len(MZMLID_FILE_PATHS)

235

In [7]:
MZMLID_FILE_PATHS[0]

'../dumps/PXD010000/training_columns/Biodiversity_B_fragilis_01_28Jul15_Arwen_14-12-03_mzmlid.parquet'

In [8]:
df = pd.read_parquet(MZMLID_FILE_PATHS[1])
df.head(2)

Unnamed: 0,peptide_sequence,mz_array,intensity_array,species,istrain
21,"[C, K, P, T, S, P, G, R]","[102.0558, 115.05197, 116.971794, 119.907036, 129.1024, 136.06175, 152.05682, 157.84837, 159.22517, 171.11295, 175.119, 175.95169, 199.10796, 202.6932, 215.08527, 228.88432, 232.11212, 244.87819, 286.14047, 307.6665, 312.16718, 329.19223, 360.2081, 378.2132, 385.92047, 400.78918, 401.78973, 416.22388, 422.8325, 440.8446, 441.84528, 517.2766, 614.3271, 615.3258]","[723.529, 569.4288, 659.1485, 599.0097, 19982.768, 4909.943, 771.28937, 596.6283, 593.3602, 1262.0436, 868.29816, 581.3835, 721.64886, 752.1542, 2492.1565, 3854.2283, 1364.17, 615.11633, 746.43365, 1512.8475, 1474.3188, 1069.4283, 762.6549, 744.29315, 925.18164, 7245.0005, 2374.2295, 3248.2861, 4047.135, 21597.44, 5534.1826, 4359.906, 13269.387, 2903.926]",Citrobacter_freundii,Train
70,"[K, H, I, T, A, G, A, K]","[101.1075, 110.07151, 111.04457, 111.619194, 112.050735, 116.972084, 118.967834, 122.29705, 129.05539, 129.10248, 129.11131, 129.92657, 130.08653, 136.06192, 136.07182, 136.07652, 137.06726, 139.98817, 147.11304, 152.05687, 171.00543, 173.09312, 189.01633, 197.12833, 200.14093, 212.10458, 218.14975, 223.15533, 230.11382, 231.12407, 232.88867, 239.08455, 249.13492, 251.15112, 275.1718, 283.13745, 299.95496, 301.1428, 302.81696, 309.96753, 313.8611, 315.81067, 316.8158, 318.8151, 334.8159, 335.81232, 336.81036, 337.8101, 340.79953, 343.80972, 344.8009, 346.20868, 349.20425, 354.82224, 355.82004, 360.81204, 361.81488, 362.81155, 363.81027, 370.8382, 372.79752, 389.83908, 394.83862, 407.8483, 408.7495, 408.8483, 412.79782, 412.8495, 413.26614, 413.85025, 414.26913, 419.80457, 430.79797, 431.7977, 447.25662, 448.2613, 465.94623, 560.34186, 561.3432, 697.4013, 787.23486]","[1244.104, 18248.63, 747.18225, 672.4936, 3284.768, 5824.9575, 1207.1666, 563.56824, 1090.989, 18666.379, 1132.0656, 547.7189, 8010.2773, 9944.686, 717.7685, 909.038, 927.90424, 1259.5803, 9798.942, 12360.792, 777.4666, 711.51215, 1365.1267, 669.6005, 718.6803, 724.85455, 1516.5447, 6849.315, 1172.2983, 11597.979, 882.7782, 954.53986, 1087.7533, 5462.294, 3395.8171, 717.6081, 663.439, 7134.955, 748.11066, 1207.3207, 3609.01, 838.3727, 1179.3096, 1473.9382, 2907.0327, 3263.6355, 4049.7156, 4270.7646, 793.22906, 1597.9222, 4802.7974, 4149.7407, 6089.5537, 7634.4062, 5610.0933, 1050.6061, 957.7547, 6195.684, 1396.489, 866.404, 846.26697, 1433.4541, 1076.0883, 5400.0293, 1063.56, 1220.6185, 1581.0791, 21550.523, 21930.604, 7990.5386, 3053.2961, 754.4101, 2840.6213, 1415.5275, 9367.521, 1103.6198, 957.9087, 6343.26, 795.6793, 2594.3503, 750.4295]",Citrobacter_freundii,Train


In [9]:
SEQ = 'peptide_sequence'
MZ = 'mz_array'
INT = 'intensity_array'

In [10]:
TRAINING_DATA_COLUMNS = [MZ, INT]
TARGET_DATA_COLUMNS = [SEQ]
SPLIT_VALUE_COLUMNS = ['species', 'istrain']

## Calculating Statistics over all MZMLID Files

In [11]:
file_path_count = len(MZMLID_FILE_PATHS)

def get_mzmlid_file_stats(item: Tuple[int, str]) -> Dict[str, Any]:
    idx, path = item
    info_text = f"Processing item {idx + 1}/{file_path_count} '{path}'"
    if idx % 10 == 0:
        logger.info(info_text)
    else:
        logger.debug(info_text)
    df = pd.read_parquet(path)
    max_sequence_length = df[SEQ].str.len().max()
    max_array_length = df[INT].str.len().max()
    alphabet = set.union(*df[SEQ].apply(set))
    item_count = len(df)
    del df
    gc.collect()
    
    return {
        "file_path": path,
        "max_sequence_length": max_sequence_length,
        "max_array_length": max_array_length,
        "alphabet": alphabet,
        "item_count": item_count
    }

if os.path.exists(STATISTICS_FILE_PATH):
    file_stats = pd.read_parquet(STATISTICS_FILE_PATH)
    file_stats.alphabet = file_stats.alphabet.apply(set)
    print(f"loaded previous statistics file '{STATISTICS_FILE_PATH}'")
else:
    file_stats = pd.DataFrame(
        ItemProcessor(
            items=enumerate(MZMLID_FILE_PATHS),
            item_processor=get_mzmlid_file_stats,
            action_name="analyse",
            subject_name="mzmlid file",
            thread_count=0,
            logger=logger
        ).process()
    )
    
    file_stats_writable = file_stats.copy()
    file_stats_writable.alphabet = file_stats_writable.alphabet.apply(list) # cannot store sets
    file_stats_writable.to_parquet(STATISTICS_FILE_PATH)

loaded previous statistics file '../dumps/PXD010000/training_columns/statistics.parquet'


In [12]:
file_stats.head(2)

Unnamed: 0,file_path,max_sequence_length,max_array_length,alphabet,item_count
0,../dumps/PXD010000/training_columns/Biodiversity_B_fragilis_01_28Jul15_Arwen_14-12-03_mzmlid.parquet,50,1845,"{R, C, W, Q, M, F, K, P, A, L, Y, M(Oxidation), E, N, H, G, S, I, T, D, V}",26943
1,../dumps/PXD010000/training_columns/Biodiversity_Cibrobacter_freundii_LB_aerobic_01_01Feb16_Arwen_15-07-13_mzmlid.parquet,50,1697,"{R, C, W, Q, M, F, K, P, A, L, Y, M(Oxidation), E, N, H, G, S, I, T, D, V}",27516


In [13]:
PADDING_LENGTHS = {
    MZ: file_stats.max_array_length.max(),
    INT: file_stats.max_array_length.max(),
    SEQ: file_stats.max_sequence_length.max()
}

In [14]:
print("padding lengths =", PADDING_LENGTHS)

TOTAL_ITEM_COUNT = file_stats.item_count.sum()
print(f"TOTAL_ITEM_COUNT = {TOTAL_ITEM_COUNT}")

ALPHABET = set.union(*file_stats.alphabet)
print(f"ALPHABET = {', '.join(sorted(ALPHABET))}")

padding lengths = {'mz_array': 2354, 'intensity_array': 2354, 'peptide_sequence': 50}
TOTAL_ITEM_COUNT = 5513185
ALPHABET = A, C, D, E, F, G, H, I, K, L, M, M(Oxidation), N, P, Q, R, S, T, V, W, Y


## Data Normalization, Padding, and Conversion to Tensorflow Datasets

In [15]:
def l2_normalize(values: np.ndarray) -> np.ndarray:
    return tf.keras.utils.normalize(x=values, order=2)

def base_peak_normalize(values: np.ndarray) -> np.ndarray:
    return values / values.max(initial=0)

# by Tom, probably
# don't know, what it's based on
def ion_current_normalize(intensities: np.ndarray) -> np.ndarray:
    total_sum = np.sum(intensities**2)
    normalized = intensities/total_sum
    return normalized

NORMALIZATION = {
    INT: base_peak_normalize
}

In [16]:
PADDING_CHARACTERS = {
    SEQ: '_',
    MZ: 0.0,
    INT: 0.0,
}

ALPHABET.add(PADDING_CHARACTERS[SEQ])

In [17]:
char_to_idx = {char: idx for idx, char in enumerate(sorted(ALPHABET))}
idx_to_char = {idx: char for char, idx in char_to_idx.items()}
INDEX_ALPHABET = idx_to_char.keys()
char_to_idx

{'A': 0,
 'C': 1,
 'D': 2,
 'E': 3,
 'F': 4,
 'G': 5,
 'H': 6,
 'I': 7,
 'K': 8,
 'L': 9,
 'M': 10,
 'M(Oxidation)': 11,
 'N': 12,
 'P': 13,
 'Q': 14,
 'R': 15,
 'S': 16,
 'T': 17,
 'V': 18,
 'W': 19,
 'Y': 20,
 '_': 21}

In [18]:
Parquet2DatasetFileProcessor(
    training_data_columns=TRAINING_DATA_COLUMNS,
    target_data_columns=TARGET_DATA_COLUMNS,
    padding_lengths=PADDING_LENGTHS,
    padding_characters=PADDING_CHARACTERS,
    column_normalizations=NORMALIZATION,
    dataset_dump_path_prefix=DATASET_DUMP_PATH,
    char_to_idx_mapping_functions={
        SEQ: char_to_idx.get
    },
    item_count=len(MZMLID_FILE_PATHS),
    skip_existing=True,
    split_on_column_values_of=SPLIT_VALUE_COLUMNS,
    logger=logger
).process(parquet_file_paths=MZMLID_FILE_PATHS,
          thread_count=3)[:3]

INFO: Processing item 1/235: '../dumps/PXD010000/training_columns/Biodiversity_B_fragilis_01_28Jul15_Arwen_14-12-03_mzmlid.parquet'
INFO: Processing item 11/235: '../dumps/PXD010000/training_columns/Biodiversity_P_polymyxa_TBS_aerobic_3_17July16_Samwise_16-04-10_mzmlid.parquet'
INFO: Processing item 21/235: '../dumps/PXD010000/training_columns/M_alcali_copp_CH4_B2_T1_09_QE_23Mar18_Oak_18-01-07_mzmlid.parquet'
INFO: Processing item 31/235: '../dumps/PXD010000/training_columns/Cj_media_MH_R4_23Feb15_Arwen_14-12-03_mzmlid.parquet'
INFO: Processing item 41/235: '../dumps/PXD010000/training_columns/Biodiversity_C_Baltica_T240_R2_C_27Jan16_Arwen_15-07-13_mzmlid.parquet'
INFO: Processing item 51/235: '../dumps/PXD010000/training_columns/Biodiversity_M_xanthus_DZ2_plates_1_03May16_Samwise_16-03-32_mzmlid.parquet'
INFO: Processing item 61/235: '../dumps/PXD010000/training_columns/Biodiversity_B_thet_CMgluc_anaerobic_02_01Feb16_Arwen_15-07-13_mzmlid.parquet'
INFO: Processing item 71/235: '../dum

[]

## Loading Tensorflow Datasets

In [19]:
TRAINING_DATA_TYPES = {path.split(os.path.sep)[-1] for path in glob.glob(
    os.path.join(
        DATASET_DUMP_PATH, 
        '*',  # filename
        '*',  # species
        '*'   # istrain
    ))}
TRAINING_DATA_TYPES

{'Train'}

In [20]:
SPECIES = {path.split(os.path.sep)[-2] for path in glob.glob(
    os.path.join(
        DATASET_DUMP_PATH, 
        '*',  # filename
        '*',  # species
        '*'   # istrain
    ))}
SPECIES

{'Acidiphilium_cryptum_JF-5',
 'Agrobacterium_tumefaciens_IAM_12048',
 'Alcaligenes_faecalis',
 'Algoriphagus_marincola_HL-49',
 'Anaerococcus_hydrogenalis_DSM_7454',
 'Bacillus_cereus_ATCC14579',
 'Bacillus_subtilis_168',
 'Bacillus_subtilis_NCIB3610',
 'Bacteroides_fragilis_638R',
 'Bacteroides_thetaiotaomicron_VPI-5482',
 'Bifidobacterium_bifidum_ATCC29521',
 'Bifidobacterium_longum_infantis_ATCC15697',
 'Campylobacter_jejuni',
 'Cellulomonas_gilvus_ATCC13127',
 'Cellulophaga_baltica_18',
 'Chryseobacterium_indologenes',
 'Citrobacter_freundii',
 'Clostridium_ljungdahlii_DMS_13528',
 'Coprococcus_comes_ATCC27758',
 'Cupriavidus_necator_N-1',
 'Cyanobacterium_stanieri',
 'Delftia_acidovorans_SPH1',
 'Dorea_longicatena_DSM13814',
 'Erythrobacter_HL-111',
 'Faecalibacterium_prausnitzii',
 'Fibrobacter_succinogenes_S85',
 'Francisella_novicida_U112',
 'Halomonas_HL-48',
 'Halomonas_HL-93',
 'Lactobacillales_casei',
 'Legionella_pneumophila',
 'Listeria_monocytogenes_10403S',
 'Methylomi

In [21]:
len(SPECIES)

51

In [22]:
SHUFFLED_SPECIES = list(SPECIES)
random.shuffle(SHUFFLED_SPECIES)

SPECIES_TYPES = {
    "Train": SHUFFLED_SPECIES[:int(0.8 * len(SHUFFLED_SPECIES))],
    "Test": SHUFFLED_SPECIES[int(0.8 * len(SHUFFLED_SPECIES)) : int(0.94 * len(SHUFFLED_SPECIES))],
    "Eval": SHUFFLED_SPECIES[int(0.94 * len(SHUFFLED_SPECIES)):],
}

for training_type, species in SPECIES_TYPES.items():
    print(f"#{training_type} = {len(species)}")

#Train = 40
#Test = 7
#Eval = 4


In [23]:
def flatten(lists: List[List[Any]]) -> List[Any]:
    res = []
    for item in lists:
        res += item
    return res

dataset_file_paths = {
    training_type: flatten(
        [
            glob.glob(os.path.join(DATASET_DUMP_PATH, '*', specie, '*')) for specie in species
        ]
    ) for training_type, species in SPECIES_TYPES.items()
}

In [24]:
for training_data_type, paths in dataset_file_paths.items():
    print(f"#{training_data_type} = {len(paths)}")
    print(f"e.g.: {paths[0]}")
    print()

#Train = 182
e.g.: ../dumps/PXD010000/training_columns/tf_datasets/Biodiversity_B_subtilis_NCIB3610_24h_plates_1_13Jun16_Pippin_16-03-39_mzmlid.parquet/Bacillus_subtilis_NCIB3610/Train

#Test = 39
e.g.: ../dumps/PXD010000/training_columns/tf_datasets/Biodiversity_S_elongatus_BG11NaCl_aerobic_1_05Oct16_Pippin_16-05-06_mzmlid.parquet/Synechococcus_elongatus_PCC7942/Train

#Eval = 14
e.g.: ../dumps/PXD010000/training_columns/tf_datasets/Biodiversity_Cibrobacter_freundii_LB_aerobic_02_01Feb16_Arwen_15-07-13_mzmlid.parquet/Citrobacter_freundii/Train



In [25]:
# store the training file paths
DATASET_FILE_PATHS_DUMP_PATH = os.path.join(
    DATASET_DUMP_PATH, 
    f"dataset_file_paths.{time.strftime('%Y-%m-%d--%H-%M-%S')}.json"
)
                                
with open(DATASET_FILE_PATHS_DUMP_PATH, 'w') as file:
    file.write(visualization.pretty_print_json(dataset_file_paths))
    
print(f"stored current dataset file paths to '{DATASET_FILE_PATHS_DUMP_PATH}'")

stored current dataset file paths to '../dumps/PXD010000/training_columns/tf_datasets/dataset_file_paths.2021-06-01--15-47-49.json'


In [26]:
element_spec = ((tf.TensorSpec(shape=(PADDING_LENGTHS[MZ],), dtype=tf.float32), 
  tf.TensorSpec(shape=(PADDING_LENGTHS[INT],), dtype=tf.float32)),
(tf.TensorSpec(shape=(PADDING_LENGTHS[SEQ],), dtype=tf.int8)))
element_spec

((TensorSpec(shape=(2354,), dtype=tf.float32, name=None),
  TensorSpec(shape=(2354,), dtype=tf.float32, name=None)),
 TensorSpec(shape=(50,), dtype=tf.int8, name=None))

In [28]:
typed_datasets = {
    training_data_type: [
        tf.data.experimental.load(
            path=path, 
            element_spec=element_spec, 
            compression='GZIP'
        ) for path in paths
    ] for training_data_type, paths in dataset_file_paths.items()
}

typed_datasets

{'Train': [<_LoadDataset shapes: (((2354,), (2354,)), (50,)), types: ((tf.float32, tf.float32), tf.int8)>,
  <_LoadDataset shapes: (((2354,), (2354,)), (50,)), types: ((tf.float32, tf.float32), tf.int8)>,
  <_LoadDataset shapes: (((2354,), (2354,)), (50,)), types: ((tf.float32, tf.float32), tf.int8)>,
  <_LoadDataset shapes: (((2354,), (2354,)), (50,)), types: ((tf.float32, tf.float32), tf.int8)>,
  <_LoadDataset shapes: (((2354,), (2354,)), (50,)), types: ((tf.float32, tf.float32), tf.int8)>,
  <_LoadDataset shapes: (((2354,), (2354,)), (50,)), types: ((tf.float32, tf.float32), tf.int8)>,
  <_LoadDataset shapes: (((2354,), (2354,)), (50,)), types: ((tf.float32, tf.float32), tf.int8)>,
  <_LoadDataset shapes: (((2354,), (2354,)), (50,)), types: ((tf.float32, tf.float32), tf.int8)>,
  <_LoadDataset shapes: (((2354,), (2354,)), (50,)), types: ((tf.float32, tf.float32), tf.int8)>,
  <_LoadDataset shapes: (((2354,), (2354,)), (50,)), types: ((tf.float32, tf.float32), tf.int8)>,
  <_LoadDat

## Concatenating Tensorflow Datasets

In [29]:
BATCH_SIZE = 256
SHUFFLE_BUFFER_SIZE = 10_000_000
KEEP_CACHE = True

In [30]:
CACHED_DATASET_DUMP_PATH = os.path.join(DATASET_DUMP_PATH, "cache")

try:
    if not KEEP_CACHE:
        shutil.rmtree(CACHED_DATASET_DUMP_PATH)
except FileNotFoundError:
    pass

utils.ensure_dir_exists(CACHED_DATASET_DUMP_PATH)
CACHED_DATASET_DUMP_PATH

'../dumps/PXD010000/training_columns/tf_datasets/cache'

In [31]:
def concatenate_datasets(datasets: List[tf.data.Dataset]) -> tf.data.Dataset:
    dataset = datasets[0]
    for ds in datasets[1:]:
        dataset = dataset.concatenate(ds)
    return dataset

merged_datasets = {
    training_data_type: concatenate_datasets(datasets)
        .cache(os.path.join(CACHED_DATASET_DUMP_PATH, training_data_type))
    for training_data_type, datasets in typed_datasets.items()
}

merged_datasets

{'Train': <CacheDataset shapes: (((2354,), (2354,)), (50,)), types: ((tf.float32, tf.float32), tf.int8)>,
 'Test': <CacheDataset shapes: (((2354,), (2354,)), (50,)), types: ((tf.float32, tf.float32), tf.int8)>,
 'Eval': <CacheDataset shapes: (((2354,), (2354,)), (50,)), types: ((tf.float32, tf.float32), tf.int8)>}

In [32]:
def get_minimal_model():
    input_layers = [tf.keras.layers.Input(shape=(PADDING_LENGTHS[col],)) for col in TRAINING_DATA_COLUMNS]
    
    x = input_layers[0]
    for input_layer in input_layers[1:]:
        x = x + input_layer
    
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(PADDING_LENGTHS[SEQ]*len(ALPHABET))(x)
    x = tf.reshape(x,(-1, PADDING_LENGTHS[SEQ], len(ALPHABET)))
    x = tf.keras.activations.softmax(x)
    model = tf.keras.Model(input_layers,x)
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy())
    return model
        
def fill_cache(dataset):
    model = get_minimal_model()
    model.fit(dataset.batch(BATCH_SIZE), epochs=1)
    del model
    gc.collect()
    logger.info("filled a cache - waiting 10 seconds")
    time.sleep(10)
    return dataset

In [33]:
if not KEEP_CACHE:
    merged_datasets = {
        training_data_type: fill_cache(dataset)
        for training_data_type, dataset in merged_datasets.items()
    }

In [34]:
merged_datasets = {
    training_data_type: dataset
        .shuffle(SHUFFLE_BUFFER_SIZE, reshuffle_each_iteration=True)
        .batch(BATCH_SIZE, drop_remainder=True)
        .prefetch(tf.data.experimental.AUTOTUNE)
    for training_data_type, dataset in merged_datasets.items()
}

In [None]:
for training_data_type, dataset in merged_datasets.items():
    print(training_data_type)
    tfds.benchmark(dataset, batch_size=BATCH_SIZE)

Train


In [37]:
TRAINING_TYPE = 'Train'
TEST_TYPE = 'Test'
EVAL_TYPE = 'Eval'

## Building the Tensorflow Model

In [38]:
input_layers = [tf.keras.layers.Input(shape=(PADDING_LENGTHS[col],)) for col in TRAINING_DATA_COLUMNS]
input_layers

[<KerasTensor: shape=(None, 2354) dtype=float32 (created by layer 'input_9')>,
 <KerasTensor: shape=(None, 2354) dtype=float32 (created by layer 'input_10')>]

In [39]:
x = input_layers[0]
for input_layer in input_layers[1:]:
    x = x + input_layer

x = tf.keras.layers.Flatten()(x)

x = tf.keras.layers.Dense(PADDING_LENGTHS[SEQ]*len(ALPHABET))(x)

x = tf.reshape(x,(-1, PADDING_LENGTHS[SEQ], len(ALPHABET)))

x = tf.keras.activations.softmax(x)

model = tf.keras.Model(input_layers,x)
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy']
             )
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_9 (InputLayer)            [(None, 2354)]       0                                            
__________________________________________________________________________________________________
input_10 (InputLayer)           [(None, 2354)]       0                                            
__________________________________________________________________________________________________
tf.__operators__.add_4 (TFOpLam (None, 2354)         0           input_9[0][0]                    
                                                                 input_10[0][0]                   
__________________________________________________________________________________________________
flatten_4 (Flatten)             (None, 2354)         0           tf.__operators__.add_4[0][0

## Training the Tensorflow Model

In [None]:
model.fit(x=merged_datasets[TRAINING_TYPE],
          validation_data=merged_datasets[TEST_TYPE], 
          epochs=12)

Epoch 1/12


## Evaluating the Tensorflow Model