In [1]:
## For platforms
import os


def get_platform():
    platform = ''

    # Windows
    if os.name == 'nt':
        try:
            get_ipython().__class__.__name__
            platform = 'jupyter'
        except NameError:
            platform = 'python'

    elif os.name == 'posix':
        # Kaggle
        if 'KAGGLE_DATA_PROXY_TOKEN' in os.environ.keys():
            platform = 'kaggle'

    # Google Colab
        else:
            try:
                from google.colab import drive
                platform = 'colab'
            except ModuleNotFoundError:
                platform = None

    print(f'Use: {platform}')
    return platform


def colab_action():
    from google.colab import drive
    drive.mount('/content/drive/', force_remount=True)
    os.chdir(f'/content/drive/My Drive/LOB/Pipeline')
    os.system('pip install automodinit keras_tuner')
    os.system('nohup /usr/bin/python3 Colab_saver.py &')


def kaggle_action():
    ...


platform = get_platform()
if platform == 'colab':
    colab_action()
elif platform == 'kaggle':
    kaggle_action()

import backend as B

B.set_backend(platform)

import numpy as np
import pandas as pd
import tensorflow as tf

from backend import DataBack, ModelBack, DataClass

seq_len = 100

Use: jupyter
Using TensorFlow backend
Dataset  : ../dataset/saved_data
Callbacks: ../Temp/callbacks


In [2]:
from models import m_base as test_model

In [3]:
## Load data
data_back = DataBack()
proportion = input('Data proportion 100-0 in % (press enter for all): ')
if proportion == '': proportion = 1
else: proportion = float(proportion) / 100

train, val, test = data_back.read_saved_data(
    proportion=proportion,
    train_indexes=[0],
    val_indexes=[0],
)
print(data_back.last_data_info)
data_back.inspect_data(train=train, val=val, test=test)

ds_train = data_back.data_to_dataset(
    data=train,
    seq_len=seq_len,
    batch_size=100,
)
ds_val = data_back.data_to_dataset(data=val, seq_len=seq_len, batch_size=100)
data_back.inspect_dataset(train=ds_train, val=ds_val)

Read saved data, info writen to last_data_info.
{'read_from': 'saved', 'proportion': 0.5, 'train_indexes': [0], 'val_indexes': [0], 'test_indexes': []}
    Datas:
train     : x= (15804, 40)     | y= (15804,)       
val       : x= (3951, 40)      | y= (3951,)        
    Datasets:
train : [158, 100, 40]
val   : [39, 100, 40]


In [6]:
DEFAULT_PARAMETRS = DataClass(test_model.PARAMETRS)
print(DEFAULT_PARAMETRS)

convolutional
├─ dilation_steps: 4
└─ filters: 14
feed_forward
├─ activation: <function relu at 0x0000027FA6ABADD0>
├─ dropout_rate: 0.1
├─ kernel_initializer: glorot_uniform
├─ kernel_regularizer_class: <class 'keras.regularizers.L2'>
├─ out_activation: softmax
└─ units: 64
optimizer
├─ beta_1: 0.9
├─ beta_2: 0.999
└─ learning_rate: 0.0001
seq_len: 100
transformer
├─ blocks: 2
├─ heads: 3
└─ share_weights: False



In [126]:
class DataClass:
    """
    make only lover case parametrs and not start with _
    All this methods (exept __call__) only for beauty representation :)
    """
    _data_nested={}
    _data_expanded={}
    
    @staticmethod
    def __not_data(field=None, get=False, not_data_fields: set = set()):
        if not get:
            not_data_fields.add(field.__name__)
            return field
        else:
            return not_data_fields

    def __init__(
        self,
        target_dict: dict = {},
        name: str = '',
    ):
        for field_name in self.__get_all_fields():
            field = getattr(self, field_name)
            if type(self.__init__) == type(field):
                # TODO: add signature
                field_result = field.__func__
                setattr(self, field_name, field_result)
        self._data_nested_update()
        self._data_expanded_update()
        
    def __new__(
        cls,
        target_dict: dict = {},
        name: str = '',
    ):
        """
        build from nested dict
        """
        if target_dict != {}:
            new_dataclass = DataClass()
            result_dataclass = new_dataclass.__rec_build(name, target_dict)
            return result_dataclass

        return super().__new__(cls)

    def __rec_build(self, field_name: str, field):
        if not isinstance(field, dict):
            return field

        result_dataclass = DataClass()
        for inner_field_name, inner_field in field.items():
            inner_result = result_dataclass.__rec_build(
                inner_field_name,
                inner_field,
            )
            setattr(result_dataclass, inner_field_name, inner_result)
        result_dataclass._data_nested_update()
        result_dataclass._data_expanded_update()
        return result_dataclass

    def __call__(self, **kwargs: dict):
        """
        Set up parametrs
        """
        for key, value in kwargs.items():
            setattr(self, key, value)

    @__not_data
    def COPY(self):
        return DataClass(self.DATA_NESTED)

    def __get_all_fields(self):
        filter_func = lambda x: (x[0] != '_') and (x not in self.__not_data(
            get=True))
        fields = [field for field in self.__dir__() if filter_func(field)]
        return fields

    def __repr__(self) -> str:
        return f'<DataClass object: {[field for field in self.__get_all_fields()]}>'

    def __str__(self) -> str:
        """
        Representation of options
        """
        return self.__rec_print()[4:]

    def __rec_print(
        self,
        self_name: str = '',
        self_header: str = '',
        last=True,
    ):
        end = "└─ "
        pipe = "│  "
        tee = "├─ "
        blank = "   "

        result = ''
        if not isinstance(self, DataClass):
            result = self
            # if '<' in repr(self):
            #     result = repr(self).split(' at ')[0].replace('<', '').strip()

            return f'{self_header}{end if last else tee}{self_name}: {result}\n'

        result = f'{self_header}{end if last else tee}{self_name}\n'
        fields = self.__get_all_fields()
        for field_name in fields:
            inner_result = DataClass.__rec_print(
                getattr(self, field_name),
                self_name=field_name,
                self_header=f'{self_header}{blank if last else pipe}',
                last=field_name == fields[-1])

            result += inner_result[6:]

        return result

    @property
    @__not_data
    def DATA_NESTED(self):
        """
        Containing options dict
        """
        return self._data_nested

    def _data_nested_update(self):
        self._data_nested = self.__rec_nest()

    def __rec_nest(self, self_name=None):
        if not isinstance(self, DataClass):
            return {self_name: self}

        result = {}
        for field_name in self.__get_all_fields():
            inner_result = DataClass.__rec_nest(
                getattr(self, field_name),
                field_name,
            )
            result.update(inner_result)
        return {self_name: result} if self_name is not None else result

    @property
    @__not_data
    def DATA_EXPANDED(self):
        return self._data_expanded

    def _data_expanded_update(self):
        self._data_expanded = {
            compound_key.strip()[2:]: value
            for value, compound_key in self.__rec_expand()
        }

    def __rec_expand(self, composite_key=''):
        if not isinstance(self, DataClass):
            yield (self, composite_key)
        else:
            for field_name in self.__get_all_fields():
                for inner_result in DataClass.__rec_expand(
                        getattr(self, field_name),
                        str(composite_key) + '__' + str(field_name),
                ):
                    yield inner_result

    def __getitem__(self, value):
        if isinstance(value, list):
            result = {}
            for i_value in value:
                result.update({i_value: getattr(self, i_value, None)})
            result = DataClass(result)

        elif isinstance(value, tuple):
            result = self
            for i_value in value:
                result = getattr(result, i_value, None)
        else:
            result = getattr(self, value, None)

        return result

    @__not_data
    def COMPARE(self, compared):
        return DataClass(self.__rec_compare(compared))

    def __rec_compare(self, compared, self_name=None):
        if not isinstance(self, DataClass):
            return {self_name: (self, compared)}

        result = {}
        for field_name in self.__get_all_fields():
            inner_result = DataClass.__rec_compare(
                getattr(self, field_name),
                getattr(compared, field_name, None),
                field_name,
            )
            result.update(inner_result)

        if self_name is None:
            return result
        else:
            return {self_name: result}


In [127]:
class T(DataClass):
    v=2
    c={'g':4,'h':7}
t =T()
print(t)

v: 2
c: {'g': 4, 'h': 7}



In [129]:
t.DATA_EXPANDED

{}

In [123]:
print(
    DataClass(t.DATA_NESTED)
)

AttributeError: 'T' object has no attribute '_data_nested'

In [119]:
r= {
    'a': {
        'd': 3
    },
    
    'b': {
        'c': 4,
        'e': {
            'v': 5,
            'c': 4,
            
            'd': {
                'd': 3
            },
        }
    }
}
a = DataClass(r)
print(a)

a
└─ d: 3
b
├─ c: 4
└─ e
   ├─ v: 5
   ├─ c: 4
   └─ d
      └─ d: 3



In [116]:
print(a['b','e'])

v: 5
c: 4
d
└─ d: 3



In [105]:
print(a['b'])

c: 4
e
├─ v: 5
├─ c: 4
└─ d
   └─ d: 3



In [106]:
%%timeit
a['b']['e']['d']['d']

4.21 µs ± 363 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [113]:
%%timeit
a[['b','e','d','d']]

868 µs ± 11.2 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [108]:
%%timeit
a.__getattribute__('b').__getattribute__('e').__getattribute__('d').__getattribute__('d')

645 ns ± 12.6 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [109]:
%%timeit
r['b']['e']['d']['d']

238 ns ± 8.81 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [5]:
## Build
tf.keras.backend.clear_session()
restore = True if input('Restore? (y-yes, enter-no): ') == 'y' else False
input_name = ''
while input_name == '':
    input_name = input(
        f"Input train name to {'restore' if restore else 'build new'}: ")

if restore:
    model, train_name = ModelBack.restore_model(input_name)

else:
    ## Set up parametrs
    PARAMETRS = DEFAULT_PARAMETRS.COPY()
    model = test_model.blocks.build_model(**PARAMETRS.DATA_NESTED)
    train_name = ModelBack.get_training_name(input_name)
    print(
        f'Pattern model: {test_model.__name__}',
        f'Train name: {train_name}',
        'Parametrs:',
        DEFAULT_PARAMETRS.COMPARE(PARAMETRS),
        sep='\n',
    )

model.summary()

TypeError: optimazer_block() got an unexpected keyword argument 'beta_1'

In [None]:
## Callbacks
callback_freq = 1
train_dir = f'{ModelBack.callback_path}/{train_name}'
callbacks = [
    tf.keras.callbacks.TensorBoard(
        log_dir=train_dir,
        histogram_freq=1,
        update_freq=callback_freq,
    ),
    tf.keras.callbacks.ModelCheckpoint(
        f'{train_dir}/checkpoints/' + '{epoch:04d}.keras',
        monitor="val_sp_acc",
        verbose=0,
        save_best_only=False,
        save_weights_only=False,
        mode="auto",
        save_freq=callback_freq,
    )
]
ModelBack.dump(
    data_info=data_back.last_data_info,
    parametrs=DEFAULT_PARAMETRS.COMPARE(PARAMETRS),
    model_path=train_dir,
)
print(
    f"Callbacks:\n{[str(type(callback)).split('.')[-1] for callback in callbacks]}",
    f'Directory: {train_dir}',
    sep='\n',
)

In [None]:
# %tensorboard

In [None]:
## Train
training_question = ''
while training_question not in ['y', 'n']:
    training_question = input(f'Start training now (y-yes) (n-exit): ')
if training_question == 'y':
    model.fit(
        ds_train,
        epochs=20,
        validation_data=ds_val,
        callbacks=callbacks,
    )
