In [1]:
cd '~/Desktop/AIQC'

/Users/layne/Desktop/AIQC


This ensures the tests run against the local AIQC source rather than an installed PyPI package.

In [2]:
from aiqc.orm import destroy_db

In [3]:
# Destructive: as the output below shows, this deletes the existing AIQC SQLite
# file and then recreates the file and all of its tables, so anything stored by
# earlier runs is lost.
destroy_db(confirm=True, rebuild=True)


=> Success - deleted database file at path:
/Users/layne/Library/Application Support/aiqc/aiqc.sqlite3


=> Success - created database file at path:
/Users/layne/Library/Application Support/aiqc/aiqc.sqlite3


💾  Success - created all database tables.  💾



---

## Example Data

This dataset consists of:

* *Features* = sonar readings that have been bounced off a distant object. 
* *Label* = either a rock or a metal structure (potentially a naval mine).

Reference [Example Datasets](example_datasets.ipynb) for more information.

In [4]:
from aiqc import datum

In [5]:
# Load the bundled 'sonar.csv' example dataset as a pandas DataFrame.
df = datum.to_pandas('sonar.csv')

In [6]:
# Preview the first rows: numeric reading columns 'a'..'bh' followed by the
# 'object' column, which holds the class label.
df.head()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,...,az,ba,bb,bc,bd,be,bf,bg,bh,object
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


---

## Pipeline

Reference [High-Level API Docs](api_high_level.ipynb) for more information including how to work with non-tabular data.

In [7]:
from aiqc.orm import Dataset
from aiqc.mlops import Pipeline, Stratifier, Target, Input
from sklearn.preprocessing import LabelBinarizer, PowerTransformer

In [8]:
# Turn the DataFrame into an AIQC tabular Dataset. The same Dataset is handed
# to both Input (features) and Target (label) when the Pipeline is built.
shared_data = Dataset.Tabular.parse_data(df)

In [9]:
# Assemble the preprocessing pipeline from three parts: the feature Input, the
# label Target, and the Stratifier that controls how samples are split.
pipeline = Pipeline(
    # Features come from the shared dataset and are encoded with a Yeo-Johnson
    # power transform.
    Input(
        dataset=shared_data,
        encoders=Input.Encoder(
            sklearn_preprocess=PowerTransformer(method='yeo-johnson', copy=False),
        ),
    ),
    # The label is the 'object' column of the same dataset, binarized.
    Target(
        dataset=shared_data,
        column='object',
        encoder=Target.Encoder(
            sklearn_preprocess=LabelBinarizer(sparse_output=False),
        ),
    ),
    # Split sizes for the test and validation sets (presumably fractions of
    # the samples -- confirm against the Stratifier docs).
    Stratifier(
        size_test=0.12,
        size_validation=0.22,
    ),
)


___/ featurecoder_index: 0 \_________
=> The column(s) below matched your filter(s) featurecoder filters.

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'aa', 'ab', 'ac', 'ad', 'ae', 'af', 'ag', 'ah', 'ai', 'aj', 'ak', 'al', 'am', 'an', 'ao', 'ap', 'aq', 'ar', 'as', 'at', 'au', 'av', 'aw', 'ax', 'ay', 'az', 'ba', 'bb', 'bc', 'bd', 'be', 'bf', 'bg', 'bh']

=> Done. All feature column(s) have featurecoder(s) associated with them.
No more FeatureCoders can be added to this Encoderset.



---

## Modeling

Reference this great blog for machine learning cookbooks: [MachineLearningMastery.com "Binary Classification"](https://machinelearningmastery.com/binary-classification-tutorial-with-the-keras-deep-learning-library/).

In [10]:
import tensorflow as tf
from tensorflow.keras import layers as l

In [11]:
def fn_build(features_shape, label_shape, **hp):
    """Build the (uncompiled) binary-classification network.

    Args:
        features_shape: Shape passed to the Keras ``Input`` layer.
        label_shape: Shape of the label; its first entry sets the number of
            units in the sigmoid output layer.
        **hp: Hyperparameters. ``hp['neuron_count']`` is required and sets the
            width of both hidden ``Dense`` layers.

    Returns:
        A ``tf.keras`` ``Sequential`` model: Input -> Dense(relu) ->
        Dropout(0.30) -> Dense(relu) -> Dense(sigmoid).
    """
    width = hp['neuron_count']
    stack = (
        l.Input(shape=features_shape),
        l.Dense(width, activation='relu', kernel_initializer='he_uniform'),
        l.Dropout(0.30),
        l.Dense(width, activation='relu', kernel_initializer='he_uniform'),
        l.Dense(units=label_shape[0], activation='sigmoid', kernel_initializer='glorot_uniform'),
    )

    network = tf.keras.models.Sequential()
    for layer in stack:
        network.add(layer)
    return network

In [12]:
def fn_train(model, loser, optimizer, samples_train, samples_evaluate, **hp):
    """Compile and fit ``model``, then return it.

    Args:
        model: Model to train; must provide ``compile`` and ``fit``.
        loser: Loss passed to ``model.compile(loss=...)``.
        optimizer: Optimizer passed to ``model.compile(optimizer=...)``.
        samples_train: Mapping with ``'features'`` and ``'labels'`` entries
            used as the training data.
        samples_evaluate: Mapping with ``'features'`` and ``'labels'`` entries
            used as ``validation_data`` during fitting.
        **hp: Hyperparameters. ``hp['epochs']`` is required and sets the
            number of training epochs.

    Returns:
        The same ``model`` object after fitting.
    """
    model.compile(loss=loser, optimizer=optimizer, metrics=['accuracy'])

    train_features, train_labels = samples_train['features'], samples_train['labels']
    evaluation_pair = (samples_evaluate['features'], samples_evaluate['labels'])

    model.fit(
        train_features,
        train_labels,
        validation_data=evaluation_pair,
        verbose=0,
        batch_size=3,
        epochs=hp['epochs'],
        callbacks=[tf.keras.callbacks.History()],
    )
    return model

In [13]:
# Candidate values per hyperparameter: fn_build reads 'neuron_count' and
# fn_train reads 'epochs'.
hyperparameters = {
    'neuron_count': [25, 50],
    'epochs': [75, 150],
}

In [14]:
from aiqc.mlops import Experiment, Architecture, Trainer

In [15]:
# Tie the model definition (Architecture) to the training setup (Trainer).
experiment = Experiment(
    # What to train: a Keras binary classifier built by fn_build, fitted by
    # fn_train, over the candidate values in `hyperparameters`.
    Architecture(
        library="keras",
        analysis_type="classification_binary",
        fn_build=fn_build,
        fn_train=fn_train,
        hyperparameters=hyperparameters,
    ),
    # How to train: on the pipeline defined earlier, with repeat_count = 2.
    Trainer(
        pipeline_id=pipeline.id,
        repeat_count=2,
        search_percent=None,
        hide_test=False,
    ),
)

In [16]:
# Train every queued job. In the recorded run below this was 8 models
# (2 neuron counts x 2 epoch settings x repeat_count of 2), taking ~2.5 minutes.
experiment.run_jobs()

🔮 Training Models 🔮: 100%|██████████████████████████████████████████| 8/8 [02:34<00:00, 19.27s/it]


---

## Visualization & Interpretation

For more information on visualization of performance metrics, reference the [Dashboard](dashboard.html) documentation.