In [0]:
import os
import autokeras as ak
import pandas as pd
import tensorflow as tf
from mlflow.models.signature import infer_signature
import mlflow

**GPU/TPU Setup**

In [0]:
# Select the tf.distribute strategy for the available hardware: a TPU if one
# can be resolved, otherwise all visible GPUs, otherwise the default strategy.
# Detection runs exactly once so the TPU system is not initialised twice.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
except ValueError:
    # No TPU could be resolved in this environment.
    tpu = None

# Only query GPUs when no TPU was found; `gpus` is defined on every path.
gpus = [] if tpu else tf.config.list_logical_devices("GPU")

if tpu:
    # Connect to the TPU cluster before initialising it, then build an actual
    # strategy instance (not just a reference to the class).
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
    print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
elif len(gpus) > 1:
    # MirroredStrategy is the single-host multi-GPU strategy and accepts a
    # list of device names; MultiWorkerMirroredStrategy expects a cluster
    # resolver instead and cannot take this list.
    strategy = tf.distribute.MirroredStrategy([gpu.name for gpu in gpus])
    print('Running on multiple GPUs ', [gpu.name for gpu in gpus])
elif len(gpus) == 1:
    strategy = tf.distribute.get_strategy()
    print('Running on single GPU ', gpus[0].name)
else:
    strategy = tf.distribute.get_strategy()
    print('Running on CPU')
print("Number of accelerators: ", strategy.num_replicas_in_sync)

**AutoML**

In [0]:
# Load the training table and convert it to pandas: the following cells call
# `.pop()` on it and slice rows with `[:500000]`, neither of which a Spark
# DataFrame supports.
train_df = spark.read.options(inferSchema=True).table("traincleantable").toPandas()

In [0]:
# Display the column names of the training table.
train_df.columns

In [0]:
# Take the 'Etiquette_DPE' column out of train_df; it is passed to clf.fit as
# the target. NOTE(review): `.pop` is the pandas in-place API — confirm
# train_df is a pandas DataFrame here; if so, re-running this cell without
# reloading train_df will fail because the column is already gone.
label = train_df.pop('Etiquette_DPE')

In [0]:
# Load the test table.
# NOTE(review): test_df is not referenced by any other cell visible in this
# notebook — confirm whether it is still needed.
test_df = spark.read.options(inferSchema=True).table("testcleantable")

In [0]:
# Take the 'N°DPE' column out of train_df so it is not passed to clf.fit.
# NOTE(review): this shadows the builtin `id`, and the value stored here is
# reassigned from val_df in a later cell before it is ever read.
id = train_df.pop('N°DPE')

In [0]:
# Run the AutoKeras structured-data search and training inside the selected
# distribution strategy scope.
with strategy.scope():
    clf = ak.StructuredDataClassifier(overwrite=True, max_trials=10)

    # Only the first 500000 rows of features and targets are used for fitting.
    fit_features = train_df[:500000]
    fit_targets = label[:500000]

    # Early stopping with a patience of 3 epochs; TensorBoard logs go to 'logs'.
    early_stopping = tf.keras.callbacks.EarlyStopping(patience=3)
    tensorboard = tf.keras.callbacks.TensorBoard(log_dir='logs')

    clf.fit(
        fit_features,
        fit_targets,
        epochs=10,
        validation_split=0.2,
        callbacks=[early_stopping, tensorboard],
    )

**Submission**

In [0]:
# Load the validation table and convert it to pandas: the next cell calls
# `.pop()` on it, which a Spark DataFrame does not support, and the frame is
# then passed to clf.predict.
val_df = spark.read.options(inferSchema=True).table("valtable").toPandas()

In [0]:
# Take the 'N°DPE' column out of val_df; `id` is written into the submission
# frame further down. NOTE(review): this shadows the builtin `id`.
id = val_df.pop('N°DPE')

In [0]:
# Run the classifier's predict on val_df inside the distribution strategy scope.
with strategy.scope():
    predicted_y = clf.predict(val_df)

In [0]:
# Display the predictions as the cell output.
predicted_y

In [0]:
# Build the submission frame from the popped 'N°DPE' values and the
# predictions, in that column order.
submission = pd.DataFrame().assign(**{
    'N°DPE': id,
    'Etiquette_DPE': predicted_y,
})

In [0]:
# Write the submission to Sub.csv, omitting the DataFrame index.
submission.to_csv("Sub.csv", index=False)