In [6]:
!pip install autokeras

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting autokeras
  Downloading autokeras-1.1.0-py3-none-any.whl (148 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m148.6/148.6 KB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
Collecting keras-tuner>=1.1.0
  Downloading keras_tuner-1.3.0-py3-none-any.whl (167 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m167.3/167.3 KB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
Collecting keras-nlp>=0.4.0
  Downloading keras_nlp-0.4.1-py3-none-any.whl (466 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m466.8/466.8 KB[0m [31m40.3 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow-text
  Downloading tensorflow_text-2.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.8/5.8 MB[0m [31m93.4 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy
  Downloading kt_le

In [7]:
import pandas as pd
import tensorflow as tf
import autokeras as ak

In [8]:
# Remote locations of the Titanic training and evaluation CSVs.
TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"

# Fetch each CSV and keep the local path that get_file returns.
train_file_path = tf.keras.utils.get_file(fname="train.csv", origin=TRAIN_DATA_URL)
test_file_path = tf.keras.utils.get_file(fname="eval.csv", origin=TEST_DATA_URL)

Downloading data from https://storage.googleapis.com/tf-datasets/titanic/train.csv
Downloading data from https://storage.googleapis.com/tf-datasets/titanic/eval.csv


In [9]:
# Set up the structured data classifier; the search is limited to 3 trials.
clf = ak.StructuredDataClassifier(max_trials=3, overwrite=True)
# Train directly from the CSV path, naming "survived" as the label column.
clf.fit(train_file_path, "survived", epochs=10)

Trial 3 Complete [00h 00m 04s]
val_accuracy: 0.895652174949646

Best val_accuracy So Far: 0.895652174949646
Total elapsed time: 00h 00m 19s
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f08c81d9b20>

In [10]:
# Predict with the best model, reading the features straight from the test CSV
predicted_y = clf.predict(test_file_path)
# Evaluate the best model with testing data, using "survived" as the label column.
# The printed list is presumably [loss, accuracy] — TODO confirm the metric order.
print(clf.evaluate(test_file_path,"survived"))

[0.4627113938331604, 0.7954545617103577]


In [11]:
# x_train as pandas.DataFrame, y_train as pandas.Series
x_train = pd.read_csv(train_file_path)
print(type(x_train))
# pop() removes the "survived" column from x_train and returns it,
# so x_train keeps only the feature columns from here on.
y_train = x_train.pop("survived")
print(type(y_train))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


In [12]:
# You can also use pandas.DataFrame for y_train
y_train = pd.DataFrame(y_train)
print(type(y_train))

<class 'pandas.core.frame.DataFrame'>


In [13]:
# numpy.ndarray can also be used for x_train and y_train:
# convert both in one step, then show the resulting types.
x_train, y_train = x_train.to_numpy(), y_train.to_numpy()
print(type(x_train), type(y_train), sep="\n")

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [17]:
# Prepare testing data: load the eval CSV, then split off the "survived" label column
x_test = pd.read_csv(test_file_path)
y_test = x_test.pop("survived")

In [19]:
# It tries 3 different models (max_trials=3)
clf = ak.StructuredDataClassifier(overwrite=True, max_trials=3)
# Train on in-memory data instead of a CSV path
clf.fit(x_train, y_train, epochs=10)
predicted_y=clf.predict(x_test)
print(clf.evaluate(x_test, y_test))

Trial 3 Complete [00h 00m 04s]
val_accuracy: 0.843478262424469

Best val_accuracy So Far: 0.852173924446106
Total elapsed time: 00h 00m 14s
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.44815272092819214, 0.8030303120613098]


In [22]:
# Wrap features and labels in tf.data.Datasets of (x, y) pairs.
# Features are cast to str first — presumably so the mixed-type columns fit in a single tensor; TODO confirm.
train_set=tf.data.Dataset.from_tensor_slices((x_train.astype(str), y_train))
test_set = tf.data.Dataset.from_tensor_slices(
    # x_test is a DataFrame, so it is converted to an ndarray before the cast
    (x_test.to_numpy().astype(str), y_test)
)

clf = ak.StructuredDataClassifier(overwrite=True, max_trials=3)
# No separate label argument: each dataset element already carries its label
clf.fit(train_set, epochs=10)
predicted_y = clf.predict(test_set)
print(clf.evaluate(test_set))

Trial 3 Complete [00h 00m 06s]
val_accuracy: 0.8608695864677429

Best val_accuracy So Far: 0.886956512928009
Total elapsed time: 00h 00m 17s
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.43472427129745483, 0.7765151262283325]


In [25]:
# Initialize the structured data classifier
clf = ak.StructuredDataClassifier(
    column_names=[
        "sex",
        "age",
        "n_siblings_spouses",
        "parch",
        "fare",
        "class",
        "deck",
        "embark_town",
        "alone",
    ],
    column_types={"sex":"categorical","fare":"numerical"},
    max_trials=10,
    overwrite=True,
)

In [26]:
# Fit with validation_split=0.15, i.e. a 0.15 fraction of the training data is used for validation.
clf.fit(x_train, y_train, epochs=10, validation_split=0.15)

Trial 10 Complete [00h 00m 05s]
val_accuracy: 0.8554216623306274

Best val_accuracy So Far: 0.891566276550293
Total elapsed time: 00h 00m 53s
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f08320a5910>

In [28]:
# Use an explicit validation set: the first `split` rows train, the rest validate.
split = 500
# Guard the slicing so the cell is safe to re-run. x_train/y_train are overwritten
# with their first `split` rows, so slicing them a second time would leave
# x_val/y_val empty. After the first run the guard is false and the existing
# x_val/y_val are reused unchanged.
if len(x_train) > split:
    x_val, y_val = x_train[split:], y_train[split:]
    x_train, y_train = x_train[:split], y_train[:split]
clf.fit(
    x_train,
    y_train,
    # Hand-made validation set instead of an automatic validation_split
    validation_data=(x_val, y_val),
    epochs=10,
)

In [29]:
# Assemble a custom pipeline node by node:
# StructuredDataInput -> StructuredDataBlock -> ClassificationHead.
input_node = ak.StructuredDataInput()
output_node = ak.StructuredDataBlock(categorical_encoding=True)(input_node)
output_node = ak.ClassificationHead()(output_node)

# AutoModel searches over the graph defined above, limited to 3 trials.
clf = ak.AutoModel(
    inputs=input_node,
    outputs=output_node,
    overwrite=True,
    max_trials=3,
)
clf.fit(x_train, y_train, epochs=10)

Trial 3 Complete [00h 00m 04s]
val_loss: 0.5019729137420654

Best val_loss So Far: 0.5019729137420654
Total elapsed time: 00h 00m 13s
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f08400f8670>

In [31]:
# Same kind of pipeline built from separate blocks:
# StructuredDataInput -> CategoricalToNumerical -> DenseBlock -> ClassificationHead
input_node = ak.StructuredDataInput()
output_node = ak.CategoricalToNumerical()(input_node)
output_node = ak.DenseBlock()(output_node)
output_node = ak.ClassificationHead()(output_node)
clf = ak.AutoModel(
    inputs = input_node, outputs = output_node, overwrite = True, max_trials = 1
)
# One trial and one epoch only: a quick run, not a tuned model
clf.fit(x_train, y_train, epochs = 1)
# NOTE(review): as the cell's last expression this displays a prediction for
# every training row; consider slicing the result to keep the output short.
clf.predict(x_train)

Trial 1 Complete [00h 00m 04s]
val_loss: 3.25933837890625

Best val_loss So Far: 3.25933837890625
Total elapsed time: 00h 00m 04s


array([[1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],

In [32]:
# Export the model from the AutoModel and print its layer summary
model = clf.export_model()
model.summary()
# Show the dtype of the training array before it is cast below
print(x_train.dtype)
# The exported model is given string-cast features — presumably its
# encoding layer expects string input; TODO confirm.
model.predict(x_train.astype(str))

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 9)]               0         
                                                                 
 multi_category_encoding (Mu  (None, 9)                0         
 ltiCategoryEncoding)                                            
                                                                 
 dense (Dense)               (None, 32)                320       
                                                                 
 re_lu (ReLU)                (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 16)                528       
                                                                 
 re_lu_1 (ReLU)              (None, 16)                0         
                                                             

array([[6.77757084e-01],
       [9.97949392e-02],
       [6.53574526e-01],
       [9.69436690e-02],
       [5.23400068e-01],
       [5.82139313e-01],
       [5.51790893e-01],
       [4.51255858e-01],
       [4.86647010e-01],
       [6.03349626e-01],
       [2.44319260e-01],
       [5.42793214e-01],
       [4.31434333e-01],
       [6.64918780e-01],
       [5.85408390e-01],
       [6.11467719e-01],
       [3.46868277e-01],
       [3.26384544e-01],
       [2.20033303e-01],
       [6.10203683e-01],
       [2.02468837e-05],
       [5.32597721e-01],
       [6.67921603e-01],
       [3.17332417e-01],
       [4.48740879e-03],
       [5.34351945e-01],
       [8.88467371e-01],
       [9.81062129e-02],
       [7.26345032e-02],
       [6.10149026e-01],
       [4.37603652e-01],
       [7.75051057e-01],
       [4.11950856e-01],
       [6.01465762e-01],
       [3.23323458e-01],
       [6.66053295e-01],
       [4.80562240e-01],
       [4.13536400e-01],
       [3.55606198e-01],
       [3.66870373e-01],
