In [1]:
import pandas as pd
import numpy as np
np.set_printoptions(precision=3, suppress=True)
import tensorflow as tf
from tensorflow.keras import layers

2024-04-18 21:46:52.891480: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
# List the contents of the MURA-v1.1 directory one level above the notebook (IPython line magic).
%ls ../MURA-v1.1/

[0m[01;34mtrain[0m/                 train_labeled_studies.csv  valid_image_paths.csv
train_image_paths.csv  [01;34mvalid[0m/                     valid_labeled_studies.csv


In [None]:
# Relative path to train_labeled_studies.csv inside the MURA-v1.1 directory.
# NOTE(review): no other visible cell reads this variable — confirm it is used later.
mura_file_path = "../MURA-v1.1/train_labeled_studies.csv"

---
## Load data from a CSV file and include preprocessing in the model

In [2]:
# Download the Titanic training table into a DataFrame and preview its first rows.
titanic = pd.read_csv(
  filepath_or_buffer="https://storage.googleapis.com/tf-datasets/titanic/train.csv"
)
titanic.head(n=5)

Unnamed: 0,survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,0,male,22.0,1,0,7.25,Third,unknown,Southampton,n
1,1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
2,1,female,26.0,0,0,7.925,Third,unknown,Southampton,y
3,1,female,35.0,1,0,53.1,First,C,Southampton,n
4,0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y


In [3]:
# Separate the target column from the feature columns without touching `titanic`:
# the labels are an independent copy of the 'survived' column and the features
# are a new frame holding every other column in its original order.
titanic_labels = titanic['survived'].copy()
titanic_features = titanic.drop(columns='survived')

In [None]:
# Build one symbolic Keras input per feature column, keyed by column name.
# Numeric columns (ints, floats, bools) are declared as float32; every other
# column is declared as tf.string so it can be routed through a StringLookup
# layer by the categorical preprocessing step.
inputs = {}
for name, column in titanic_features.items():
  # Classify by "is this column numeric?" rather than `dtype == object`:
  # text columns stored with a pandas string/categorical dtype are not
  # `object` and would otherwise be wrongly declared as float32.
  dtype = tf.float32 if pd.api.types.is_numeric_dtype(column) else tf.string
  # shape=(1,): each example contributes a single scalar value per column.
  inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)


# Numeric branch: pick out the float32 inputs, join them into one tensor and
# standardize that tensor with statistics learned from the raw data.
numeric_inputs = {
  key: tensor
  for key, tensor in inputs.items()
  if tensor.dtype == tf.float32
}
x = layers.Concatenate()([*numeric_inputs.values()])

# The Normalization layer learns its statistics from the same columns, in the same order.
norm = layers.Normalization()
norm.adapt(np.array(titanic[list(numeric_inputs)]))
all_numeric_inputs = norm(x)  # (None, 4) for the four numeric Titanic columns

# Start the list of preprocessed pieces; the categorical encodings are appended to it.
preprocessed_inputs = [all_numeric_inputs]


# Categorical branch: map every non-float32 (string) input to integer indices
# and then to an encoded vector, appending each result to `preprocessed_inputs`.
# The loop variable is deliberately NOT called `input`: a top-level loop
# variable stays in the notebook namespace and would shadow the builtin
# `input()` for every later cell.
for name, cat_input in inputs.items():
  if cat_input.dtype == tf.float32:
    continue  # numeric inputs were already handled by the Normalization branch
  # The vocabulary is the set of distinct values observed in this column.
  lookup = layers.StringLookup(vocabulary=np.unique(titanic_features[name]))
  # vocabulary_size() is used so the encoding width matches the lookup's index range.
  one_hot = layers.CategoryEncoding(num_tokens=lookup.vocabulary_size())
  x = lookup(cat_input)
  x = one_hot(x)
  preprocessed_inputs.append(x)  # the appended encodings total 24 columns for this dataset

# Join the normalized numeric block and all categorical encodings into one
# feature tensor, then wrap the whole preprocessing graph as a reusable model.
preprocessed_inputs_cat = layers.Concatenate()(preprocessed_inputs)  # shape=(None, 28); num+cat
titanic_preprocessing = tf.keras.Model(inputs=inputs, outputs=preprocessed_inputs_cat)


# create a model
def titanic_model(preprocessing_head, inputs):
  """Build and compile a classifier that applies `preprocessing_head` first.

  Args:
    preprocessing_head: Callable (here a Keras model) applied to `inputs` to
      produce the feature tensor fed to the dense layers.
    inputs: The symbolic Keras inputs; they also become the inputs of the
      returned model.

  Returns:
    A compiled `tf.keras.Model` whose single output is an unactivated logit
    (the loss is configured with `from_logits=True`).
  """
  # Dense trunk: one hidden ReLU layer followed by a single-logit output layer.
  classifier = tf.keras.Sequential()
  classifier.add(layers.Dense(64, activation='relu'))
  classifier.add(layers.Dense(1))

  # Wire raw inputs -> preprocessing -> dense trunk into one end-to-end model.
  features = preprocessing_head(inputs)
  logits = classifier(features)
  model = tf.keras.Model(inputs, logits)

  bce_on_logits = tf.keras.losses.BinaryCrossentropy(from_logits=True)
  adam = tf.keras.optimizers.Adam()
  model.compile(loss=bce_on_logits, optimizer=adam)
  return model

# Instantiate the end-to-end model.
# NOTE: this rebinds the name `titanic_model` from the builder function to the
# model it returns, so the builder is no longer reachable under that name.
titanic_model = titanic_model(preprocessing_head=titanic_preprocessing,
                              inputs=inputs)


# run a model
# Keras expects a dict of arrays keyed by input name, so convert every feature
# column to its own NumPy array.
titanic_features_dict = {
  column: np.array(titanic_features[column])
  for column in titanic_features.columns
}
titanic_model.fit(x=titanic_features_dict, y=titanic_labels, epochs=10)