In [40]:
import os
import pyarrow
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

import numpy as np
import json

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from google.cloud import bigquery
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder

In [63]:
# Point the Google client libraries at the service-account key file, but do
# not clobber credentials that the environment already provides.
os.environ.setdefault(
  "GOOGLE_APPLICATION_CREDENTIALS",
  "../sublime-lens-298123-5721215a5509.json",
)

# Pull a 10,000-row sample of post-2000 births from the public natality table.
query="""
SELECT
  weight_pounds,
  is_male,
  mother_age,
  plurality,
  gestation_weeks
FROM
  publicdata.samples.natality
WHERE year > 2000
LIMIT 10000
"""
# Column order for the DataFrame; mirrors the SELECT list above.
label_col = [
  'weight_pounds',
  'is_male',
  'mother_age',
  'plurality',
  'gestation_weeks'
]

try:
  rows = bigquery.Client().query(query).result()
  # Materialise inside the try: iterating the result may fetch further pages.
  records = [dict(row) for row in rows]
except Exception as err:
  # Credential and API failures are not ValueErrors, so catch broadly here and
  # re-raise with the original error chained. Raising (rather than exit())
  # stops the cell without asking the kernel to shut down.
  raise RuntimeError('google services not available or invalid credentials') from err

df = pd.DataFrame(records, columns=label_col)

df.head()


Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks
0,7.063611,True,32,1,37.0
1,4.687028,True,30,3,33.0
2,7.561856,True,20,1,39.0
3,7.561856,True,31,1,37.0
4,7.312733,True,32,1,40.0


In [64]:
# Summary statistics (count, mean, std, quartiles, extremes) for the numeric columns.
df.describe()

Unnamed: 0,weight_pounds,mother_age,plurality,gestation_weeks
count,9997.0,10000.0,10000.0,9886.0
mean,7.303938,27.2265,1.0335,38.70089
std,1.313289,6.194646,0.192825,2.538734
min,0.595248,13.0,1.0,18.0
25%,6.624891,22.0,1.0,38.0
50%,7.374463,27.0,1.0,39.0
75%,8.126239,32.0,1.0,40.0
max,12.625874,52.0,4.0,47.0


In [65]:
# Count how many rows carry each value of is_male.
df['is_male'].value_counts()

True     5096
False    4904
Name: is_male, dtype: int64

In [66]:
# Discard every row that has a missing value, then reorder the remaining rows
# using a fixed seed.
df = shuffle(df.dropna(), random_state=2)

In [67]:
# Target is the birth weight; the features are every other column, with the
# is_male flag cast to an integer.
labels = df['weight_pounds']
data = (
    df.drop(columns=['weight_pounds'])
      .assign(is_male=lambda frame: frame['is_male'].astype(int))
)
data.head()

Unnamed: 0,is_male,mother_age,plurality,gestation_weeks
40,0,18,1,39.0
5976,0,22,1,47.0
6902,0,29,1,38.0
4838,0,32,1,37.0
8335,1,29,1,39.0


In [68]:
# Hold out a test set using scikit-learn's default split proportions. The
# fixed seed keeps the train/test partition identical across re-runs, so
# reported metrics and sample predictions are reproducible.
x, y = data, labels
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2)

In [69]:
# Feed-forward regressor: two ReLU hidden layers (64 and 32 units) followed
# by a single linear output unit. The input width is the number of values in
# one training row.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(len(x_train.iloc[0]),)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))

2021-10-15 23:01:07.444317: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-10-15 23:01:07.444859: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-10-15 23:01:07.445311: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (IFEANYI-EZE): /proc/driver/nvidia/version does not exist
2021-10-15 23:01:07.449981: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [71]:
# Optimise mean squared error with RMSprop, and report MAE and MSE as metrics.
model.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer=tf.keras.optimizers.RMSprop(),
    metrics=['mae', 'mse'],
)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                320       
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 2,433
Trainable params: 2,433
Non-trainable params: 0
_________________________________________________________________


In [72]:
# Train for 10 epochs, reserving 10% of the training rows for validation.
model.fit(x_train, y_train, epochs=10, validation_split=0.1)

2021-10-15 23:02:29.287495: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f22c889cdc0>

In [73]:
# Score only the first few held-out rows as a quick sanity check.
num_examples = 10
predictions = model.predict(x_test.iloc[:num_examples])

In [74]:
# Show each sampled prediction next to the true label for the same row.
for row in range(num_examples):
    predicted = predictions[row][0]
    actual = y_test.iloc[row]
    print('Predicted val: ', predicted)
    print('Actual val: ', actual)
    print()

Predicted val:  5.3522534
Actual val:  6.4374980503999994

Predicted val:  7.076668
Actual val:  6.96881210182

Predicted val:  7.2372007
Actual val:  8.7523518014

Predicted val:  7.003402
Actual val:  7.5618555866

Predicted val:  7.397735
Actual val:  8.50102482272

Predicted val:  6.824552
Actual val:  6.2501051276999995

Predicted val:  7.0333247
Actual val:  10.24929056038

Predicted val:  5.457179
Actual val:  5.732018812

Predicted val:  6.4841084
Actual val:  7.8815258665

Predicted val:  7.1050286
Actual val:  8.5098433132



In [75]:
# Place the held-out features and their labels side by side (column-wise) for the What-If Tool.
wit_data = pd.concat([x_test, y_test], axis=1)

In [76]:
def custom_predict(examples_to_infer):
    """Return the trained model's predictions for the given examples.

    Registered with the What-If Tool as its custom prediction function; the
    examples are forwarded to ``model.predict`` unchanged and its result is
    returned as-is.
    """
    return model.predict(examples_to_infer)

In [None]:
# Load the first 500 held-out rows into the What-If Tool. Column names are the
# feature columns followed by the label, which is declared as the target of a
# regression model served through custom_predict.
wit_examples = wit_data[:500].values.tolist()
wit_columns = data.columns.tolist() + ['weight_pounds']
config_builder = (
    WitConfigBuilder(wit_examples, wit_columns)
    .set_custom_predict_fn(custom_predict)
    .set_target_feature('weight_pounds')
    .set_model_type('regression')
)

WitWidget(config_builder, height=800)