In this first notebook, we use the TensorFlow Estimator API without any extra TensorFlow features (checkpoints, TensorBoard, etc.). We also work with in-memory data retrieved from BigQuery.

The dataset was imported from Kaggle into Google Cloud Storage, then explored and cleaned in Dataprep. Finally, the output was stored in a BigQuery table.

STEP 1: Import the dataset from BigQuery

In [None]:
import google.datalab.bigquery as bq

In [None]:
from google.cloud import bigquery

# BigQuery client, created without explicit arguments.
client = bigquery.Client()

# Select every column of the reworked churn table.
query = """SELECT
   *
 FROM
   `customer-churn-prediction.data_flow_reworked_data.reworked_data_V1`"""

# Run the query and materialise the full result set as an in-memory DataFrame,
# with the "Churn" label column converted to float64.
query_job = client.query(query)
df = query_job.to_dataframe().assign(
  Churn=lambda frame: frame["Churn"].astype("float64")
)

STEP 2: Basic TensorFlow model

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [4]:
# Column names first, then a separator, then the dtype / non-null summary.
print(df.columns)
print("\n")
print("===================================================================================")
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly: wrapping it in print() appends a stray "None" line.
df.info()

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7032 entries, 0 to 7031
Data columns (total 20 columns):
gender              7032 non-null object
SeniorCitizen       7032 non-null object
Partner             7032 non-null object
Dependents          7032 non-null object
tenure              7032 non-null float64
PhoneService        7032 non-null object
MultipleLines       7032 non-null object
InternetService     7032 non-null object
OnlineSecurity      7032 non-null object
OnlineBackup        7032 non-null object
DeviceProtection    7032 non-null object
TechSupport         7032 non-null object
StreamingTV         7032 non-nu

In [5]:
# Numeric features, passed to the model unchanged.
NUMERIC_FEATURES = ["TotalCharges", "MonthlyCharges", "tenure"]

# Categorical features, in the order they are handed to the model.
# "SeniorCitizen" is intentionally absent: it was disabled in the original feature list.
CATEGORICAL_FEATURES = [
  "gender",
  "Partner",
  "Dependents",
  "PhoneService",
  "MultipleLines",
  "InternetService",
  "OnlineSecurity",
  "OnlineBackup",
  "DeviceProtection",
  "TechSupport",
  "StreamingTV",
  "StreamingMovies",
  "Contract",
  "PaperlessBilling",
  "PaymentMethod",
]


def _vocabulary_column(name):
  """Return a vocabulary-list categorical column whose vocabulary is read from `df[name]`.

  The vocabulary is taken from the data instead of being typed by hand: with the
  default out-of-vocabulary settings of `categorical_column_with_vocabulary_list`,
  a value that is missing from (or misspelled in) a hand-written list is silently
  ignored by the model instead of raising an error.
  """
  # Sorted so the vocabulary order is stable from one run to the next.
  vocabulary = sorted(df[name].dropna().unique().tolist())
  return tf.feature_column.categorical_column_with_vocabulary_list(name, vocabulary)


# Feature columns consumed by the estimator: numeric columns first, then categorical ones.
featcols = (
  [tf.feature_column.numeric_column(name) for name in NUMERIC_FEATURES]
  + [_vocabulary_column(name) for name in CATEGORICAL_FEATURES]
)

def pandas_train_input_fn(df, label="Churn", batch_size=128, num_epochs=1, shuffle=True):
  """Build a training input function that feeds an in-memory DataFrame to an estimator.

  Args:
    df: pandas DataFrame holding both the feature columns and the label column.
    label: name of the label column in `df`.
    batch_size: number of rows per batch.
    num_epochs: number of passes over `df` before the input is exhausted.
    shuffle: whether rows are read in random order.

  Returns:
    The input function created by `tf.estimator.inputs.pandas_input_fn`.

  Raises:
    KeyError: if `label` is not a column of `df`.
  """
  # Looked up first so a missing label column fails with a plain KeyError.
  labels = df[label]
  # The label is removed from the features so it can never be consumed as a model input.
  features = df.drop(label, axis=1)
  return tf.estimator.inputs.pandas_input_fn(
    x=features,
    y=labels,
    batch_size=batch_size,
    num_epochs=num_epochs,
    shuffle=shuffle,
    queue_capacity=1000
  )

# Canned linear classifier built on the `featcols` feature columns.
model = tf.estimator.LinearClassifier(feature_columns=featcols)

# Train on the whole DataFrame; batching and epoch count come from the input function.
train_input_fn = pandas_train_input_fn(df)
model.train(input_fn=train_input_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_evaluation_master': '', '_model_dir': '/tmp/tmp7nz4q6dr', '_service': None, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_keep_checkpoint_every_n_hours': 10000, '_save_summary_steps': 100, '_log_step_count_steps': 100, '_num_ps_replicas': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff1b59a0208>, '_tf_random_seed': None, '_save_checkpoints_secs': 600, '_train_distribute': None, '_master': '', '_save_checkpoints_steps': None, '_session_config': None, '_task_id': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmp7nz4q6dr/model.ckpt.
INFO:tensorflow:step = 1, lo

<tensorflow.python.estimator.canned.linear.LinearClassifier at 0x7ff1b5998fd0>