<a href="https://colab.research.google.com/github/Djensonsan/Spotify-Sequential-Skip-Prediction-Challenge/blob/main/GBT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# GBT

## Imports and Constants

In [None]:
# Install your required packages here
!pip install pandas numpy matplotlib sklearn fsspec gcsfs tqdm



In [None]:
# Path to credentials for cloud bucket:
# NOTE(review): this is a user-specific Google Drive location; it presumably has
# to be adapted per user and is only readable once Drive is mounted — confirm.
%env GOOGLE_APPLICATION_CREDENTIALS=/content/drive/My Drive/CS/AI/Credentials/ai-project-2020-f4dfbc25326c.json

env: GOOGLE_APPLICATION_CREDENTIALS=/content/drive/My Drive/CS/AI/Credentials/ai-project-2020-f4dfbc25326c.json


In [None]:
import numpy as np
import time
import pandas as pd
import os

import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.python.keras.utils.vis_utils import plot_model

from glob import glob
from tqdm.notebook import tqdm

from google.cloud import storage
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Let pandas display up to 800 rows and 800 columns before truncating output.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 800)

In [None]:
# Open the cloud storage bucket that holds the session logs.
# NOTE(review): storage.Client() presumably authenticates through the
# GOOGLE_APPLICATION_CREDENTIALS variable set in an earlier cell — confirm.
bucket_name = "ai-project-2020-spotify"
client = storage.Client()
bucket = client.get_bucket(bucket_name)

## Utility Functions

In [None]:
def ave_pre(submission, groundtruth):
    """Return the average accuracy of one session's predictions.

    Walks the predictions and the ground truth in parallel. Every time a
    prediction is correct, the fraction of correct predictions seen so far
    (correct count / current 1-based position) is added to a running total.
    The total is finally divided by the number of ground-truth entries.

    Raises:
        ValueError: if a prediction is neither 0 nor 1.
    """
    hits = 0.0
    running_total = 0.0
    for position, (predicted, actual) in enumerate(zip(submission, groundtruth), start=1):
        if not (predicted == 0 or predicted == 1):
            raise ValueError()
        if predicted == actual:
            hits += 1.0
            running_total += hits / position
    return running_total / len(groundtruth)

def evaluate(submission, groundtruth):
    """Compute the challenge metrics for lists of predictions and ground truths.

    Each element of `submission` / `groundtruth` is the sequence of
    predictions / true labels of one session (source: starter kit).

    Returns:
        (mean average accuracy, accuracy of the first prediction), both
        averaged over all sessions.

    Raises:
        ValueError: if a session contains a prediction other than 0 or 1.
    """
    total_ap = 0.0
    total_first_correct = 0.0
    n_sessions = 0
    # Note: no check is made that predictions and truth have equal length.
    for predicted, actual in zip(submission, groundtruth):
        try:
            session_ap = ave_pre(predicted, actual)
        except ValueError:
            raise ValueError('Invalid prediction in line {}, should be 0 or 1'.format(n_sessions))
        total_ap += session_ap
        total_first_correct += predicted[0] == actual[0]
        n_sessions += 1
    return total_ap / n_sessions, total_first_correct / n_sessions

def normalize(df,feature_name):
    """Min-max scale the given columns to the range [0, 1].

    Args:
        df: DataFrame holding the columns to scale; it is not modified.
        feature_name: iterable of column names to scale.

    Returns:
        A copy of `df` in which every listed column has been replaced by
        (value - min) / (max - min), using the min and max of that column
        in `df`.

    A column whose values are all equal has a zero range; it is mapped to
    0.0 instead of dividing by zero (which would turn the whole column into
    NaN). Missing values stay missing.
    """
    result = df.copy()
    for name in feature_name:
        min_value = df[name].min()
        value_range = df[name].max() - min_value
        shifted = df[name] - min_value
        if value_range == 0:
            # Constant column: every value equals the minimum, so the shifted
            # values (all zeros, NaN where the input was NaN) are the result.
            result[name] = shifted.astype(float)
        else:
            result[name] = shifted / value_range
    return result

def categorical_to_dummies(df, categorical_cols):
    """One-hot encode the listed categorical columns.

    The original columns are removed and replaced by dummy columns named
    `<column>_<category>`, joined back on the frame's index.
    """
    one_hot = pd.get_dummies(df[categorical_cols], prefix=categorical_cols)
    remaining = df.drop(columns=categorical_cols)
    return remaining.join(one_hot)

def split_sessions(data, perc_in=0.6):
    """Randomly partition the interactions by session.

    A fraction `perc_in` of the distinct session ids (rounded down) is drawn
    without replacement; all rows of those sessions form the first frame and
    the rows of the remaining sessions form the second. Both returned frames
    are indexed by `session_id`.
    """
    all_sessions = data['session_id'].unique()
    n_in = int(perc_in * len(all_sessions))
    sessions_in = np.random.choice(all_sessions, n_in, replace=False)
    remaining = set(all_sessions) - set(sessions_in)
    sessions_out = np.array(list(remaining))
    by_session = data.set_index('session_id')
    return by_session.loc[sessions_in], by_session.loc[sessions_out]

## Import Session Logs

In [None]:
# Cloud bucket contains larger datasets:
# collect the gs:// paths of the training logs dated 2018-07-15.
train_files = list(bucket.list_blobs(prefix='training_set/'))
files = []
for blob in train_files:
  if '20180715' not in blob.name:
    continue
  files.append(f"gs://{bucket_name}/"+blob.name)
  print(blob.name)
print(files)

training_set/log_0_20180715_000000000000.csv.gz
training_set/log_1_20180715_000000000000.csv.gz
training_set/log_2_20180715_000000000000.csv.gz
training_set/log_3_20180715_000000000000.csv.gz
training_set/log_4_20180715_000000000000.csv.gz
training_set/log_5_20180715_000000000000.csv.gz
training_set/log_6_20180715_000000000000.csv.gz
training_set/log_7_20180715_000000000000.csv.gz
['gs://ai-project-2020-spotify/training_set/log_0_20180715_000000000000.csv.gz', 'gs://ai-project-2020-spotify/training_set/log_1_20180715_000000000000.csv.gz', 'gs://ai-project-2020-spotify/training_set/log_2_20180715_000000000000.csv.gz', 'gs://ai-project-2020-spotify/training_set/log_3_20180715_000000000000.csv.gz', 'gs://ai-project-2020-spotify/training_set/log_4_20180715_000000000000.csv.gz', 'gs://ai-project-2020-spotify/training_set/log_5_20180715_000000000000.csv.gz', 'gs://ai-project-2020-spotify/training_set/log_6_20180715_000000000000.csv.gz', 'gs://ai-project-2020-spotify/training_set/log_7_201807

## Import Track Features

In [None]:
# Load both track-feature shards and stack them into one frame indexed by track_id.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0.
track_features_1 = pd.read_csv('/content/drive/My Drive/CS/AI/Data/track_features/tf_000000000000.csv').set_index('track_id')
track_features_2 = pd.read_csv('/content/drive/My Drive/CS/AI/Data/track_features/tf_000000000001.csv').set_index('track_id')
track_features = pd.concat([track_features_1, track_features_2])

In [None]:
# One-hot encode the 'mode' column of the track features.
track_features = categorical_to_dummies(track_features, ['mode'])

## Track Pre-processing

In [None]:
def duration_binning(track_features):
  ''' Replace the 'duration' column by one-hot encoded duration bins.

  Bins are right-inclusive and labelled by their upper edge: '60' covers
  (0, 60], '90' covers (60, 90], ... '300' covers (270, 300], 'long' covers
  (300, 600] and 'very_long' covers (600, 999999]. Values outside the bins
  get no bin. Durations are presumably in seconds — confirm.

  Note: the 'duration' column of the frame passed in is overwritten in place
  before the encoded frame is built and returned.
  '''
  edges = [0, 60, 90, 120, 150, 180, 210, 240, 270, 300, 600, 999999]
  # Upper edges 60..300 name their own bin; the last two bins get word labels.
  labels = [str(edge) for edge in edges[1:-2]] + ['long', 'very_long']
  track_features['duration'] = pd.cut(track_features['duration'], bins=edges, labels=labels)
  return categorical_to_dummies(track_features, ['duration'])

In [None]:
def decades_binning(track_features):
  ''' Replace the 'release_year' column by one-hot encoded year bins.

  Bins are right-inclusive and each label names the UPPER edge of its bin:
  '60s' covers (0, 1960], '70s' covers (1960, 1970], ... '20s' covers
  (2010, 2020]. A track released in 1965 therefore falls in '70s'.
  Years outside (0, 2020] get no bin.

  Note: the 'release_year' column of the frame passed in is overwritten in
  place before the encoded frame is built and returned.
  '''
  year_edges = [0] + list(range(1960, 2021, 10))
  decade_labels = ['60s', '70s', '80s', '90s', '00s', '10s', '20s']
  binned_years = pd.cut(track_features['release_year'], bins=year_edges, labels=decade_labels)
  track_features['release_year'] = binned_years
  return categorical_to_dummies(track_features, ['release_year'])

In [None]:
# Replace 'duration' and 'release_year' by their one-hot encoded bins.
# Note: this cell is not idempotent — both source columns are consumed, so
# re-running it on the result raises a KeyError.
track_features = duration_binning(track_features)
track_features = decades_binning(track_features)

### Sanity Check

In [None]:
# Preview the first rows to check that the dummy columns were added.
track_features.head()

Unnamed: 0_level_0,us_popularity_estimate,acousticness,beat_strength,bounciness,danceability,dyn_range_mean,energy,flatness,instrumentalness,key,liveness,loudness,mechanism,organism,speechiness,tempo,time_signature,valence,acoustic_vector_0,acoustic_vector_1,acoustic_vector_2,acoustic_vector_3,acoustic_vector_4,acoustic_vector_5,acoustic_vector_6,acoustic_vector_7,mode_major,mode_minor,duration_60,duration_90,duration_120,duration_150,duration_180,duration_210,duration_240,duration_270,duration_300,duration_long,duration_very_long,release_year_60s,release_year_70s,release_year_80s,release_year_90s,release_year_00s,release_year_10s,release_year_20s
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
t_2e8f4b71-8a0b-4b9c-b7d8-fb5208e87f9f,99.582885,0.716209,0.366495,0.332605,0.439835,5.805774,0.238847,1.0107,0.6533861,0,0.769258,-17.094,0.19917,0.759699,0.03394,100.370003,4,0.223395,0.146012,-0.706908,0.259496,0.481157,0.238427,-0.098389,-0.25496,-0.227383,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
t_dae2ec0e-ec7b-4b3e-b60c-4a884d0eccb0,97.272035,0.83946,0.362212,0.389829,0.50758,6.845427,0.420476,1.000398,3.94155e-09,0,0.085844,-11.295,0.357639,0.747436,0.049856,141.334,4,0.484702,0.039554,-0.539554,0.105141,0.692589,0.226047,-0.468162,0.164389,-0.769024,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0
t_cf0164dd-1531-4399-bfa6-dec19cd1fedc,99.620384,0.054673,0.495002,0.589378,0.552311,9.361949,0.842938,0.957766,0.1041595,0,0.407325,-9.31,0.304721,0.493154,0.071753,138.889999,4,0.818441,0.083863,-0.242108,-0.014258,0.096396,0.417641,-0.050576,-0.204757,-0.172563,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
t_0f90acc7-d5c5-4e53-901d-55610fbd090c,96.79683,0.042606,0.389634,0.359044,0.585673,6.068578,0.665398,0.947322,1.444963e-05,0,0.251502,-12.159,0.702948,0.212197,0.029425,133.139008,4,0.594829,0.192498,0.340039,0.034846,-0.389794,0.518381,0.185008,-0.079907,-0.016978,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0
t_36b9ad02-095a-443d-a697-6c7285d9410a,97.905891,0.249982,0.51364,0.485435,0.635095,7.198735,0.408715,1.014063,0.526688,0,0.21837,-13.813,0.888889,0.193438,0.032178,152.212006,4,0.591289,0.270586,-0.411061,0.165898,0.225652,0.335518,-0.036643,-0.0163,-0.44687,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0


In [None]:
# List the resulting track feature columns.
track_features.columns

Index(['us_popularity_estimate', 'acousticness', 'beat_strength', 'bounciness',
       'danceability', 'dyn_range_mean', 'energy', 'flatness',
       'instrumentalness', 'key', 'liveness', 'loudness', 'mechanism',
       'organism', 'speechiness', 'tempo', 'time_signature', 'valence',
       'acoustic_vector_0', 'acoustic_vector_1', 'acoustic_vector_2',
       'acoustic_vector_3', 'acoustic_vector_4', 'acoustic_vector_5',
       'acoustic_vector_6', 'acoustic_vector_7', 'mode_major', 'mode_minor',
       'duration_60', 'duration_90', 'duration_120', 'duration_150',
       'duration_180', 'duration_210', 'duration_240', 'duration_270',
       'duration_300', 'duration_long', 'duration_very_long',
       'release_year_60s', 'release_year_70s', 'release_year_80s',
       'release_year_90s', 'release_year_00s', 'release_year_10s',
       'release_year_20s'],
      dtype='object')

## Logs Pre-processing

In [None]:
def day_binning(logs):
  ''' Replace 'hour_of_day' by one-hot encoded parts of the day.

  Hours 0-12 become 'morning', (12, 18] 'afternoon' and (18, 24] 'evening'.
  `include_lowest=True` makes the first bin contain hour 0; without it
  pd.cut treats the bin as (0, 12] and midnight rows end up in no bin at all.
  (Hours are presumably integers in 0-23 — confirm against the log schema.)

  args:
    logs: dataframe with an 'hour_of_day' column; that column is overwritten
      in place before the encoded frame is built.
  returns:
    logs: dataframe with 'hour_of_day' replaced by the columns
      'hour_of_day_morning', 'hour_of_day_afternoon' and 'hour_of_day_evening'.
  '''
  cut_bins = [0, 12, 18, 24]
  bin_names = ['morning', 'afternoon', 'evening']
  logs['hour_of_day'] = pd.cut(logs['hour_of_day'], bins=cut_bins, labels=bin_names, include_lowest=True)
  logs = categorical_to_dummies(logs, ['hour_of_day'])
  return logs

In [None]:
def cosine_skipped(data):
  ''' Add the cosine similarity between each track and the mean second-half track.

  The reference vector is the column-wise mean of the track features over all
  rows whose position lies in the second half of their session.
  NOTE(review): that mean is taken over every such row in `data`, i.e. across
  all sessions contained in the frame, not separately per session — confirm
  this is intended.

  args:
    data: dataframe with 'session_position', 'session_length' and the track
      feature columns listed below; it is modified in place.
  returns:
    data: the same dataframe with the extra column
      'similarity_mean_second_half_songs'.
  '''
  cosine_distance_columns = ['duration_60', 'duration_90', 'duration_120', 'duration_150',
       'duration_180', 'duration_210', 'duration_240', 'duration_270',
       'duration_300', 'duration_long', 'duration_very_long', 'release_year_60s', 'release_year_70s', 'release_year_80s',
       'release_year_90s', 'release_year_00s', 'release_year_10s',
       'release_year_20s', 'us_popularity_estimate', 'acousticness',
       'beat_strength', 'bounciness', 'danceability', 'dyn_range_mean',
       'energy', 'flatness', 'instrumentalness', 'key', 'liveness', 'loudness',
       'mechanism', 'organism', 'speechiness', 'tempo', 'time_signature',
       'valence', 'acoustic_vector_0', 'acoustic_vector_1',
       'acoustic_vector_2', 'acoustic_vector_3', 'acoustic_vector_4',
       'acoustic_vector_5', 'acoustic_vector_6', 'acoustic_vector_7']
  second_half_songs_data = data[data['session_position'] > 0.5 * data['session_length']]
  mean_second_half_songs = second_half_songs_data[cosine_distance_columns].mean().tolist()
  # One vectorised call for all rows instead of one sklearn call per row:
  # the result has shape (n_rows, 1), one similarity per track.
  track_matrix = data[cosine_distance_columns].to_numpy(dtype=float)
  similarities = cosine_similarity(track_matrix, [mean_second_half_songs])
  data['similarity_mean_second_half_songs'] = similarities[:, 0]
  return data

In [None]:
def logs_cleaning(data):
  ''' Clean a frame of session logs joined with track features.
  Steps: drop the date, one-hot encode the categorical log columns, turn the
  boolean columns into ints and min-max scale a fixed set of track features.
  args:
    data: dataframe to clean; its 'date' column is dropped in place.
  returns:
    data: cleaned dataframe.
  '''
  # The date is removed for convenience (it could be encoded as well).
  data.drop(columns=['date'], inplace=True)

  # One-hot encode each categorical variable of the logs.
  data = categorical_to_dummies(
      data,
      ['context_type', 'hist_user_behavior_reason_start', 'hist_user_behavior_reason_end'])

  # Multiplying by 1 converts booleans to ints.
  for flag in ('premium', 'hist_user_behavior_is_shuffle', 'skip_1', 'skip_2', 'skip_3'):
    data[flag] = data[flag]*1

  # Scale with the min/max found in the frame that was passed in.
  acoustic_vectors = ['acoustic_vector_{}'.format(i) for i in range(8)]
  scaled_features = (['us_popularity_estimate', 'flatness', 'loudness', 'tempo']
                     + acoustic_vectors
                     + ['key'])
  return normalize(data, scaled_features)

In [None]:
def logs_feature_joining(data):
  ''' Attach the track features to a chunk of session logs.
  Left-joins the module-level `track_features` frame on the logs'
  'track_id_clean' column, so every log row is kept.
  args:
    data: dataframe of session logs.
  returns:
    dataframe holding the log columns followed by the track feature columns.
  '''
  return data.join(track_features, on='track_id_clean', how='left')

In [None]:
def data_generator(files, chunksize=10000, repeat=True):
  ''' Generate chunks of complete sessions from all csv files.
  args:
    files: list of path names to CSV files holding session logs.
    chunksize: CSV files will be read in chunks of size chunksize.
    repeat: when True (default) the files are re-read forever, so the
      generator never ends; when False it stops after one pass over the files.
  yields:
    dataframe containing only complete sessions.

  Note:
    Chunk size isn't uniform, will only return full sessions.
    So, one chunk might be of length 997, next might be 1005.
    Reason being that chunks can split a session in two, this is unwanted behavior.
    The rows of the last session seen so far are held back and prepended to
    the next chunk; they are flushed at the end of every pass so the final
    session is not lost. This assumes the rows of a session are stored
    contiguously.
  '''
  assert isinstance(files, list), "files argument should be list of paths"
  while True:
    carry = None  # rows of the (possibly incomplete) last session seen so far
    for path in files:
      reader = pd.read_csv(path, iterator=True, chunksize=chunksize)
      print('\n### Opened new file')
      for chunk in reader:
        combined = chunk if carry is None else pd.concat([carry, chunk])
        # The session of the last row may continue in the next chunk.
        in_last_session = combined['session_id'] == combined['session_id'].iloc[-1]
        carry = combined[in_last_session]
        complete = combined[~in_last_session].copy()
        # Nothing to hand out if the whole chunk belongs to one session.
        if not complete.empty:
          yield complete
    # End of the pass: the held-back session is complete now.
    if carry is not None and not carry.empty:
      yield carry.copy()
    print('\n### Processed all Files')
    if not repeat:
      return

def data_processor(files, filename, chunksize=20000, delete=False):
  ''' Pipeline: will clean, join,... each chunk and save to csv.
  Makes a single pass over the files and appends every processed chunk to
  `filename`. The header is only written when the output file does not exist
  yet (or is empty), so repeated calls can append to the same file.
  NOTE(review): the dummy columns are created per chunk, so the column set
  presumably varies when a category is absent from a chunk — confirm before
  relying on a fixed csv layout.
  args:
    files (list): list of path names to CSV files holding session logs.
    chunksize (int): CSV files will be read in chunks of size chunksize.
    filename (string): path to save file to.
    delete (bool): delete file at filename first or not.
  '''
  if delete and os.path.exists(filename):
    os.remove(filename)
  header = not os.path.exists(filename) or os.path.getsize(filename) == 0
  for chunk in data_generator(files, chunksize, repeat=False):
    chunk = logs_feature_joining(chunk)
    chunk = logs_cleaning(chunk)
    chunk = day_binning(chunk)
    chunk = cosine_skipped(chunk)
    chunk.to_csv(filename, header=header, mode='a')
    header = False

In [None]:
# For files 1-7:
# NOTE(review): only files[0] is passed below, which does not match the
# "files 1-7" label — confirm which files were meant to be processed here.
data_processor([files[0]], filename='/content/drive/MyDrive/CS/AI/Data/large_logs_processed.csv')

KeyboardInterrupt: ignored

In [None]:
# For file 8:
# NOTE(review): this passes files[0] again, exactly like the previous cell,
# and appends to the same output file; "file 8" presumably means files[7] — confirm.
data_processor([files[0]], filename='/content/drive/MyDrive/CS/AI/Data/large_logs_processed.csv')

## Training & Testing 

In [None]:
# What probably needs to be done:
# 1) Create two pre-processed files: train and test
# 2) Perform splitting and stacking as usual -> maybe even do this as another pre-processing step
# 3) Do imputing and stuff
# 4) train the tensorflow GBT model whatever way tf does it (dataset API probably)

In [None]:
def stack_sessions(df):
    """
    Pivot the listen events of each session into one wide row per session.

    Input (one row per listen event):
    session_id session_position feature1 feature2
    a          1                ~        ~
    a          2                ~        ~
    b          1                ~        ~
    b          2                ~        ~
    b          3                ~        ~

    Output (one row per session, indexed by session_id; one column per
    feature and session position, NaN where a session has no such position):
    session_id feature1@1 feature1@2 feature1@3 feature2@1 feature2@2 feature2@3
    a          ~          ~          Nan        ~          ~          Nan
    b          ~          ~          ~          ~          ~          ~

    The columns of the result are hierarchical: (feature, session_position).
    """
    value_columns = list(df.columns)
    for key in ('session_id', 'session_position'):
        value_columns.remove(key)
    return df.pivot(index='session_id', columns='session_position', values=value_columns)

In [None]:
def split_df(df):
    """
    Split df in data and labels part. First half of session is stacked and joined to each song in the second half of the session.
    Listening information is removed from second half, as it will not be available for prediction.

    Args:
        df: processed session logs with one row per listen event. The columns
            'track_id_clean' and 'Unnamed: 0' are dropped from it IN PLACE.

    Returns:
        (data, truth): `data` has one row per second-half event, holding that
        event's session_position and track features followed by the stacked
        first-half columns of its session; `truth` is the 'skip_2' column of
        the second-half events.
    """
    # Note: this mutates the caller's frame.
    drop_cols = ['track_id_clean', 'Unnamed: 0']
    df.drop(columns=drop_cols, inplace=True)

    # An event belongs to the first half when position * 2 <= session length.
    first = df.loc[df['session_position']*2 <= df['session_length']].reset_index().drop(columns=['session_length'])
    second = df.loc[df['session_position']*2 > df['session_length']].reset_index()
    # Labels must be taken before the listening columns are removed below.
    truth = second['skip_2']

    # After resetting index, need to remove index from chunk
    first.drop(columns=['index'], inplace=True)
    second.drop(columns=['index'], inplace=True)

    # Start from "drop everything" and then whitelist the columns to keep.
    drop_cols = list(second.columns)
    drop_cols.remove('session_id')
    drop_cols.remove('session_position')
    # Need to specify which columns you have access to in the second half:
    # print(second.columns)
    columns_second = ['us_popularity_estimate',
       'acousticness', 'beat_strength', 'bounciness', 'danceability',
       'dyn_range_mean', 'energy', 'flatness', 'instrumentalness', 'key',
       'liveness', 'loudness', 'mechanism', 'organism', 'speechiness', 'tempo',
       'time_signature', 'valence', 'acoustic_vector_0', 'acoustic_vector_1',
       'acoustic_vector_2', 'acoustic_vector_3', 'acoustic_vector_4',
       'acoustic_vector_5', 'acoustic_vector_6', 'acoustic_vector_7',
       'mode_major', 'mode_minor', 'duration_60', 'duration_90',
       'duration_120', 'duration_150', 'duration_180', 'duration_210',
       'duration_240', 'duration_270', 'duration_300', 'duration_long',
       'duration_very_long', 'release_year_60s', 'release_year_70s',
       'release_year_80s', 'release_year_90s', 'release_year_00s',
       'release_year_10s', 'release_year_20s', 'similarity_mean_second_half_songs']
    # Whitelisted columns that are present are taken off the drop list.
    for elem in columns_second:
      if elem in drop_cols:
        drop_cols.remove(elem)
    second.drop(columns=drop_cols, inplace=True)
    first_stacked = stack_sessions(first)

    # NOTE(review): first_stacked presumably has two-level columns while
    # `second` has flat ones; joining frames with different column levels may
    # warn or fail depending on the pandas version — confirm.
    data = second.join(first_stacked, how='left', on='session_id')
    data.drop(columns=['session_id'], inplace=True)
    return data, truth

In [None]:
def dataset_generator(chunksize=10000):
  ''' Yield (features, labels) pairs built from the pre-processed session logs.
  args:
    chunksize: number of csv rows read at a time; it is now forwarded to
      data_generator (it used to be ignored in favour of a hard-coded 10000).
  yields:
    features: dict mapping each column to the list of its values in the chunk.
    labels: the 'skip_2' values of the second-half events of the chunk.
  '''
  file = '/content/drive/MyDrive/CS/AI/Data/large_logs_processed.csv'
  generator = data_generator([file], chunksize=chunksize)
  for chunk in generator:
    chunk, truth = split_df(chunk)
    # Need to impute too!
    # NEED TO CREATE THE WEIRD FORMAT HERE
    features = chunk.to_dict(orient='list')
    labels = truth
    # print(type(truth))
    yield features, labels

In [None]:
# Testing
# Smoke test: wrap the generator in a tf.data.Dataset and pull one element.
dataset = tf.data.Dataset.from_generator(dataset_generator, (tf.float32, tf.float32))
list(dataset.take(1).as_numpy_iterator())


### Opened new file




In [None]:
# Problem is in this function, don't understand how input_fn works exactly
def make_input_fn(n_epochs=None, shuffle=False):
  ''' Build an input function for the estimator.
  args:
    n_epochs: how often the dataset is repeated; None cycles through the
      dataset as many times as needed (used for training).
    shuffle: accepted but currently not used.
  returns:
    input_fn: a function without arguments that returns a tf.data.Dataset.
      An estimator expects the dataset to produce (features, labels) tuples,
      where features is a tf.Tensor or a dict of feature name to Tensor and
      labels is a Tensor or a dict of label name to Tensor.
  '''
  def input_fn():
    output_types = (tf.float64, tf.int32)
    output_shapes = (tf.TensorShape([878,]), tf.TensorShape([]))
    dataset = tf.data.Dataset.from_generator(dataset_generator, output_types, output_shapes)
    return dataset.repeat(n_epochs)
  return input_fn

# Training input function (no evaluation input function is created yet).
train_input_fn = make_input_fn()

In [None]:
# Need to determine all feature columns that will be used:
# Loop through all column names, and create fc.numeric_column objects -> append to list.
# We only have numeric columns.
def get_columns(chunksize=1000):
  ''' Return the feature column names produced by split_df.
  Reads the pre-processed logs chunk by chunk and returns the column labels
  of the first split chunk that contains at least one row.
  args:
    chunksize: number of csv rows read at a time; it is now forwarded to
      data_generator (it used to be ignored in favour of a hard-coded 1000).
  returns:
    list of column labels.
  '''
  file = '/content/drive/MyDrive/CS/AI/Data/large_logs_processed.csv'
  generator = data_generator([file], chunksize=chunksize)
  for chunk in generator:
    chunk, truth = split_df(chunk)
    # The column labels are all that is needed; no need to iterate over rows.
    if len(chunk) > 0:
      return chunk.columns.tolist()
columns = get_columns()

# Every input column is numeric, so each one becomes a float64 numeric column.
fc = tf.feature_column
feature_columns = [fc.numeric_column(str(feature_name), dtype=tf.float64) for feature_name in columns]

# Further explanation about this function:
# Tensorflow BoostedTreesClassifier is very picky in what data it will use.
# 1) You need to specify for each column in the input what datatype it is. (see the feature_columns parameter)
# 2) When you use generators to create a tf dataset, each column needs to be the same datatype.


### Opened new file


NameError: ignored

In [None]:
# Hyper-parameters passed to the estimator as keyword arguments.
params = {
  'n_trees': 100,
  'max_depth': 5,
  'n_batches_per_layer': 1,
  'center_bias': True
}
# Build the boosted trees classifier from the feature columns and train it
# with the training input function; both are defined in earlier cells.
classifier = tf.estimator.BoostedTreesClassifier(feature_columns, **params)
classifier.train(train_input_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpxcbj4qey', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.


AttributeError: ignored