In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [2]:
# Load the train and test data
df1 = pd.read_csv("train.csv")
df2 = pd.read_csv("test.csv")


In [3]:
# Transform the data. Job -1 is defaulted to 16 cores
def transformdata(data):
    data.loc[data['n_jobs']==-1,'n_jobs']= 16
    data['n1'] = (data['n_classes'] * data['n_clusters_per_class'])/data['n_jobs']
    data['n2'] = (data['max_iter'] * data['n_samples'])/data['n_jobs']
    data['n3'] = data['n1']/data['n_informative']
    
    return data

In [4]:
# Transform both the train and the test data
df1=transformdata(df1)
df2=transformdata(df2)

In [5]:
# Take the target varibale the penalty categorical varible
traintarget = df1['time']
trainpenalty = df1['penalty']

# Remove unnecessary features. 
# Penalty is requuired but it will be attached later using the above variable
dropfeatures  = ['id','l1_ratio','scale','random_state','alpha','flip_y', 'penalty']
df1 = df1.drop(dropfeatures, axis=1)
df1=df1.drop(['time'],axis=1)

# Drop the unnecessary features from the test data
testpenalty=df2['penalty']
df2  = df2.drop(dropfeatures, axis=1)

In [6]:
# Function to normalize the data
def isnum(df, istrain):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    df=df.select_dtypes(include=numerics)
  
    if istrain:
        # Assign the train penalty later to attach to the dataframe
        penalty=trainpenalty
        df = (df - df2.mean())/df2.std(ddof=0)
    else:
        penalty = testpenalty
        df = (df - df.mean())/df.std(ddof=0)
    
    df=df.join(penalty)
    
    return df

In [7]:
# Normalize the data
train= isnum(df1, True)
test = isnum(df2, False)

In [8]:
# Train and validation split
BATCH_SIZE = 10
num_epochs = 1000
X_train, X_test, y_train, y_test = train_test_split(train, traintarget, test_size=0.15) 

InputTrain = tf.estimator.inputs.pandas_input_fn(x=train,y=traintarget,batch_size=BATCH_SIZE,num_epochs=num_epochs,shuffle=True)

input_test = tf.estimator.inputs.pandas_input_fn(x=X_test,y=y_test,batch_size=BATCH_SIZE,num_epochs=num_epochs,shuffle=True)

In [9]:
max_iter = tf.feature_column.numeric_column("max_iter")
n_jobs = tf.feature_column.numeric_column("n_jobs")
n_samples = tf.feature_column.numeric_column("n_samples")
n_features = tf.feature_column.numeric_column("n_features")
n_classes = tf.feature_column.numeric_column("n_classes")
n_clusters_per_class = tf.feature_column.numeric_column("n_clusters_per_class")
n_informative = tf.feature_column.numeric_column("n_informative")
penalty = tf.feature_column.categorical_column_with_vocabulary_list(key="penalty", vocabulary_list=["l2", "l1", "none", "elasticnet"])
n1 = tf.feature_column.numeric_column("n1")
n2 = tf.feature_column.numeric_column("n2")
n3 = tf.feature_column.numeric_column("n3")


In [10]:
Features = [max_iter,n_jobs,n_samples,n_features,n_classes,n_informative,n_clusters_per_class,n1,n2,n3,
    tf.feature_column.indicator_column(penalty),]

w_cols = [max_iter,n_jobs, n_samples, n_features, n_classes,n_informative,n_clusters_per_class,n1,n2,n3]

In [11]:
m = tf.estimator.DNNLinearCombinedRegressor(model_dir = "model",
    linear_feature_columns = w_cols,
    dnn_feature_columns = Features,
    dnn_hidden_units = [1000, 500, 240, 150, 75, 25, 14,2],
    dnn_activation_fn = tf.nn.leaky_relu
    )


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x118485d30>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [12]:
m.train(input_fn = InputTrain)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into model/model.ckpt.
INFO:tensorflow:loss = 28.028662, step = 1
INFO:tensorflow:global_step/sec: 88.5819
INFO:tensorflow:loss = 72.34206, step = 101 (1.130 sec)
INFO:tensorflow:global_step/sec: 138.258
INFO:tensorflow:loss = 14.471495, step = 201 (0.724 sec)
INFO:tensorflow:global_step/sec: 138.65
INFO:tensorflow:loss = 6.078404, step = 301 (0.721 sec)
INFO:tensorflow:global_step/sec: 157.764
INFO:tensorflow:loss = 8.711626, step = 401 (0.634 sec)
INFO:tensorflow:global_step/sec: 158.095
INFO:tensorflow:loss = 18.874521, step = 501 (0.632 sec)
INFO:tensorflow:global_step/sec: 151.237
INFO:tensorflow:loss = 3.9481387, step = 601 (0.661 sec)
INFO:tensorflow:global_step/sec: 142.716
INFO:tensorflow:loss = 15.08515

INFO:tensorflow:global_step/sec: 156.886
INFO:tensorflow:loss = 0.41743606, step = 8101 (0.637 sec)
INFO:tensorflow:global_step/sec: 123.43
INFO:tensorflow:loss = 0.66433525, step = 8201 (0.811 sec)
INFO:tensorflow:global_step/sec: 153.697
INFO:tensorflow:loss = 2.6503782, step = 8301 (0.650 sec)
INFO:tensorflow:global_step/sec: 113.157
INFO:tensorflow:loss = 1.5939479, step = 8401 (0.883 sec)
INFO:tensorflow:global_step/sec: 141.714
INFO:tensorflow:loss = 1.0179234, step = 8501 (0.706 sec)
INFO:tensorflow:global_step/sec: 147.001
INFO:tensorflow:loss = 3.033056, step = 8601 (0.681 sec)
INFO:tensorflow:global_step/sec: 142.157
INFO:tensorflow:loss = 0.14874846, step = 8701 (0.703 sec)
INFO:tensorflow:global_step/sec: 144.868
INFO:tensorflow:loss = 0.3094266, step = 8801 (0.691 sec)
INFO:tensorflow:global_step/sec: 95.2224
INFO:tensorflow:loss = 0.15957204, step = 8901 (1.051 sec)
INFO:tensorflow:global_step/sec: 132.542
INFO:tensorflow:loss = 2.5056565, step = 9001 (0.752 sec)
INFO:ten

INFO:tensorflow:global_step/sec: 92.135
INFO:tensorflow:loss = 0.23617661, step = 16301 (1.081 sec)
INFO:tensorflow:global_step/sec: 100.413
INFO:tensorflow:loss = 0.14027195, step = 16401 (0.995 sec)
INFO:tensorflow:global_step/sec: 136.762
INFO:tensorflow:loss = 0.1499193, step = 16501 (0.732 sec)
INFO:tensorflow:global_step/sec: 158.854
INFO:tensorflow:loss = 0.26352218, step = 16601 (0.629 sec)
INFO:tensorflow:global_step/sec: 157.089
INFO:tensorflow:loss = 0.26112786, step = 16701 (0.638 sec)
INFO:tensorflow:global_step/sec: 154.42
INFO:tensorflow:loss = 0.33371353, step = 16801 (0.647 sec)
INFO:tensorflow:global_step/sec: 148.549
INFO:tensorflow:loss = 0.15768318, step = 16901 (0.673 sec)
INFO:tensorflow:global_step/sec: 142.42
INFO:tensorflow:loss = 0.29700455, step = 17001 (0.702 sec)
INFO:tensorflow:global_step/sec: 136.713
INFO:tensorflow:loss = 0.10016758, step = 17101 (0.732 sec)
INFO:tensorflow:global_step/sec: 146.196
INFO:tensorflow:loss = 0.41629055, step = 17201 (0.684

INFO:tensorflow:loss = 0.16157492, step = 24401 (0.863 sec)
INFO:tensorflow:global_step/sec: 102.526
INFO:tensorflow:loss = 0.04253025, step = 24501 (0.977 sec)
INFO:tensorflow:global_step/sec: 113.406
INFO:tensorflow:loss = 0.071246, step = 24601 (0.882 sec)
INFO:tensorflow:global_step/sec: 103.97
INFO:tensorflow:loss = 0.17177424, step = 24701 (0.960 sec)
INFO:tensorflow:global_step/sec: 135.874
INFO:tensorflow:loss = 0.029075125, step = 24801 (0.737 sec)
INFO:tensorflow:global_step/sec: 52.5019
INFO:tensorflow:loss = 0.37043524, step = 24901 (1.911 sec)
INFO:tensorflow:global_step/sec: 56.2611
INFO:tensorflow:loss = 0.03612635, step = 25001 (1.774 sec)
INFO:tensorflow:global_step/sec: 57.2041
INFO:tensorflow:loss = 0.117009535, step = 25101 (1.748 sec)
INFO:tensorflow:global_step/sec: 73.8943
INFO:tensorflow:loss = 0.196951, step = 25201 (1.349 sec)
INFO:tensorflow:global_step/sec: 151.335
INFO:tensorflow:loss = 0.17517486, step = 25301 (0.661 sec)
INFO:tensorflow:global_step/sec: 1

INFO:tensorflow:global_step/sec: 158.403
INFO:tensorflow:loss = 0.073279664, step = 32601 (0.631 sec)
INFO:tensorflow:global_step/sec: 157.951
INFO:tensorflow:loss = 0.0828764, step = 32701 (0.633 sec)
INFO:tensorflow:global_step/sec: 157.754
INFO:tensorflow:loss = 0.07074684, step = 32801 (0.634 sec)
INFO:tensorflow:global_step/sec: 157.249
INFO:tensorflow:loss = 0.08372709, step = 32901 (0.637 sec)
INFO:tensorflow:global_step/sec: 156.958
INFO:tensorflow:loss = 0.05478832, step = 33001 (0.637 sec)
INFO:tensorflow:global_step/sec: 139.084
INFO:tensorflow:loss = 0.08279221, step = 33101 (0.718 sec)
INFO:tensorflow:global_step/sec: 149.931
INFO:tensorflow:loss = 0.070743, step = 33201 (0.667 sec)
INFO:tensorflow:global_step/sec: 152.066
INFO:tensorflow:loss = 0.033134867, step = 33301 (0.658 sec)
INFO:tensorflow:global_step/sec: 155.46
INFO:tensorflow:loss = 0.07562356, step = 33401 (0.643 sec)
INFO:tensorflow:global_step/sec: 159.116
INFO:tensorflow:loss = 0.041255027, step = 33501 (0.

<tensorflow.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedRegressor at 0x1184a5cc0>

In [13]:
prediction = tf.estimator.inputs.pandas_input_fn(x=test,batch_size=1, num_epochs=1, shuffle=False)

predictions = m.predict(input_fn=prediction)
result = []
for i in predictions:
    result.append(i["predictions"][0])
print(result)


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from model/model.ckpt-40000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
[1.1257185, 10.962858, 0.3775821, 1.21106, 2.1545312, 7.127759, 2.9744148, 0.47258186, 11.6227455, 0.39495844, 4.702225, 11.607059, 0.8155822, 32.735775, 0.46760178, 0.7087463, 0.5744325, 9.205743, 3.165218, 1.1894393, 0.41674826, 0.30453306, 0.34110546, 0.61195034, 1.1012733, 1.7155226, 1.1130819, 3.1303537, 2.028663, 2.6537652, 18.946907, 3.464551, 0.39134797, 6.300104, 6.9197826, 0.69307333, 4.244805, 1.9700763, 0.7321279, 1.6103672, 4.7405686, 10.418643, 1.4650317, 5.6371484, 0.77304924, 2.751853, 4.6700835, 0.7642481, 8.048653, 0.7581669, 0.10865374, 20.236101, 0.15654105, 7.6410847, 26.301165, 0.19501176, 0.9148604, 0.926707, 0.8504816, 0.8114489, 1.1388891, 6.624264, 0.4449992, 9.659738, 2.725188, 11.467154, 1.108264, 0.55403

In [14]:
#Save the results to the file 
prediction = pd.DataFrame(result, columns=['time']).to_csv('submission.csv', index=True, index_label='id')
