In [20]:
TRAINER_FILE = "catjobheart.py"
KUBERNETES_FILE = "katib-catboost-experiment.yaml"

In [2]:
import re

from IPython.utils.capture import CapturedIO


def get_resource(captured_io: CapturedIO) -> str:
    """
    Gets a resource name from `kubectl apply -f <configuration.yaml>`.

    :param str captured_io: Output captured by using `%%capture` cell magic
    :return: Name of the Kubernetes resource
    :rtype: str
    :raises Exception: if the resource could not be created
    """
    out = captured_io.stdout
    matches = re.search(r"^(.+)\s+created", out)
    if matches is not None:
        return matches.group(1)
    else:
        raise Exception(f"Cannot get resource as its creation failed: {out}. It may already exist.")

In [18]:
%%writefile $TRAINER_FILE
import argparse
import os
from catboost import CatBoostClassifier as cat

import pandas as pd

#from sklearn.datasets import fetch_openml
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler
#from timeit import default_timer as timestamp

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--iterations',
                        type = int,
                        default = 500,
                        help = 'Max count of trees.')
    parser.add_argument('--depth',
                        type = int,
                        default = 6,
                        help = 'Depth of a tree.')
    parser.add_argument('--l2_leaf_reg',
                        type = float,
                        default = 3.0,
                        help = 'Coefficient at the L2 regularization term of the cost function.')
    args = parser.parse_args()
    
    df=pd.read_csv('https://raw.githubusercontent.com/Soot3/testing/master/heart.csv')

    def removeOutlier(att, df):

      lowerbound = att.mean() - 3 * att.std()
      upperbound = att.mean() + 3 * att.std()

      df1 = df[(att > lowerbound) & (att < upperbound)]

      df = df1.copy()

      return df
    df = removeOutlier(df.trtbps, df)
    df = removeOutlier(df.chol, df)   

    # Separate Target Classes
    df_1 = df[df.output==1]
    df_2 = df[df.output==0]

    # Upsample minority class
    df_upsample_1 = resample(df_2, 
                                  replace=True,     # sample with replacement
                                  n_samples=163,    # to match majority class
                                  random_state=123) # reproducible results

    # Combine majority class with upsampled minority class
    df_upsampled = pd.concat([df_1, df_upsample_1])
    x = df_upsampled.drop('output', axis = 1)
    y = df_upsampled['output']  

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 111)
    scaler = StandardScaler()

    x_train = scaler.fit_transform(x_train)
    x_test = scaler.fit_transform(x_test)


    params = {
        'iterations': args.iterations,
        'depth': args.depth,
        'l2_leaf_reg': args.l2_leaf_reg
    }
    

    #start = timestamp()
    model = cat()
    model.fit(x_train, y_train)
    #stop = timestamp()

    #print('time=%.3f' % (stop - start))

    predictions = model.predict(x_test)

    print('accuracy=%.3f' % accuracy_score(y_test, predictions))

Overwriting catjobheart.py


In [19]:
%run $TRAINER_FILE

Learning rate set to 0.005796
0:	learn: 0.6885506	total: 695us	remaining: 694ms
1:	learn: 0.6842097	total: 1.39ms	remaining: 694ms
2:	learn: 0.6799674	total: 2.07ms	remaining: 688ms
3:	learn: 0.6752305	total: 2.87ms	remaining: 715ms
4:	learn: 0.6705942	total: 3.63ms	remaining: 723ms
5:	learn: 0.6664000	total: 4.38ms	remaining: 725ms
6:	learn: 0.6624560	total: 5.11ms	remaining: 725ms
7:	learn: 0.6566516	total: 5.81ms	remaining: 721ms
8:	learn: 0.6524107	total: 6.57ms	remaining: 724ms
9:	learn: 0.6481271	total: 7.24ms	remaining: 717ms
10:	learn: 0.6435849	total: 7.9ms	remaining: 711ms
11:	learn: 0.6392303	total: 8.57ms	remaining: 705ms
12:	learn: 0.6349618	total: 9.22ms	remaining: 700ms
13:	learn: 0.6309459	total: 9.97ms	remaining: 702ms
14:	learn: 0.6273948	total: 14ms	remaining: 916ms
15:	learn: 0.6232728	total: 14.7ms	remaining: 902ms
16:	learn: 0.6182015	total: 15.4ms	remaining: 890ms
17:	learn: 0.6149066	total: 16.1ms	remaining: 880ms
18:	learn: 0.6117788	total: 16.9ms	remaining: 87