In [None]:
#cell type: imports
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [None]:
#cell type: parameters
# Passed (after float conversion) as the `C` argument of LogisticRegression in the log_reg step.
C = 1.0

In [None]:
#pipeline step: load_data
# CSV source for the dataset. The raw URL embeds a commit hash, which presumably
# pins one fixed revision of the gist — TODO confirm.
url = 'https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv'
df = pd.read_csv(url)
# Last expression of the cell: shows the first rows as the cell output.
df.head()

In [None]:
#pipeline step: feature_eng
#depends on: load_data
def feature_engineering(df):
    """Rename the raw measurement columns and derive extra features and the target.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``sepal.length``, ``sepal.width``, ``petal.length``,
        ``petal.width`` and ``variety`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame in which the dotted measurement columns are renamed to
        snake_case and three columns are appended: ``sepal`` and ``petal``
        (length + width of each part) and ``new_variety`` (1 where ``variety``
        equals ``'Setosa'``, otherwise 0).
    """
    snake_case_names = {
        'sepal.length': 'sepal_length',
        'sepal.width': 'sepal_width',
        'petal.length': 'petal_length',
        'petal.width': 'petal_width',
    }
    renamed = df.rename(columns=snake_case_names)

    # assign() appends the derived columns in the order given and returns a new frame.
    return renamed.assign(
        sepal=renamed.sepal_length + renamed.sepal_width,
        petal=renamed.petal_length + renamed.petal_width,
        # Binary target: Setosa against every other variety.
        new_variety=np.where(renamed.variety == 'Setosa', 1, 0),
    )

In [None]:
# Rebind `df` to the engineered frame; the split step reads its renamed and derived columns.
df = feature_engineering(df)

In [None]:
#pipeline step: train_test_split
#depends on: feature_eng
# Feature matrix: the four measurements plus the two engineered sums.
X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'sepal', 'petal']]
# Keep the target one-dimensional: a single-column DataFrame makes scikit-learn
# emit a DataConversionWarning when the classifier is fitted.
y = df['new_variety']
# An integer test_size is an absolute number of test rows, not a fraction.
# Stratify so both splits keep the class ratio, and fix the seed so the split
# (and therefore the reported accuracy) is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=40, stratify=y, random_state=42
)

In [None]:
#pipeline step: log_reg
#depends on: train_test_split
# float(C) converts the parameter before use — presumably because the pipeline
# runner may supply C as a non-float value such as a string; TODO confirm.
log_reg = LogisticRegression(C=float(C)).fit(X_train, y_train)

In [None]:
# Score the fitted model on the held-out split (mean accuracy for scikit-learn classifiers).
acc = log_reg.score(X_test, y_test)

In [None]:
'''Create a pipeline step that prints the model,
to make sure the resulting image has the trained model stored in memory.'''

In [None]:
#pipeline step: random_step
#depends on: log_reg
# Reference the fitted estimator inside this step by printing its repr.
print(log_reg)

In [None]:
# Echo the test-set score returned by log_reg.score(X_test, y_test).
print(acc)

In [None]:
'''The pipeline-metrics cell must be the last cell and must
only print the metric (no comments inside that cell, because its contents are parsed).'''

In [None]:
print(acc)