In [1]:
import kfp
import kfp.components as comp
import requests
import kfp.dsl as dsl

In [2]:
!pip show kfp


Name: kfp
Version: 1.8.18
Summary: KubeFlow Pipelines SDK
Home-page: https://github.com/kubeflow/pipelines
Author: The Kubeflow Authors


In [3]:
def Prepare_data():
    """Load the raw battery dataset, drop incomplete rows and save the result.

    Reads the source CSV, removes every row that contains a missing value and
    writes the remaining rows (without the index column) to
    ``data/final_df.csv`` relative to the current working directory. The
    ``data`` directory is created if it does not exist yet.
    """
    # Imports are local because this function is packaged as a self-contained
    # component via create_component_from_func.
    import os

    import pandas as pd

    print('---Inside Prepre_data components--- ')

    # Load dataset. A raw string keeps the backslashes literal; the plain
    # literal only worked because "\B" is not a recognised escape sequence,
    # which Python reports as an invalid escape.
    # NOTE(review): this is a drive-letter path on the author's machine and is
    # unlikely to exist inside the component's container image -- confirm where
    # the CSV is expected to come from.
    df = pd.read_csv(r"D:\Battery_Model\Battery_New.csv")
    df = df.dropna()

    # to_csv does not create missing parent directories.
    os.makedirs('data', exist_ok=True)
    df.to_csv('data/final_df.csv', index=False)
    print('\n ---data csv is saved to PV location /data/final_df.csv---')


Required-by: 


In [4]:
def train_test_split():
    """Split the cleaned dataset into train/test sets and save them as .npy files.

    Reads ``data/final_df.csv``, uses the ``Final_Time`` column as the target
    and every other column as a feature, performs an 80/20 split stratified on
    the target (``random_state=20``), writes ``X_train``, ``X_test``,
    ``y_train`` and ``y_test`` to ``data/<name>.npy`` and prints each of them.

    Raises:
        KeyError: if the dataset has no ``Final_Time`` column.
    """
    import numpy as np
    import pandas as pd
    # Inside this body the name refers to scikit-learn's splitter, not to this
    # component function (the local import shadows it).
    from sklearn.model_selection import train_test_split

    print('--- Inside train_test_split component---')
    final_data = pd.read_csv('data/final_df.csv')
    target_column = "Final_Time"
    if target_column not in final_data.columns:
        raise KeyError(f"column {target_column!r} not found in data/final_df.csv")

    # Boolean column mask; both selections stay 2-D DataFrames.
    is_target = final_data.columns == target_column
    X = final_data.loc[:, ~is_target]
    y = final_data.loc[:, is_target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=20
    )

    splits = {
        'X_train': X_train,
        'X_test': X_test,
        'y_train': y_train,
        'y_test': y_test,
    }
    for name, values in splits.items():
        np.save(f'data/{name}.npy', values)

    for header, values in (
        ('\n --- X_train ---', X_train),
        ('\n --- X_test ---', X_test),
        ('\n --- y_train ---', y_train),
        ('\n --- y_test---', y_test),
    ):
        print(header)
        print('\n')
        print(values)

In [5]:
def training_basic_Classifier():
    """Fit a logistic-regression classifier on the saved training split.

    Loads ``data/X_train.npy`` and ``data/y_train.npy``, trains a
    ``LogisticRegression(max_iter=500)`` on them and pickles the fitted model
    to ``data/model.pkl``.
    """
    import pickle

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    print("--- Inside training_basic_classifier component ---")

    X_train = np.load('data/X_train.npy', allow_pickle=True)
    # The target may load as a column vector of shape (n, 1) when it was saved
    # from a one-column DataFrame; flatten it to the 1-D shape fit() expects so
    # scikit-learn does not emit a DataConversionWarning.
    y_train = np.load('data/y_train.npy', allow_pickle=True).ravel()

    classifier = LogisticRegression(max_iter=500)
    classifier.fit(X_train, y_train)

    with open('data/model.pkl', 'wb') as f:
        pickle.dump(classifier, f)
    # Logged only after the file has been written and closed.
    print("\n logistic regression classifier is trained on Battery_Model data and saved to PV location/data/model.pkl ---")
    

In [6]:
def predict_on_test_data():
    """Predict class labels for the saved test split with the pickled model.

    Loads the model from ``data/model.pkl`` and the features from
    ``data/X_test.npy``, calls ``predict`` on them, saves the result to
    ``data/y_pred.npy`` and prints it.
    """
    import pickle

    import numpy as np

    print('--- Inside predict_on_test_data component ---')

    # NOTE: unpickling executes code embedded in the file; only load model
    # files produced by a trusted step.
    with open('data/model.pkl', 'rb') as f:
        logistic_reg_model = pickle.load(f)

    X_test = np.load('data/X_test.npy', allow_pickle=True)
    y_pred = logistic_reg_model.predict(X_test)
    np.save('data/y_pred.npy', y_pred)

    print('\n --- predicted classes ---')
    print('\n')
    print(y_pred)

In [7]:
def predict_prob_on_test_data():
    """Predict class probabilities for the saved test split with the pickled model.

    Loads the model from ``data/model.pkl`` and the features from
    ``data/X_test.npy``, calls ``predict_proba`` on them, saves the result to
    ``data/y_pred_prob.npy`` and prints it.
    """
    import pickle

    import numpy as np

    print('--- Inside predict_prob_on_test_data component ---')

    # NOTE: unpickling executes code embedded in the file; only load model
    # files produced by a trusted step.
    with open('data/model.pkl', 'rb') as f:
        logistic_reg_model = pickle.load(f)

    X_test = np.load('data/X_test.npy', allow_pickle=True)
    # scikit-learn estimators expose probabilities as predict_proba.
    y_pred_prob = logistic_reg_model.predict_proba(X_test)
    np.save('data/y_pred_prob.npy', y_pred_prob)

    print('\n --- predicted Probabilities ---')
    print('\n')
    print(y_pred_prob)
    

In [8]:
def get_metrics():
    """Score the saved predictions against the saved test labels and print the results.

    Loads ``data/y_test.npy``, ``data/y_pred.npy`` (predicted labels) and
    ``data/y_pred_prob.npy`` (predicted probabilities). Accuracy and
    micro-averaged precision/recall are computed from the labels, log-loss from
    the probabilities. Prints scikit-learn's classification report followed by
    a dict of the four metrics rounded to two decimals.
    """
    import numpy as np
    from sklearn import metrics
    from sklearn.metrics import accuracy_score, precision_score, recall_score, log_loss

    print('--- Inside get_metrics component ---')

    # Flatten the labels: they may have been saved as an (n, 1) column.
    y_test = np.load('data/y_test.npy', allow_pickle=True).ravel()
    # Label metrics need predicted classes; only log_loss takes probabilities.
    y_pred = np.load('data/y_pred.npy', allow_pickle=True)
    y_pred_prob = np.load('data/y_pred_prob.npy', allow_pickle=True)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='micro')
    recall = recall_score(y_test, y_pred, average='micro')
    entropy = log_loss(y_test, y_pred_prob)

    print(metrics.classification_report(y_test, y_pred))

    print("\n Model Metrics :", {'accuracy': round(acc,2),'precision': round(prec,2),'recall': round(recall,2),'entropy': round(entropy,2)})



In [9]:
# Print the version of the `python` executable found on the shell PATH.
# NOTE(review): this is not necessarily the interpreter running the kernel -- confirm.
!python --version

Python 3.9.13


In [10]:
# Display the pandas version installed in the notebook environment
# (presumably the source of the `pandas==` pin passed to packages_to_install).
import pandas as pd
pd.__version__

'1.4.4'

In [11]:
# Print the numpy version installed in the notebook environment
# (presumably the source of the `numpy==` pin passed to packages_to_install).
import numpy as np
print(np.__version__)

1.21.5


In [12]:
# Print the scikit-learn version installed in the notebook environment
# (presumably the source of the `scikit-learn==` pin passed to packages_to_install).
import sklearn
print(sklearn.__version__)

1.0.2


In [13]:
# Component factory for the data-preparation step: Prepare_data runs in a
# python:3.9.13 container with the pinned packages installed at start-up.
create_step_Prepare_data = comp.create_component_from_func(
    Prepare_data,
    base_image='python:3.9.13',
    packages_to_install=[
        'pandas==1.4.4',
        'numpy==1.21.5',
    ],
)

In [14]:
# Component factory for the splitting step: train_test_split runs in a
# python:3.9.13 container with the pinned packages installed at start-up.
create_step_train_test_split = comp.create_component_from_func(
    train_test_split,
    base_image='python:3.9.13',
    packages_to_install=[
        'pandas==1.4.4',
        'numpy==1.21.5',
        'scikit-learn==1.0.2',
    ],
)

In [15]:
# Component factory for the training step: training_basic_Classifier runs in a
# python:3.9.13 container with the pinned packages installed at start-up.
create_step_training_basic_Classifier = comp.create_component_from_func(
    training_basic_Classifier,
    base_image='python:3.9.13',
    packages_to_install=[
        'pandas==1.4.4',
        'numpy==1.21.5',
        'scikit-learn==1.0.2',
    ],
)

In [16]:
# Component factory for the label-prediction step: predict_on_test_data runs in
# a python:3.9.13 container with the pinned packages installed at start-up.
create_step_predict_on_test_data = comp.create_component_from_func(
    predict_on_test_data,
    base_image='python:3.9.13',
    packages_to_install=[
        'pandas==1.4.4',
        'numpy==1.21.5',
        'scikit-learn==1.0.2',
    ],
)

In [17]:
# Component factory for the probability-prediction step:
# predict_prob_on_test_data runs in a python:3.9.13 container with the pinned
# packages installed at start-up.
create_step_predict_prob_on_test_data = comp.create_component_from_func(
    predict_prob_on_test_data,
    base_image='python:3.9.13',
    packages_to_install=[
        'pandas==1.4.4',
        'numpy==1.21.5',
        'scikit-learn==1.0.2',
    ],
)

In [18]:
# Component factory for the evaluation step: get_metrics runs in a
# python:3.9.13 container with the pinned packages installed at start-up.
create_step_get_metrics = comp.create_component_from_func(
    get_metrics,
    base_image='python:3.9.13',
    packages_to_install=[
        'pandas==1.4.4',
        'numpy==1.21.5',
        'scikit-learn==1.0.2',
    ],
)

In [19]:
@dsl.pipeline(
    name='battery_model on kubeflow pipeline',
    description='A simple pipeline that performs the Battery_Model task',
)
def Battery_Model_pipeline(data_path: str):
    # data_path is the pipeline parameter used as the mount path of the shared
    # volume in every step.

    # One 100Gi volume (RWO access mode) that all steps read from and write to.
    vop = dsl.VolumeOp(
        name="t-vol",
        resource_name="t-vol",
        size="100Gi",
        modes=dsl.VOLUME_MODE_RWO,
    )

    # Step factories in execution order: prepare -> split -> train ->
    # predict labels -> predict probabilities -> metrics.
    ordered_step_factories = (
        create_step_Prepare_data,
        create_step_train_test_split,
        create_step_training_basic_Classifier,
        create_step_predict_on_test_data,
        create_step_predict_prob_on_test_data,
        create_step_get_metrics,
    )

    upstream_task = None
    for make_step in ordered_step_factories:
        # Every step mounts the same volume at data_path ...
        current_task = make_step().add_pvolumes({data_path: vop.volume})
        # ... and, except for the first one, runs only after its predecessor.
        if upstream_task is not None:
            current_task.after(upstream_task)
        upstream_task = current_task
    


In [20]:
# Compile the pipeline function into a workflow package written to
# Battery_Model_pipeline.yaml in the current working directory.
pipeline_compiler = kfp.compiler.Compiler()
pipeline_compiler.compile(
    pipeline_func=Battery_Model_pipeline,
    package_path='Battery_Model_pipeline.yaml',
)