# In [1]:
from prefect import task,flow

# In [7]:
@task
def load_data(path: str):
    """
    Read the CSV file located at `path` with pandas and return the
    parsed table.
    """
    from pandas import read_csv

    return read_csv(path)

@task
def data_split(input_data):
    """
    Partition the data into a training portion and a test portion.

    20% of the data is held out for testing (test_size=0.2) and the
    split is seeded (random_state=42) so repeated runs give the same
    partition. Returns the pair (train, test).
    """
    from sklearn import model_selection

    parts = model_selection.train_test_split(
        input_data, test_size=0.2, random_state=42
    )
    train_part, test_part = parts

    return train_part, test_part

@task
def model_training(train_data):
    """
    Fit a logistic-regression classifier (liblinear solver) on the
    training set.

    The "Target" column is used as the label; every other column is
    used as a feature. Returns the fitted estimator.
    """
    from sklearn.linear_model import LogisticRegression

    labels = train_data["Target"]
    features = train_data.drop(columns="Target")

    # fit() returns the estimator itself, so the fitted model is returned.
    return LogisticRegression(solver="liblinear").fit(features, labels)

@task
def model_evaluation(test_data, model):
    """
    Score the model on the test set using Cohen's kappa and the
    Matthews correlation coefficient.

    The "Target" column of `test_data` holds the true labels; the
    remaining columns are passed to `model.predict`. Returns a dict
    with the keys "kappa_score" and "mat_score".
    """
    from sklearn import metrics

    actual = test_data["Target"]
    predicted = model.predict(test_data.drop(columns="Target"))

    return {
        "kappa_score": metrics.cohen_kappa_score(actual, predicted),
        "mat_score": metrics.matthews_corrcoef(actual, predicted),
    }

@task
def save_model(model, path="prostate_model.pkl"):
    """
    Serialize the model to disk with joblib.

    Args:
        model: The object to persist (the fitted estimator in this
            pipeline).
        path: Destination file for the serialized model. Defaults to
            "prostate_model.pkl", the location this task always wrote
            to before the parameter existed, so existing callers are
            unaffected.

    Returns:
        None. The only effect is the file written at `path`.
    """
    import joblib

    joblib.dump(model, path)

@flow
def model_pipeline(data_path: str = "prostate.csv"):
    """
    Train, evaluate and persist the model end to end.

    Pipeline:
    Load data -> Data Split -> Model Training -> Model Evaluation -> Saving the model

    Args:
        data_path: CSV file to load. Defaults to "prostate.csv", the
            file the flow previously always read.

    Returns:
        The dict produced by model_evaluation (keys "kappa_score" and
        "mat_score"), so the caller can inspect how the saved model
        performed on the held-out test set.
    """
    df = load_data(data_path)
    train, test = data_split(df)
    model = model_training(train)
    # Keep the evaluation result instead of discarding it; otherwise the
    # metrics are computed but never observable by anyone.
    metrics = model_evaluation(test_data=test, model=model)
    save_model(model)

    return metrics

# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__=="__main__":
    model_pipeline()