### General Start
1. `kubectl`:
    * cmd, powershell

2. `minikube`:
    * cmd, powershell

3. `kind`
    * cmd, powershell

4. kubeflow pipelines:
    * `pip install kfp`

5. Start Docker Desktop

### QuickStart
6. Run in cmd: `kind create cluster --name=<clustername>`

7. Run: `kubectl cluster-info --context kind-<clustername>` (kind prefixes the context name with `kind-`)

8. Run: `deploy_kubeflow_pipelines.zsh`

9. Forward the Kubeflow Pipelines UI to `http://localhost:8080` by running the command below:
    ```bash
    kubectl port-forward -n kubeflow svc/ml-pipeline-ui 8080:80
    ```

10. Allow the Kubeflow Pipelines SDK to talk to the cluster via the following Python code:
    ```python
    import kfp
    client = kfp.Client(host="http://localhost:8080")
    ```


* Official Guides:
    * [Deploying Kubeflow Pipelines as a standalone service](https://www.kubeflow.org/docs/components/pipelines/operator-guides/installation/)

In [1]:
import kfp

# Point the SDK at the Kubeflow Pipelines endpoint that the
# `kubectl port-forward` command makes available on localhost.
client = kfp.Client(
    host="http://localhost:8080",
)



In [3]:
from typing import List

from kfp import Client
from kfp import dsl
from kfp.dsl import Dataset
from kfp.dsl import Input
from kfp.dsl import Model
from kfp.dsl import Output

### Defining the Components for the pipeline

In [4]:
@dsl.component(packages_to_install=['pandas'])
def create_dataset(iris_dataset: Output[Dataset]):
    """Download the UCI Iris data set and store it as a CSV artifact.

    Args:
        iris_dataset: Output artifact; the CSV (including the pandas row
            index as its first, unnamed column) is written to its ``path``.
    """
    import pandas as pd

    csv_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    col_names = [
        "Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width", "Labels"
    ]
    # The raw file has no header row.  Reading it with the default
    # ``header=0`` would consume the first sample as column names and
    # silently drop it, so the names are supplied explicitly instead.
    df = pd.read_csv(csv_url, header=None, names=col_names)

    with open(iris_dataset.path, 'w') as f:
        df.to_csv(f)

  return component_factory.create_component_from_func(


In [5]:
@dsl.component(packages_to_install=['pandas', 'scikit-learn'])
def normalize_dataset(
    input_iris_dataset: Input[Dataset],
    normalized_iris_dataset: Output[Dataset],
    standard_scaler: bool,
    min_max_scaler: bool,
):
    """Scale the feature columns of the Iris CSV and write the result.

    Args:
        input_iris_dataset: Input CSV artifact whose first column is the
            row index written by ``DataFrame.to_csv`` and which contains a
            ``Labels`` column.
        normalized_iris_dataset: Output artifact; the scaled features plus
            the untouched ``Labels`` column are written to its ``path``
            (row index included as the first column).
        standard_scaler: Use ``StandardScaler`` when True.
        min_max_scaler: Use ``MinMaxScaler`` when True.

    Raises:
        ValueError: If both flags are True or both are False.
    """
    if standard_scaler is min_max_scaler:
        raise ValueError(
            'Exactly one of standard_scaler or min_max_scaler must be True.')

    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.preprocessing import StandardScaler

    with open(input_iris_dataset.path) as f:
        # The first column is the serialized row index.  Reading it as the
        # index keeps it from being scaled as if it were a feature.
        df = pd.read_csv(f, index_col=0)
    labels = df.pop('Labels')

    # The guard above ensures exactly one flag is set.
    scaler = StandardScaler() if standard_scaler else MinMaxScaler()

    # Keep the original index so the label assignment below aligns row by row.
    df = pd.DataFrame(scaler.fit_transform(df), index=df.index)
    df['Labels'] = labels
    with open(normalized_iris_dataset.path, 'w') as f:
        df.to_csv(f)

In [6]:
@dsl.component(packages_to_install=['pandas', 'scikit-learn'])
def train_model(
    normalized_iris_dataset: Input[Dataset],
    model: Output[Model],
    n_neighbors: int,
):
    """Fit a k-nearest-neighbours classifier and pickle it.

    Args:
        normalized_iris_dataset: Input CSV artifact whose first column is
            the row index written by ``DataFrame.to_csv`` and which
            contains a ``Labels`` column; all other columns are features.
        model: Output artifact; the fitted classifier is pickled to its
            ``path``.
        n_neighbors: Number of neighbours for ``KNeighborsClassifier``.
    """
    import pickle

    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier

    with open(normalized_iris_dataset.path) as f:
        # The first column is the serialized row index.  Reading it as the
        # index keeps the row position out of the feature matrix.
        df = pd.read_csv(f, index_col=0)

    y = df.pop('Labels')
    X = df

    # Only the training split is used here; the held-out split is discarded.
    X_train, _, y_train, _ = train_test_split(X, y, random_state=0)

    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    clf.fit(X_train, y_train)
    with open(model.path, 'wb') as f:
        pickle.dump(clf, f)

### Pipeline workflow

In [7]:
@dsl.pipeline(name='iris-training-pipeline')
def my_pipeline(
    standard_scaler: bool,
    min_max_scaler: bool,
    neighbors: List[int],
):
    """Create the Iris data set, normalize it, and train one model per k.

    Args:
        standard_scaler: Forwarded to ``normalize_dataset``.
        min_max_scaler: Forwarded to ``normalize_dataset``.
        neighbors: One ``train_model`` task is fanned out for each value,
            which is used as that task's ``n_neighbors``.
    """
    create_dataset_task = create_dataset()

    # Forward the pipeline parameters instead of hard-coded constants so the
    # arguments supplied when the run is submitted actually take effect.
    normalize_dataset_task = normalize_dataset(
        input_iris_dataset=create_dataset_task.outputs['iris_dataset'],
        standard_scaler=standard_scaler,
        min_max_scaler=min_max_scaler)

    with dsl.ParallelFor(neighbors) as n_neighbors:
        train_model(
            normalized_iris_dataset=normalize_dataset_task.outputs['normalized_iris_dataset'],
            n_neighbors=n_neighbors)

### Submit the Pipeline to run

In [9]:
# Endpoint made available by the `kubectl port-forward` command.
# (`kubectl cluster-info --context kind-testcluster` reports a different
# address, e.g. 'https://127.0.0.1:54463'; it is not the one used here.)
endpoint = 'http://localhost:8080'

kfp_client = Client(host=endpoint)

# Values for the pipeline's parameters.
pipeline_arguments = {
    'min_max_scaler': True,
    'standard_scaler': False,
    'neighbors': [3, 6, 9]
}
run = kfp_client.create_run_from_pipeline_func(
    my_pipeline,
    arguments=pipeline_arguments,
)

# Link to the run's details page in the UI.
url = f'{endpoint}/#/runs/details/{run.run_id}'
print(url)

http://localhost:8080/#/runs/details/dec36986-cf31-4733-948a-37497c8a2e53
