#### Arize with ML stack

##### 1. Install Arize

### I. ARIZE

In [1]:
!pip install arize




[notice] A new release of pip is available: 23.3.1 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


##### 2. Import and create the Arize client

In [2]:
import os
from getpass import getpass

# `Schema` was previously imported from both modules; the second import
# shadowed the first, so it is imported once from `arize.utils.types`.
from arize.pandas.logger import Client
from arize.utils.types import Environments, Metrics, ModelTypes, Schema

# Credentials must never be hardcoded in a notebook: they end up in version
# control and in shared copies. They are read from the ARIZE_API_KEY and
# ARIZE_SPACE_KEY environment variables, with an interactive prompt as a
# fallback when a variable is unset or empty.
# NOTE(review): any key that was previously committed in this cell should be
# treated as leaked and rotated.
API_KEY = os.environ.get("ARIZE_API_KEY") or getpass("Arize API key: ")
SPACE_KEY = os.environ.get("ARIZE_SPACE_KEY") or getpass("Arize space key: ")
arize_client = Client(space_key=SPACE_KEY, api_key=API_KEY)

##### 3. Download dataset 

In [3]:
# Load the breast cancer dataset through scikit-learn's loader; the result is
# indexed by key ('data', 'feature_names', 'target', 'target_names') below.
from sklearn.datasets import load_breast_cancer
breast_cancer_dataset = load_breast_cancer()

##### 4. Extract features  

In [4]:
# Unpack the dataset into the four pieces used by the following cells.
breast_cancer_features = breast_cancer_dataset.data  # feature data
breast_cancer_feature_names = breast_cancer_dataset.feature_names  # feature names
breast_cancer_targets = breast_cancer_dataset.target  # actual data
breast_cancer_target_names = breast_cancer_dataset.target_names  # actual labels

##### 5. Map target values to their label names

In [5]:
# Translate every target value into its label name by indexing the label
# array; the resulting list becomes our list of actuals.
target_name_transcription = [
    breast_cancer_target_names[target] for target in breast_cancer_targets
]

##### 6. Create the pandas DataFrame

In [6]:
import pandas as pd

# One column per feature, plus the two label columns.
# The predictions are the actual labels in reversed row order. Reversing is
# optional, but it makes this example more interesting in the platform than
# predictions that are identical to the actuals.
df = pd.DataFrame(
    breast_cancer_features, columns=breast_cancer_feature_names
).assign(
    actual_label=target_name_transcription,
    prediction_label=target_name_transcription[::-1],
)

##### 7. Log data to Arize

In [7]:
# Tell Arize which dataframe columns hold the labels and which hold features.
schema = Schema(
    actual_label_column_name="actual_label",
    prediction_label_column_name="prediction_label",
    # Derive the feature columns from the same array that named the dataframe
    # columns instead of repeating all 30 names by hand, so the schema cannot
    # drift from the data. `str(...)` turns each entry into a plain Python str.
    feature_column_names=[str(name) for name in breast_cancer_feature_names],
)

In [9]:
# Log the model data
response = arize_client.log(
    dataframe=df,
    schema=schema,
    model_id='breast_cancer_dataset', 
    model_version='v1',
    model_type=ModelTypes.BINARY_CLASSIFICATION,
    metrics_validation=[Metrics.CLASSIFICATION], 
    environment=Environments.PRODUCTION
) 

[38;21m  arize.utils.logging | INFO | Success! Check out your data at https://app.arize.com/organizations/QWNjb3VudE9yZ2FuaXphdGlvbjo1NDcx/spaces/U3BhY2U6NTc3Ng==/models/modelName/breast_cancer_dataset?selectedTab=dataIngestion[0m


### II. MLFLOW

##### A simple example of using params for model inference:

In [10]:
!pip install mlflow




[notice] A new release of pip is available: 23.3.1 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip





In [11]:
import mlflow
from mlflow.models import infer_signature


class MyModel(mlflow.pyfunc.PythonModel):
    """Toy pyfunc model that ignores its input and echoes the inference params."""

    def predict(self, ctx, model_input, params=None):
        """Return the values of ``params`` as a list.

        Args:
            ctx: Model context supplied by the caller; unused here.
            model_input: Input data; unused here.
            params: Mapping of inference parameters. Optional; ``None`` is
                treated as an empty mapping so the model can also be called
                without any parameters.

        Returns:
            A list of the values in ``params``, in the mapping's own order.
        """
        return list((params or {}).values())


# Default values for the inference parameters used throughout this example.
params = {"str_param": "string", "int_array": [1, 2, 3]}
# params' default values are saved with ModelSignature
signature = infer_signature(["input"], params=params)

# Log the model together with its signature inside a run.
with mlflow.start_run():
    model_info = mlflow.pyfunc.log_model(
        python_model=MyModel(), artifact_path="my_model", signature=signature
    )

# Load the logged model back through the URI returned by log_model.
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)

# Not passing params -- the prediction uses the default values
loaded_predict = loaded_model.predict(["input"])
assert loaded_predict == ["string", [1, 2, 3]]

# Passing only some params -- the missing ones fall back to their defaults
loaded_predict = loaded_model.predict(["input"], params={"str_param": "new_string"})
assert loaded_predict == ["new_string", [1, 2, 3]]

# Passing all params -- every default is overridden
loaded_predict = loaded_model.predict(
    ["input"], params={"str_param": "new_string", "int_array": [4, 5, 6]}
)
assert loaded_predict == ["new_string", [4, 5, 6]]

The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh()

All git commands will error until this is rectified.

$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - error|e|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet

