In [2]:
# !pip install fosforml

In [1]:
# pip show fosforml

## Sklearn Models Registration

### Session creation

In [2]:
from fosforml.model_manager.snowflakesession import get_session, get_connection_params
# Obtain the session object from fosforml; it is passed as `session=` to
# Registry(...) and register_model(...) in later cells.
session = get_session()
# Connection parameters from fosforml; not referenced again in the visible cells.
connection_params = get_connection_params()

### Modeling

#### Multiclass classification

In [3]:
# Import the estimator from the public package path; `sklearn.ensemble._forest`
# is a private module and may move between scikit-learn releases.
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import pandas as pd

# Single seed shared by the split and the forest so that Restart & Run All
# reproduces the same train/test partition and the same fitted model.
SEED = 42

m_dataset = load_iris()
feature_names = m_dataset.feature_names
# Normalise column names: spaces become underscores and parentheses are dropped
# (e.g. "sepal length (cm)" -> "sepal_length_cm").
u_features = [col.replace(" ", "_").replace("(", "").replace(")", "") for col in feature_names]
m_df = pd.DataFrame(m_dataset.data, columns=u_features)
m_df["Target"] = m_dataset.target

# Features are every column except "Target"; the label is the "Target" column.
x_train, x_test, y_train, y_test = train_test_split(
    m_df.drop(columns="Target"),
    m_df["Target"],
    random_state=SEED,
)
model = RandomForestClassifier(random_state=SEED)
model.fit(x_train, y_train)

# Predictions on the held-out split, as a one-column frame named "Predicted".
y_pred = pd.DataFrame(model.predict(x_test), columns=["Predicted"])

## Custom Function

In [5]:
## User-provided data-processing function
def pre_process(input_data_frame):
    """Placeholder pre-processing hook: return the input unchanged."""
    processed = input_data_frame
    return processed

In [9]:
class FDCCustomModel:
    """Pair a model with an optional pre-processing hook.

    Parameters
    ----------
    model :
        Any object exposing ``predict(data)``.
    pre_process : callable, optional
        Function applied to the input before it is handed to ``model.predict``.
        When omitted (``None``), the input is passed through untouched.
    """

    def __init__(self, model, pre_process=None):
        self.pre_process = pre_process
        self.model = model

    def predict(self, input_data):
        """Return ``model.predict`` on the (optionally pre-processed) input.

        The hook stored on the instance is used when one was supplied, so the
        result does not depend on a module-level ``pre_process`` being defined.
        """
        if self.pre_process is not None:
            print("with preprocess")
            return self.model.predict(self.pre_process(input_data))
        print("without preprocess")
        return self.model.predict(input_data)

In [11]:
# Wrap the fitted `model` together with the user-supplied `pre_process` hook.
# NOTE(review): the name `custom_model` is rebound in a later cell by
# `from snowflake.ml.model import custom_model`; consider a distinct variable name.
custom_model = FDCCustomModel(model=model,pre_process=pre_process)

In [15]:
# custom_model.predict(x_test)

In [17]:
## To be handled by Fosforml
from snowflake.ml.registry import Registry

# Open the registry in the target database/schema using the existing session.
reg = Registry(
    session=session,
    database_name="INSIGHT_DESIGNER_SPCS",
    schema_name="INSIGHT_SPCS_SCHEMA",
)

# Log a model version, with the training features as the sample input.
# NOTE(review): this registers the bare `model`, not the `custom_model` wrapper
# built in the previous cells — confirm that is intended.
mv = reg.log_model(
    model,
    model_name="custom_model_sklearn",
    version_name="v10",
    conda_dependencies=["scikit-learn"],
    comment="My Custom ML Model Pipeline",
    sample_input_data=x_train,
)

In [332]:
from snowflake.ml.model import custom_model
from snowflake.ml.model.custom_model import ModelContext

## User-provided data-processing function
def pre_process(input_data_frame):
    """Placeholder pre-processing hook: return the input unchanged."""
    processed = input_data_frame
    return processed

## To be created and handled within fosforml
# Model context holding the fitted estimator under the key 'm1'; the custom
# model's predict() looks it up with model_ref('m1').
mc = custom_model.ModelContext(models={'m1': model})

## To be created and handled within fosforml
class FDCCustomModel(custom_model.CustomModel):
    """Custom model that pre-processes its input and returns predictions plus probabilities.

    The wrapped estimator is read from the model context under the key ``'m1'``.
    """

    def __init__(self, context: ModelContext) -> None:
        super().__init__(context)

    @custom_model.inference_api
    def predict(self, input_data: pd.DataFrame) -> pd.DataFrame:
        """Return the input columns followed by the prediction and probability columns.

        Parameters
        ----------
        input_data : pd.DataFrame
            Feature frame; it is passed through the module-level ``pre_process``
            before being given to the estimator.

        Returns
        -------
        pd.DataFrame
            ``input_data`` (index reset), then ``output_col`` with the predicted
            values, then the ``predict_proba`` output in columns ``a``, ``b``, ``c``.
        """
        input_features = pre_process(input_data)
        # Resolve the estimator once and reuse it for both calls.
        estimator = self.context.model_ref('m1')
        model_pred_output = estimator.predict(input_features)
        model_prob_output = estimator.predict_proba(input_features)
        # The three fixed probability column names assume predict_proba yields
        # exactly three columns (one per class) — TODO confirm for other models.
        return pd.concat(
            [
                # Index is reset so the pieces line up row-by-row on concat.
                input_data.reset_index(drop=True),
                pd.Series(model_pred_output, name='output_col'),
                pd.DataFrame(model_prob_output, columns=['a', 'b', 'c']).reset_index(drop=True),
            ],
            axis=1,
        )

## To be created and handled within fosforml
# Build the pipeline object from the model context defined above.
my_model_pipeline = FDCCustomModel(mc)
# my_model_pipeline.predict(x_test).head()


## To be handled by Fosforml
from snowflake.ml.registry import Registry

# Open the registry in the target database/schema using the existing session.
reg = Registry(
    session=session,
    database_name="INSIGHT_DESIGNER_SPCS",
    schema_name="INSIGHT_SPCS_SCHEMA",
)

# Log the custom pipeline as a model version, with the training features as
# the sample input.
mv = reg.log_model(
    my_model_pipeline,
    model_name="custom_model_sklearn",
    version_name="v7",
    conda_dependencies=["scikit-learn"],
    comment="My Custom ML Model Pipeline",
    sample_input_data=x_train,
)

Unnamed: 0,sepal_length_cm,sepal_width_cm,petal_length_cm,petal_width_cm,output_col,a,b,c
0,6.3,2.3,4.4,1.3,1,0.0,0.98,0.02
1,6.1,2.8,4.7,1.2,1,0.0,0.99,0.01
2,6.5,3.0,5.5,1.8,2,0.0,0.0,1.0
3,7.6,3.0,6.6,2.1,2,0.0,0.0,1.0
4,5.8,2.7,5.1,1.9,2,0.0,0.0,1.0


## Model registration

In [66]:
# Display the pipeline object's class as a string.
# NOTE(review): the saved output for this cell names `ExamplePipelineModel`,
# not `FDCCustomModel` — it looks stale; re-run the cell to refresh it.
str(type(my_model_pipeline))

<class '__main__.ExamplePipelineModel'>


In [61]:
from fosforml import register_model
# Register the pipeline through fosforml, passing the session, the train/test
# splits and the predictions computed in the modeling cell.
# NOTE(review): the saved output of this cell is
# "Exception: Failed to get model performance metrics. 'feature_names'".
# `model_obj` here is the Snowflake CustomModel wrapper while `flavour` is
# "sklearn" — possibly related; confirm against fosforml's register_model.
register_model(
  model_obj=my_model_pipeline,
  session=session,
  x_train=x_train,
  y_train=y_train,
  x_test=x_test,
  y_test=y_test,
  y_pred=y_pred,
  dataset_name="RandomMulticlassMOdelForTesting",
  dataset_source="Snowflake",
  name="RandomMultieefefefclassMOdelForTesting16July",
  description="This is a test sklearn model",
  flavour="sklearn",
  model_type="classification",
  conda_dependencies=["scikit-learn==1.3.2"]
)

Exception: Failed to get model performance metrics. 'feature_names'