In [8]:
from sklearn.datasets import make_classification

# Generate a synthetic classification data set, then threshold every feature
# at zero so that the columns become 0/1 indicators.
X, y = make_classification(
    n_samples=1000,
    n_features=110,
    random_state=42,
)
X = (X > 0).astype(int)
(X.shape, y.shape)

((1000, 110), (1000,))

In [9]:
import pandas as pd
# One generated name per feature column. The count is taken from X itself
# rather than repeated as a literal, so this cell keeps working if the number
# of generated features is changed in the cell that builds X.
feature_cols = [f"feat_{idx}" for idx in range(X.shape[1])]
label_col = "label"
# Column-name -> type-name mapping; every feature is declared "numeric" here.
column_types = {col: "numeric" for col in feature_cols}

# Features plus the target in a single frame.
dataset = pd.DataFrame(X, columns=feature_cols)
dataset[label_col] = y
dataset.describe()


Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_101,feat_102,feat_103,feat_104,feat_105,feat_106,feat_107,feat_108,feat_109,label
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,...,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.5,0.507,0.519,0.521,0.496,0.523,0.52,0.531,0.46,0.518,...,0.505,0.512,0.496,0.499,0.509,0.48,0.484,0.519,0.541,0.496
std,0.50025,0.500201,0.499889,0.499809,0.500234,0.499721,0.49985,0.499288,0.498647,0.499926,...,0.500225,0.500106,0.500234,0.500249,0.500169,0.49985,0.499994,0.499889,0.498566,0.500234
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.5,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,...,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [10]:
# Write the features-plus-label frame to disk, omitting the index column.
dataset.to_csv(path_or_buf="./giskard_test_data.csv", index=False)

In [11]:
#import sys
#sys.path.append("giskardcustommodel")

In [12]:
from giskardcustommodel.model.GiskardCustomModel import get_model_pipeline
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# Re-wrap the split arrays as pandas objects; the feature frames carry the
# named columns defined in feature_cols.
X_train_df, X_test_df = (
    pd.DataFrame(split, columns=feature_cols) for split in (X_train, X_test)
)
y_train_df, y_test_df = (pd.Series(split) for split in (y_train, y_test))

# Build the project's pipeline and fit it on the training split; the fitted
# pipeline is the cell's displayed value.
clf = get_model_pipeline(feature_cols, n_estimators=100, max_depth=100)
clf.fit(X_train_df, y_train_df)

Created with [0, 1] feature_indexes


Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('feature_selection',
                                                  'passthrough',
                                                  ['feat_0', 'feat_1', 'feat_2',
                                                   'feat_3', 'feat_4', 'feat_5',
                                                   'feat_6', 'feat_7', 'feat_8',
                                                   'feat_9', 'feat_10',
                                                   'feat_11', 'feat_12',
                                                   'feat_13', 'feat_14',
                                                   'feat_15', 'feat_16',
                                                   'feat_17', 'feat_18',
                                                   'feat_19', 'feat_20',
                                                   'feat_21', 'feat_22',
                                                   'feat_23', 'feat_24',


In [13]:
from sklearn.metrics import f1_score
# F1 score of the fitted pipeline on the held-out split.
f1_score(y_true=y_test, y_pred=clf.predict(X_test_df))

0.8155339805825242

In [14]:
import pickle

# Serialise the fitted pipeline to disk (binary mode is required by pickle).
with open("giskard_model.pickle", mode="wb") as model_file:
    pickle.dump(clf, model_file)

In [15]:
# !pip install giskard

In [None]:
# Giskard API
import os
from getpass import getpass

from giskard.giskard_client import GiskardClient

# Server address. Defaults to a local Giskard install
# (for installation, see: https://docs.giskard.ai/start/guides/installation);
# set GISKARD_URL to point at another instance.
url = os.environ.get("GISKARD_URL", "http://localhost:9000")

# API token. Generate one in the Admin tab of the Giskard application.
# It is a credential, so it must never be written into the notebook: it is read
# from the GISKARD_TOKEN environment variable, with an interactive prompt as a
# fallback when the variable is unset or empty.
token = os.environ.get("GISKARD_TOKEN") or getpass("Giskard API token: ")
client = GiskardClient(url, token)

# The same identifier is used as both the project name and the project key.
project_key = "giskardcustommodel3"

try:
    project = client.create_project(name=project_key, project_key=project_key)
except Exception:
    # Creation failed -- presumably because the project already exists
    # (TODO confirm which exception the client raises in that case and narrow
    # this handler). Fall back to fetching it; any other failure (bad token,
    # unreachable server) will surface from this call instead.
    project = client.get_project(project_key)
project

In [None]:
!pip show giskardcustommodel

In [None]:
# The target is uploaded as a categorical column: cast it to strings so its
# values line up with the string class labels passed below, and register its
# type under the same name that is passed as `target`.
dataset[label_col] = dataset[label_col].astype(str)
column_types[label_col] = "category"

# Upload the probability-prediction function together with the data set.
# NOTE(review): classification_labels is assumed to follow the column order of
# clf.predict_proba (i.e. clf.classes_) -- confirm.
project.upload_model_and_df(
    prediction_function=clf.predict_proba,
    model_type='classification',
    df=dataset,
    column_types=column_types,
    target=label_col,
    feature_names=feature_cols,
    classification_labels=['0', '1'],
)