![giskard_logo.png](https://raw.githubusercontent.com/Giskard-AI/giskard/main/readme/Logo_full_darkgreen.png)


# About Giskard

Open-Source CI/CD platform for ML teams. Deliver ML products, better & faster. 

*   Collaborate faster with feedback from business stakeholders.
*   Deploy automated tests to eliminate regressions, errors & biases.

🏡 [Website](https://giskard.ai/)

📗 [Documentation](https://docs.giskard.ai/)

## Installing `giskard`

In [None]:
!pip install giskard

## Connect the external worker in daemon mode

In [None]:
# Start the Giskard worker; the -d flag runs it in daemon mode (in the background).
!giskard worker start -d

# Start by creating an ML model 🚀🚀🚀

In [None]:
from sklearn import datasets 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import pandas as pd
import numpy as np

In [None]:
# Load the Iris measurements into a DataFrame, one column per feature
iris = datasets.load_iris()
df_iris = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Replace the integer class codes with readable class names
d = {0: 'setosa', 1: 'versicolor', 2: 'virginica'}
df_iris['target'] = pd.Series(iris.target).map(d)

# Derived numeric feature: sepal area = sepal length x sepal width
df_iris["sepal area"] = df_iris["sepal length (cm)"].mul(df_iris["sepal width (cm)"])

# Derived categorical feature: bucket the sepal width into three size classes,
# then store the labels as plain strings instead of a pandas categorical
bins = [-np.inf, 2.5, 3.5, np.inf]
labels = ["small", "medium", "big"]
df_iris["cat_sepal_width"] = pd.cut(
    df_iris["sepal width (cm)"],
    bins=bins,
    labels=labels,
).astype(str)

In [None]:
# Type of every input column of df_iris (the 'target' column is not listed)
column_types = {
    "sepal length (cm)": "numeric",
    "sepal width (cm)": "numeric",
    "petal length (cm)": "numeric",
    "petal width (cm)": "numeric",
    "sepal area": "numeric",
    "cat_sepal_width": "category",
}

# Subset of columns actually fed to the model, in the order they are used
feature_names = [
    'petal length (cm)',
    'petal width (cm)',
    'sepal area',
    'cat_sepal_width',
]

## Preprocessing with several feature engineering steps:

*   Add / remove variables
*   Scale the numeric variables
*   One-hot encode the categorical variable

In [None]:
# Keep only the model's input columns. The 'target' column is not one of them,
# so selecting feature_names is enough; .copy() gives an independent frame so
# the transformations below never write back into df_iris.
X = df_iris[feature_names].copy()
y = df_iris['target']

# Scale all the numerical variables (the scaler is fitted here)
std_slc = StandardScaler()
num_cols = [colname for colname, coltype in column_types.items()
            if coltype == 'numeric' and colname in feature_names]
X[num_cols] = std_slc.fit_transform(X[num_cols])

# Use OneHotEncoder with cat_sepal_width; handle_unknown='ignore' makes the
# encoder tolerate categories it has not seen during fit
one_hot_encoder = OneHotEncoder(handle_unknown='ignore')
arr = one_hot_encoder.fit_transform(X[['cat_sepal_width']]).toarray()

# Build the encoded columns on X's own index so rows stay aligned whatever the
# index is, and name them '0', '1', ... for however many categories were found
# (this keeps every column label a string).
onehot_df = pd.DataFrame(
    arr,
    index=X.index,
    columns=[str(i) for i in range(arr.shape[1])],
)
X = pd.concat([X.drop(columns='cat_sepal_width'), onehot_df], axis=1)

## Train a model

In [None]:
# Fit a 12-nearest-neighbours classifier on the preprocessed features
knn = KNeighborsClassifier(n_neighbors=12).fit(X, y)
# Score on the very data the model was fitted on (no held-out set is used here)
knn.score(X, y)

In [None]:
def my_preproc(df):
    """Apply the fitted scaling and one-hot encoding to raw feature rows.

    Relies on the already-fitted ``std_slc`` and ``one_hot_encoder`` and on
    ``column_types`` / ``feature_names`` from the notebook's global scope.

    Args:
        df: DataFrame containing the numeric feature columns and
            ``cat_sepal_width``. It is copied, never modified.

    Returns:
        A new DataFrame, indexed like ``df``, with the numeric features
        scaled and ``cat_sepal_width`` replaced by one-hot columns named
        '0', '1', ...
    """
    X = df.copy()

    # Scale all the numerical variables with the fitted scaler
    num_cols = [colname for colname, coltype in column_types.items()
                if coltype == 'numeric' and colname in feature_names]
    X[num_cols] = std_slc.transform(X[num_cols])

    # One-hot encode cat_sepal_width with the fitted encoder
    arr = one_hot_encoder.transform(X[['cat_sepal_width']]).toarray()

    # Reuse df's index for the encoded columns: a bare pd.DataFrame(arr) gets a
    # fresh 0..n-1 index, and combining it with a frame that has any other
    # index (a filtered or shuffled subset, for instance) would misalign the
    # rows and leave NaN in the encoded columns.
    onehot_df = pd.DataFrame(
        arr,
        index=X.index,
        columns=[str(i) for i in range(arr.shape[1])],
    )
    return pd.concat([X.drop(columns='cat_sepal_width'), onehot_df], axis=1)

In [None]:
from giskard import SKLearnModel, Dataset
# Wrap the trained classifier for Giskard; my_preproc is supplied as the
# preprocessing function and feature_names lists the raw input columns
my_model = SKLearnModel(
    clf=knn,
    name="K-Nearest Neighbors Classifier (sklearn)",
    model_type="classification",
    feature_names=feature_names,
    data_preprocessing_function=my_preproc,
)

# Wrap the raw (un-preprocessed) DataFrame with Giskard's Dataset
my_test_dataset = Dataset(
    df_iris,
    name="testing dataset",
    target="target",
    column_types=column_types,
)

# Upload the model to Giskard without a pipeline 🚀🚀🚀

### Initiate a project


In [None]:
from giskard import GiskardClient

# Address of the Giskard server and the token passed to the client
url = "http://localhost:19000"
token = "API Access Token"  # NOTE(review): looks like a placeholder; presumably replace with a real token
client = GiskardClient(url, token)

# Create the "iris" project on the server.
# NOTE(review): this rebinds `iris`, which earlier held the scikit-learn dataset.
iris = client.create_project(
    "iris",
    "Classification of the Iris dataset",
    "Project to classify flowers",
)

### Upload your model and a dataset (see [documentation](https://docs.giskard.ai/start/guides/upload-your-model))

In [None]:
# Save the model and the dataset to the Giskard server under the "iris" project.
# The return values are kept in mid / did (presumably the uploaded ids; confirm).
mid = my_model.upload(
    client,
    "iris",
    validate_ds=my_test_dataset,
)
did = my_test_dataset.upload(client, "iris")