## Import libraries

In [None]:
import torch
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from torch.utils.data import Dataset as TorchDataset
from sklearn.model_selection import train_test_split
from transformers.integrations import MLflowCallback, TensorBoardCallback
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, TrainingArguments, Trainer

from giskard import Dataset, Model, scan, testing, GiskardClient

## Define constants

In [None]:
# Location of the airline-tweets CSV read by `load_data`.
DATA_URL = (
    "https://raw.githubusercontent.com/Giskard-AI/examples/main/datasets/"
    "twitter_us_airline_sentiment_analysis.csv"
)

# Checkpoint name handed to `from_pretrained` for both the tokenizer and the model.
MODEL_NAME = "Souvikcmsa/SentimentAnalysisDistillBERT"

# Seed used for the train/test split.
RANDOM_SEED = 0

# Column holding the tweet text and column holding the sentiment label.
FEATURE_COLUMN_NAME, TARGET_COLUMN_NAME = "text", "airline_sentiment"

# Sentiment label -> integer class id.
TARGET_MAPPING = dict(negative=0, neutral=1, positive=2)

## Dataset preparation

### Load data

In [None]:
def load_data():
    """Read the tweets CSV and return it with integer-encoded sentiment labels.

    Only the FEATURE_COLUMN_NAME and TARGET_COLUMN_NAME columns are read from
    DATA_URL; the target values are then replaced through TARGET_MAPPING.

    Returns:
        pandas.DataFrame with the text column and the encoded target column.
    """
    wanted_columns = [FEATURE_COLUMN_NAME, TARGET_COLUMN_NAME]
    tweets = pd.read_csv(DATA_URL, usecols=wanted_columns)

    # Swap the string labels for their class ids.
    encoded_target = tweets[TARGET_COLUMN_NAME].map(TARGET_MAPPING)
    tweets[TARGET_COLUMN_NAME] = encoded_target

    return tweets

In [None]:
# Load the CSV and encode the target once; the split below reads from `data`.
data = load_data()

### Train-test split

In [None]:

# Hold out part of the data for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data[[FEATURE_COLUMN_NAME]],  # double brackets keep the features as a DataFrame
    data[TARGET_COLUMN_NAME],
    random_state=RANDOM_SEED,
)

### Wrap dataset with Giskard

In [None]:
# Wrap the held-out split: features and labels are put back side by side in one frame.
wrapped_data = Dataset(
    df=pd.concat([X_test, y_test], axis="columns"),
    name="Tweets sentiment dataset",
    target=TARGET_COLUMN_NAME,
    column_types={FEATURE_COLUMN_NAME: "text"},
)

## Model training

### Define 'torch.Dataset' objects.

In [None]:
class CustomDataset(TorchDataset):
    """Map-style dataset over tokenizer encodings and optional labels.

    Args:
        encodings: Mapping from field name (e.g. "input_ids") to a sequence
            with one entry per sample. Must contain the key "input_ids",
            which is used to determine the dataset length.
        labels: Optional sequence with one label per sample. When omitted,
            items carry no "labels" entry.
    """

    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return sample `idx` as a dict of tensors (plus "labels" if available)."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}

        # Compare against None explicitly: truth-testing a NumPy array or tensor
        # of labels raises, and a truthiness check conflates "no labels" with
        # any other falsy container.
        if self.labels is not None:
            item["labels"] = torch.tensor(self.labels[idx])

        return item

    def __len__(self):
        """Number of samples, taken from the "input_ids" field."""
        return len(self.encodings["input_ids"])

In [None]:
# Tokenizer that matches the pretrained checkpoint.
tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME)

# Encode both splits with identical padding / truncation settings.
X_train_tokenized = tokenizer(
    X_train[FEATURE_COLUMN_NAME].tolist(),
    padding=True,
    truncation=True,
    max_length=256,
)
X_test_tokenized = tokenizer(
    X_test[FEATURE_COLUMN_NAME].tolist(),
    padding=True,
    truncation=True,
    max_length=256,
)

# Pair each encoding with its labels for the Trainer.
train_dataset = CustomDataset(X_train_tokenized, y_train.tolist())
val_dataset = CustomDataset(X_test_tokenized, y_test.tolist())

### Define model to train

In [None]:
# Load the pretrained classifier and switch it to training mode.
model = DistilBertForSequenceClassification.from_pretrained(MODEL_NAME)
model.train()

# Freeze the DistilBERT feature-extraction module: its parameters stop receiving gradients.
for frozen_param in model.base_model.parameters():
    frozen_param.requires_grad_(False)

### Define trainer object

In [None]:
def compute_metrics(eval_pred):
    """Compute the macro-averaged F1 score for an evaluation pass.

    Args:
        eval_pred: Pair `(scores, labels)`; `scores` holds one row of
            per-class scores per sample, `labels` the reference class ids.

    Returns:
        Dict with a single "f1" entry.
    """
    scores, references = eval_pred
    # The predicted class is the highest-scoring column of each row.
    predicted = np.argmax(scores, axis=1)

    return {"f1": f1_score(references, predicted, average='macro')}

# Hyper-parameters for the fine-tuning run.
training_args = TrainingArguments(
    output_dir='output',
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    optim="adamw_torch",
    weight_decay=0.01,
    save_strategy="no",
    disable_tqdm=True
)

# `args` must be passed explicitly: without it the Trainer falls back to its
# default TrainingArguments and every setting above is silently ignored.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

In [None]:
# Detach the MLflow and TensorBoard reporting callbacks from the trainer.
for callback_cls in (MLflowCallback, TensorBoardCallback):
    trainer.remove_callback(callback_cls)

### Train and evaluate model

In [None]:
# Fine-tune on `train_dataset`, then score on `val_dataset`.
trainer.train()
# Kept as the last expression so the notebook displays the returned metrics.
trainer.evaluate()

### Wrap model with Giskard

In [None]:
def prediction_function(df) -> np.ndarray:
    """Return class probabilities for every row of `df`.

    Args:
        df: DataFrame holding the raw tweets in its FEATURE_COLUMN_NAME column.

    Returns:
        NumPy array of softmax probabilities with one row per input row and
        one column per model logit.
    """
    input_text = list(df[FEATURE_COLUMN_NAME])
    text_tokenized = tokenizer(input_text, padding=True, truncation=True, max_length=256)

    # Both tensors must come from THIS batch's encoding. Reusing the test-set
    # attention mask breaks (or silently mis-masks) any input that is not the
    # exact test set, e.g. the perturbed frames produced during a scan.
    # They are created on the model's device so prediction still works if
    # training moved the model to an accelerator.
    device = model.device
    input_ids = torch.tensor(text_tokenized["input_ids"], device=device)
    attention_mask = torch.tensor(text_tokenized["attention_mask"], device=device)

    # Inference only: disable dropout and skip autograd bookkeeping.
    model.eval()
    with torch.no_grad():
        raw_pred = model(input_ids=input_ids, attention_mask=attention_mask)

    predictions = torch.nn.functional.softmax(raw_pred["logits"], dim=-1)

    return predictions.cpu().numpy()

# Wrap the prediction function so Giskard can call it on DataFrames.
# The labels are materialised into a list: a `dict.values()` view is neither
# indexable nor serialisable, whereas a list keeps the class order explicit
# (column i of the probabilities corresponds to label i).
wrapped_model = Model(prediction_function,
                      model_type="classification",
                      name="Twitter sentiment classifier",
                      feature_names=[FEATURE_COLUMN_NAME],
                      classification_labels=list(TARGET_MAPPING.values()))

# Sanity check: score the wrapped model on the wrapped test split.
wrapped_predictions = wrapped_model.predict(wrapped_data).raw_prediction
print(f"Wrapped Test F1-Score: {f1_score(y_test, wrapped_predictions, average='macro')}")

## Scan model with Giskard

In [None]:
# Run the Giskard scan on the wrapped model using the wrapped test split.
results = scan(wrapped_model, wrapped_data)

In [None]:
# Render the scan report in the notebook output.
display(results)

## Generate a test suite from the Scan
The objects produced by the scan can be used as fixtures to generate a test suite that integrates domain-specific issues. To create custom tests, refer to the Test your ML Model page.

In [None]:
# Turn the scan findings into a reusable test suite, then run it right away.
test_suite = results.generate_test_suite("My first test suite")
test_suite.run()

## Customize your suite by loading objects from the Giskard catalog

The Giskard open-source catalog lets you load:
* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc
* Slicing functions such as detectors of toxicity, hate, emotion, etc
* Transformation functions such as generators of typos, paraphrase, style tune, etc

For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog.

In [None]:
# Append an F1-threshold check to the suite and run the extended suite.
f1_check = testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)
test_suite.add_test(f1_check).run()

## Upload your suite to the Giskard server

Upload your suite to the Giskard server to:
* Compare models to decide which model to promote
* Debug your tests to diagnose the issues
* Create more domain-specific tests that integrate business feedback
* Share your results

In [None]:
# Uploading the test suite automatically saves the model, dataset, tests,
# slicing & transformation functions inside the Giskard UI server.
# Create a Giskard client once the Giskard server is installed (see documentation).
token = "API_TOKEN"  # Find it in Settings in the Giskard server
project_key = "my_project"  # Same key is used to create the project and to upload to it

client = GiskardClient(url="http://localhost:19000", token=token)  # URL of your Giskard instance

my_project = client.create_project(project_key, "PROJECT_NAME", "DESCRIPTION")

# Upload to the current project ✉️
test_suite.upload(client, project_key)