# Run Inference with a Model from Hugging Face

## Import Dependencies

In [None]:
from warnings import filterwarnings

from transformers import AutoTokenizer, AutoModelForSequenceClassification

from configs import TOXIC_DB_PATH, BENIGN_DB_PATH, PROMPT_TEMPLATE
from utils import DatabaseInterface, Pipeline, get_device

# Silence warnings for the rest of the session: the "ignore" action is
# registered without a category or module filter, so it matches every warning.
# NOTE(review): this also hides deprecation notices; consider narrowing the
# filter to specific categories once the noisy sources are known.
filterwarnings("ignore")

## Initialize Database Interfaces

In [None]:
# Build one DatabaseInterface per data file (toxic first, then benign);
# both are created with the same n_neighbors=2 setting.
toxic_db, benign_db = (
    DatabaseInterface(data_path=db_path, n_neighbors=2)
    for db_path in (TOXIC_DB_PATH, BENIGN_DB_PATH)
)

## Import Model and Tokenizer

In [None]:
# Hugging Face checkpoint name or local path to the model and tokenizer.
# Set the TOXICITY_CHECKPOINT environment variable to point at your own copy;
# when it is unset or empty, the original local path below is used unchanged.
import os

checkpoint = os.environ.get("TOXICITY_CHECKPOINT") or (
    "/Users/naman/Workspace/models-tokenizers/toxicity-classifiers/roberta-toxigen"
)

# Tokenizer and sequence-classification model are loaded from the same
# checkpoint so that they stay matched.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

## Send Model to Device

In [None]:
# Ask the project helper which device to use and report the choice.
device = get_device()
print(f"Using {device} device")

# Move the model to the selected device. As the cell's trailing expression,
# the returned value is also what the notebook displays for this cell.
model.to(device)

## Initialize Pipeline

In [None]:
# Wire the loaded model, its tokenizer, both database interfaces, the prompt
# template and the target device into a single callable Pipeline object.
pipeline = Pipeline(
    model=model,
    tokenizer=tokenizer,
    toxic_db=toxic_db,
    benign_db=benign_db,
    prompt_template=PROMPT_TEMPLATE,
    device=device,
)

## Inference

In [None]:
# Two sample inputs to run through the pipeline.
texts = [
    "I hate you!",
    "I love you!",
]

# Trailing expression: the notebook displays whatever the pipeline returns.
pipeline(texts)