# Building and Deploying a Scikit-Learn Text Classifier with ODSP

This notebook shows how to build and deploy a Scikit-Learn text classification model trained on the `20NewsGroups` dataset.

In this notebook, we will show how to download the data, train and register the model using `MLflow`, and then query that model once it has been automatically deployed to the platform.

In [None]:
# Import all required packages

import os

import mlflow
import requests
from sklearn.datasets import fetch_20newsgroups
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

In [None]:
# User and API key sent as HTTP basic-auth credentials to the model server.
# The fallback values are the user and default API key from the default
# environment file (NOT RECOMMENDED FOR ANYTHING OTHER THAN TESTING PURPOSES).
# Set the ODSP_USER / ODSP_API_KEY environment variables to supply real
# credentials without writing them into the notebook.

user = os.environ.get('ODSP_USER', 'odsp')
key = os.environ.get('ODSP_API_KEY', 'odsp-odsp')

In [None]:
# Prepare the data: fetch the 20 Newsgroups documents and replace each integer
# target with the corresponding newsgroup name so the model predicts readable labels.
data = fetch_20newsgroups()
x_data = data['data']
target_names = data['target_names']
y_data = [target_names[i] for i in data['target']]

# A fixed random_state keeps the train/test partition identical across
# "Restart Kernel -> Run All", so the logged test accuracy is comparable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data, random_state = 42
)

In [None]:
# Train a TF-IDF + random-forest pipeline inside an MLflow run, log its accuracy
# on the held-out split, and register the fitted pipeline under the name
# '20NewsGroupsClassifier'.
mlflow.set_experiment('20NewsGroupsClassifierTutorial')
mlflow.sklearn.autolog()

with mlflow.start_run() as run:
    model = Pipeline(
        [
            ('tfidf', TfidfVectorizer(stop_words = 'english', min_df = 5, max_df = 0.8)),
            # Seeded so that repeated runs produce comparable metrics.
            ('clf', RandomForestClassifier(random_state = 42))
        ]
    )
    model.fit(x_train, y_train)
    test_preds = model.predict(x_test)
    mlflow.log_metric('test_accuracy', accuracy_score(y_test, test_preds))
    mlflow.sklearn.log_model(
        sk_model = model,
        artifact_path = 'model',
        # The input example only has to illustrate the expected input format;
        # passing the whole of x_train would store the full training corpus
        # alongside the model artifact.
        input_example = x_train[:5],
        registered_model_name = '20NewsGroupsClassifier'
    )

In [None]:
import requests

In [None]:
# Send the held-out documents to the deployed model and keep the HTTP response
# in `resp` for the following cell.
# NOTE(review): the trailing '/1' in the URL looks like the registered model
# version; re-running the training cell presumably registers a newer version
# that this URL would not target -- confirm against the platform docs.
with requests.Session() as sess:
    resp = sess.post(
        'http://model-server:4488/models/predict/20NewsGroupsClassifier/sklearn/1',
        json = {'data' : x_test},
        auth = (user, key),
        # (connect, read) timeouts in seconds: without them the call blocks
        # indefinitely when the model server is unreachable or stalls.
        timeout = (5, 300)
    )
    # Fail here with the HTTP status (e.g. 401 for bad credentials, 404 if the
    # model is not deployed yet) instead of a confusing error when the body is parsed.
    resp.raise_for_status()

In [None]:
# Decode the JSON body and display the first 100 entries of its 'prediction' field.
predictions = resp.json()['prediction']
predictions[:100]