# Sample model training: diabetes dataset regression

This notebook trains a sample Ridge regression model on the [diabetes toy problem](https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset) and saves the fitted model artifact (`model.pkl`) into this folder.

In [2]:
import joblib
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score

## 1. Load and split data

In [3]:
# Get the diabetes features and targets as a pair of arrays from scikit-learn.
X, y = load_diabetes(return_X_y=True)

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Persist all four arrays together in a single .npz archive.
np.savez(
    "../data/diabetes.npz",
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

## 2. Train model

In [4]:
# Read the four arrays back from the saved archive. The archive is opened once
# and closed by the context manager instead of being reopened per key and left
# open; indexing the archive reads each array into memory, so the arrays remain
# usable after the file is closed.
with np.load("../data/diabetes.npz") as npz_file:
    X_train, y_train, X_test, y_test = [
        npz_file[key] for key in ("X_train", "y_train", "X_test", "y_test")
    ]

# Regularization strength passed to Ridge.
alpha = 0.1

# Fit the ridge regressor on the training split.
model = Ridge(alpha=alpha).fit(X_train, y_train)

## 3. Save model

In [5]:
# Serialize the fitted model next to this notebook; the call is left as the
# cell's last expression so the list of written filenames is displayed.
joblib.dump(value=model, filename="model.pkl")

['model.pkl']