# Predict Iris Dataset

Generate predictions for the Iris test dataset using a previously trained model.

## Acknowledgements and Citations:
Data Creator(s):
- R.A. Fisher

Data Donor(s):
- Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)

Data Source(s):
- scikit-learn: https://scikit-learn.org/stable/datasets/toy_dataset.html#iris-dataset

In [None]:
import os
import pickle

import pandas as pd
from sklearn.preprocessing import StandardScaler

In [None]:
# checkout versioned dataset
!dvc checkout -f

In [None]:
# Load the checked-out Iris test data from its Parquet file into a
# DataFrame, then preview the first rows.
test_data_path = "./data/iris_test.parquet"
df = pd.read_parquet(test_data_path)
df.head()

In [None]:
# print a summary of the DataFrame (columns, dtypes, non-null counts)
df.info()

In [None]:
# Load the previously saved model from its pickle file.
# NOTE: unpickling can execute arbitrary code, so only load model files
# that come from a trusted source.
with open("data/iris_knn_model.pkl", "rb") as f:
    knn = pickle.load(f)
# display the loaded model object
knn

In [None]:
# Build the feature frame used to test the model: every column except the
# label columns, re-indexed from zero and detached from `df` via a copy.
label_columns = ["target", "species"]
X_test = df.drop(columns=label_columns).reset_index(drop=True).copy()
X_test.head()

In [None]:
# Standardize the four measurement columns with a StandardScaler.
# NOTE(review): the scaler is fitted on the test data itself. Presumably the
# model was trained on features scaled with training-set statistics; if so,
# the fitted training scaler should be reused here instead -- confirm.
feature_columns = [
    "sepal length (cm)",
    "sepal width (cm)",
    "petal length (cm)",
    "petal width (cm)",
]
scaler = StandardScaler()
scaled_values = scaler.fit_transform(X_test[feature_columns])
# No column labels are passed to the DataFrame constructor here.
X_scaled = pd.DataFrame(scaled_values)
X_scaled.head()

In [None]:
# Extract the true labels for testing the model: the "target" column,
# re-indexed from zero and copied so it is independent of `df`.
target_column = df["target"]
y_test = target_column.reset_index(drop=True).copy()
y_test.head()

In [None]:
# Score the loaded model on the scaled test features against the true targets.
# (Presumably a scikit-learn classifier, in which case `score` is the mean
# accuracy -- confirm against the training notebook.)
knn.score(X_scaled, y_test)

In [None]:
# predict a target for each row of the scaled test features
y_pred = knn.predict(X_scaled)

In [None]:
# Assemble the result frame: the unscaled test features plus the true target
# and the model's prediction as two additional columns. `assign` returns a
# new DataFrame, so `X_test` itself is left unchanged.
df_result = X_test.assign(target=y_test, predicted_target=y_pred)
df_result.head()

In [None]:
# export the result frame (features, true target, predicted target) to Parquet
df_result.to_parquet("./data/iris_predictions.parquet")

In [None]:
# track the exported predictions file with dvc
!dvc add data/iris_predictions.parquet

In [None]:
# push the dataset using dvc
!dvc push

In [None]:
# delete the local copy of the exported predictions file, for demonstration
# purposes
os.remove("data/iris_predictions.parquet")