# compare iterative imputation number of iterations for the horse colic dataset

In [None]:
from sklearn.experimental import enable_iterative_imputer

In [None]:
# load dataset
from pandas import read_csv
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/horse-colic.csv'
# the CSV has no header row, and '?' entries are parsed as missing values (NaN)
dataframe = read_csv(
    filepath_or_buffer=url,
    header=None,
    na_values='?',
)

In [None]:
# split into input and output elements
data = dataframe.values
# column index 23 is the output; every other column is an input
target_col = 23
n_cols = data.shape[1]
ix = [col for col in range(n_cols) if col != target_col]
X = data[:, ix]
y = data[:, target_col]

In [None]:
# evaluate each strategy on the dataset
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.impute import IterativeImputer
from numpy import mean
from numpy import std
# candidate max_iter values 1..20, kept as strings so they can double as labels
strategies = [str(n_iter) for n_iter in range(1, 21)]
# one array of per-fold accuracy scores per candidate, in the same order
results = list()
for label in strategies:
    # create the modeling pipeline: impute first, then classify
    imputer = IterativeImputer(max_iter=int(label))
    model = RandomForestClassifier()
    pipeline = Pipeline(steps=[('i', imputer), ('m', model)])
    # evaluate the model with 10-fold stratified CV repeated 3 times (fixed seed)
    cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    scores = cross_val_score(pipeline, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
    # store results
    results.append(scores)
    # report mean and standard deviation of the accuracy for this setting
    avg, spread = mean(scores), std(scores)
    print('>%s %.3f (%.3f)' % (label, avg, spread))

In [None]:
# plot model performance for comparison
from matplotlib import pyplot
# one box per max_iter setting; showmeans adds a marker at each mean accuracy
pyplot.boxplot(results, showmeans=True)
# Label the boxes through xticks instead of boxplot's `labels` keyword, which
# newer Matplotlib releases deprecate (renamed to `tick_labels`). boxplot puts
# the boxes at positions 1..N by default, so these ticks line up with the boxes
# on both old and new Matplotlib versions.
pyplot.xticks(list(range(1, len(strategies) + 1)), strategies, rotation=45)
pyplot.show()

# iterative imputation strategy and prediction for the horse colic dataset

In [None]:
from sklearn.experimental import enable_iterative_imputer

In [None]:
# load dataset
from pandas import read_csv
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/horse-colic.csv'
# no header row in the file; '?' entries are read as missing values (NaN)
dataframe = read_csv(
    filepath_or_buffer=url,
    header=None,
    na_values='?',
)

In [None]:
# split into input and output elements
data = dataframe.values
# Use column index 23 as the output and every other column as input, the same
# split the iteration-count comparison earlier in this file uses. Taking the
# last column as the target instead would train on a different variable and
# leave column 23 among the inputs.
target_col = 23
ix = [i for i in range(data.shape[1]) if i != target_col]
X, y = data[:, ix], data[:, target_col]

In [None]:
# create the modeling pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import IterativeImputer
from sklearn.pipeline import Pipeline
# step 'i' fills in missing values, step 'm' classifies the imputed rows;
# both use their default settings here
imputer = IterativeImputer()
classifier = RandomForestClassifier()
pipeline = Pipeline(steps=[('i', imputer), ('m', classifier)])

In [None]:
# fit the model
# Trains the imputer + classifier pipeline on all rows of X and y; no hold-out
# set is kept back here.
pipeline.fit(X, y)

In [None]:
# define new data
from numpy import nan
# a single sample of 27 input values; nan marks the entries that are missing
row = [
    2, 1, 530101, 38.50, 66, 28, 3, 3, nan,
    2, 5, 4, 4, nan, nan, nan,
    3, 5, 45.00, 8.40, nan, nan,
    2, 11300, 0, 0, 2,
]

In [None]:
# make a prediction
# predict() takes a 2D collection of samples, so the single row is wrapped in a list
new_samples = [row]
yhat = pipeline.predict(new_samples)

In [None]:
# summarize prediction
# only one row was passed in, so the first element is its predicted label
predicted_class = yhat[0]
print('Predicted Class: %d' % predicted_class)