In [1]:
import pandas as pd
import seaborn as sns

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures,StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score

In [3]:
df = sns.load_dataset('iris')
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [4]:
X = df.drop('species', axis=1)
y = df['species']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train.shape, y_test.shape

((120, 4), (30,))

In [5]:
pipeline = Pipeline([
    ('poly', PolynomialFeatures()),
    ('scaler', StandardScaler()),
    ('DT', DecisionTreeClassifier())
])

parameters = {
    'poly__degree':[1,2],
    'poly__interaction_only':[True, False]
}

model = GridSearchCV(pipeline, param_grid=parameters, 
                     cv=3, n_jobs=2, verbose=1)
model.fit(X_train, y_train)
model.best_params_

Fitting 3 folds for each of 4 candidates, totalling 12 fits
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  12 out of  12 | elapsed:    1.3s finished


{'poly__degree': 1, 'poly__interaction_only': False}

In [6]:
pred_train = model.predict(X_train)
pred_test = model.predict(X_test)

# Train score
print(accuracy_score(y_train, pred_train))

# Test score
print(accuracy_score(y_test, pred_test))

1.0
0.9333333333333333


In [7]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [8]:
baris_1 = df.iloc[0][:-1].values
baris_1

array([5.1, 3.5, 1.4, 0.2], dtype=object)

In [9]:
model.predict(baris_1.reshape(1, -1))

array(['setosa'], dtype=object)

In [10]:
model.get_params()

{'cv': 3,
 'error_score': nan,
 'estimator__memory': None,
 'estimator__steps': [('poly', PolynomialFeatures()),
  ('scaler', StandardScaler()),
  ('DT', DecisionTreeClassifier())],
 'estimator__verbose': False,
 'estimator__poly': PolynomialFeatures(),
 'estimator__scaler': StandardScaler(),
 'estimator__DT': DecisionTreeClassifier(),
 'estimator__poly__degree': 2,
 'estimator__poly__include_bias': True,
 'estimator__poly__interaction_only': False,
 'estimator__poly__order': 'C',
 'estimator__scaler__copy': True,
 'estimator__scaler__with_mean': True,
 'estimator__scaler__with_std': True,
 'estimator__DT__ccp_alpha': 0.0,
 'estimator__DT__class_weight': None,
 'estimator__DT__criterion': 'gini',
 'estimator__DT__max_depth': None,
 'estimator__DT__max_features': None,
 'estimator__DT__max_leaf_nodes': None,
 'estimator__DT__min_impurity_decrease': 0.0,
 'estimator__DT__min_impurity_split': None,
 'estimator__DT__min_samples_leaf': 1,
 'estimator__DT__min_samples_split': 2,
 'estimator_

In [12]:
import pickle
pickle.dump(model, open('iris_decTree.pkl','wb'))

In [13]:
!ls | grep iris_decTree.pkl

'ls' is not recognized as an internal or external command,
operable program or batch file.
