In [None]:
import pickle

import pandas as pd

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, f1_score

In [None]:
# Load the iris dataset as pandas objects (as_frame=True) and split it into
# the feature frame X and the target series Y.
iris_dataset = load_iris(as_frame=True)
X, Y = iris_dataset.data, iris_dataset.target

In [None]:
# Preview the first rows of the raw feature frame.
X.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


### Предварительная обработка, создание новых признаков.

In [None]:
# Build the engineered feature frame without mutating X, so this cell is
# idempotent and can be re-run safely.
# Step 1: normalise column names, e.g. 'sepal length (cm)' -> 'sepal_length'.
features = X.rename(
    columns=lambda name: name.replace('(cm)', '').strip().replace(' ', '_')
)

# Step 2: append length-to-width ratio features for sepals and petals.
features = features.assign(
    sepal_length_to_sepal_width=lambda df: df['sepal_length'] / df['sepal_width'],
    petal_length_to_petal_width=lambda df: df['petal_length'] / df['petal_width'],
)


In [None]:
# Expose the engineered frame under the name used by the train/test split,
# then preview its first rows. X_featurized is an alias of features, not a copy.
X_featurized = features
X_featurized.head()


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,sepal_length_to_sepal_width,petal_length_to_petal_width
0,5.1,3.5,1.4,0.2,1.457143,7.0
1,4.9,3.0,1.4,0.2,1.633333,7.0
2,4.7,3.2,1.3,0.2,1.46875,6.5
3,4.6,3.1,1.5,0.2,1.483871,7.5
4,5.0,3.6,1.4,0.2,1.388889,7.0


### Разделение выборки на train/test

In [None]:
# Hold out a test split; the fixed random_state keeps the partition
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_featurized,
    Y,
    random_state=0,
)

### Обучение модели

In [None]:
# Build a logistic-regression classifier and fit it on the training split.
# C is the inverse regularisation strength, so C=0.01 regularises heavily.
clf = LogisticRegression(max_iter=100, solver='lbfgs', C=0.01)
clf.fit(X_train, y_train)

In [None]:
# Persist the fitted classifier to disk with pickle.
# (pickle is imported in the notebook's top import cell.)
with open('model.pkl', 'wb') as model_file:
    pickle.dump(clf, model_file)


In [None]:
# List the working directory to confirm that model.pkl was written.
!ls

model.pkl  sample_data


### Загрузка и тестирование модели

In [None]:
# Reload the persisted classifier from disk.
# NOTE: pickle.load can execute arbitrary code, so only unpickle files you
# trust (here: the file written earlier in this notebook).
with open('model.pkl', 'rb') as f:
    trained_clf = pickle.load(f)

# Predict on the held-out split and score the predictions.
prediction = trained_clf.predict(X_test)
# confusion_matrix expects (y_true, y_pred): rows are true classes and columns
# are predicted classes. Keyword arguments keep the order unambiguous.
cm = confusion_matrix(y_true=y_test, y_pred=prediction)
f1 = f1_score(y_true=y_test, y_pred=prediction, average='macro')

In [None]:
# Report each evaluation metric under its label.
for label, value in (("confusion_matrix\n", cm), ("f1_score", f1)):
    print(label, value)

confusion_matrix
 [[13  0  0]
 [ 0  5  0]
 [ 0 11  9]]
f1_score 0.69896004378763
