In [32]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.feature_selection import mutual_info_classif, SelectKBest
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler
import joblib

In [33]:
X= pd.DataFrame(load_iris()['data'],columns=load_iris()['feature_names'])
y= load_iris()['target']
X_train, X_test, y_train, y_test=train_test_split(X, y ,stratify=y, test_size=0.1, random_state=11)

In [34]:
X_train.shape, y_train.shape

((135, 4), (135,))

In [35]:
def model(X, y):
  X = X.copy()
  y = y.copy()
  pipeline =Pipeline(steps=[[ 'scaler', MinMaxScaler()],['feature_selection', SelectKBest(score_func=mutual_info_classif)], ['classifier', LogisticRegression (random_state=11, max_iter=1000)]])
  param_grid = {'feature_selection__k': range (1, X.shape[1]),'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}
  grid_search= GridSearchCV (estimator=pipeline,param_grid=param_grid, scoring='accuracy', n_jobs=-1,cv=3)
  grid_search.fit(X, y)
  return grid_search

In [36]:
pipeline = Pipeline(steps=[['scaler', MinMaxScaler()],['feature_selection', SelectKBest(score_func=mutual_info_classif,k=3)], ['classifier', LogisticRegression(random_state=11, max_iter=1000,C=1000)]])


pipeline.fit(X_train, y_train)
pipeline

In [37]:
feature_selection =(pipeline['feature_selection']).scores_
feature_selection

array([0.46750868, 0.25558277, 0.99245801, 0.98408761])

In [38]:
feature_scores =  {key:value for key, value in zip(X_train.columns,(np.round(pipeline['feature_selection'].scores_,2)))}
feature_scores

{'sepal length (cm)': 0.47,
 'sepal width (cm)': 0.26,
 'petal length (cm)': 0.99,
 'petal width (cm)': 0.98}

In [40]:
iris_model = model(X_train, y_train)

print(f'Best params:{iris_model.best_params_}\n BestScore:{iris_model.best_score_}\n Features scores:')

Best params:{'classifier__C': 1000, 'feature_selection__k': 3}
 BestScore:0.9629629629629629
 Features scores:


In [41]:
joblib.dump(iris_model,'iris_model.pkl')


['iris_model.pkl']

In [42]:
from google.colab import files
files.download('iris_model.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>