In [3]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [7]:
from sklearn.datasets import load_iris
iris = load_iris()
my_df = pd.DataFrame(iris.data, columns=iris.feature_names)
my_df['target'] = iris.target
my_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [8]:
# Splitting the model into inputs and target dataset
inputs = my_df.drop(columns=['target'])
target = my_df['target']
inputs.shape, target.shape

((150, 4), (150,))

In [9]:
# Splitting the model into training and testing dataset
from sklearn.model_selection import train_test_split
inputs_train, inputs_test, target_train, target_test = train_test_split(inputs, target, test_size=0.2, random_state=88)
print("Training datasets: ", inputs_train.shape, target_train.shape)
print("Testing datasets: ", inputs_test.shape, target_test.shape)

Training datasets:  (120, 4) (120,)
Testing datasets:  (30, 4) (30,)


In [12]:
# Building the model
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

model_params = {
    'svm': {
        'model': SVC(),
        'params': {
            'gamma': ['auto'],
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear']
        }
    },
    'random forest': {
        'model': RandomForestClassifier(),
        'params': {
            'n_estimators': [10, 20, 50],
            'max_depth': [None, 10, 20],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4]
        }
    },
    'logistic regression': {
        'model': LogisticRegression(),
        'params': {
            'C': [0.001, 0.01, 0.1, 1, 10, 100],
            'solver': ['liblinear', 'saga']
        }
    }
}


In [None]:
from sklearn.model_selection import GridSearchCV
scores = []
for model_name, mp in model_params.items():
  clf = GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
  clf.fit(inputs_train, target_train)
  scores.append({
      'model': model_name,
      'best score': clf.best_score_,
      'best params': clf.best_params_
  })

In [14]:
my_df2 = pd.DataFrame(scores, columns=['model', 'best score', 'best params'])
my_df2

Unnamed: 0,model,best score,best params
0,svm,0.966667,"{'C': 1, 'gamma': 'auto', 'kernel': 'linear'}"
1,random forest,0.958333,"{'max_depth': None, 'min_samples_leaf': 2, 'mi..."
2,logistic regression,0.975,"{'C': 0.1, 'solver': 'saga'}"
