In [1]:
# Importing Libraries

from sklearn import datasets
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Loading dataset
iris = datasets.load_iris()
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [3]:
# Creating a dataframe

iris_df = pd.DataFrame(iris.data, columns = iris.feature_names)
iris_df['target'] = iris.target
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [4]:
# Counting target
iris_df['target'].value_counts()

target
0    50
1    50
2    50
Name: count, dtype: int64

In [5]:
# Creating training and testing data from a single dataset
x = iris_df.drop(columns=['target'])
y = iris_df.target

X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.2)

In [6]:
# Dictionary of the models and their parameters that will be used for training

model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params' : {
            'C': [1,10,20],
            'kernel': ['rbf','linear']
        }  
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params' : {
            'n_estimators': [1,5,10]
        }
    },
    'logistic_regression' : {
        'model': LogisticRegression(solver='liblinear',multi_class='auto'),
        'params': {
            'C': [1,5,10]
        }
    },
    'naive_bayes_gaussian': {
        'model': GaussianNB(),
        'params': {}
    },
    'naive_bayes_multinomial': {
        'model': MultinomialNB(),
        'params': {}
    },
    'decision_tree': {
        'model': DecisionTreeClassifier(),
        'params': {
            'criterion': ['gini','entropy'],
            
        }
    }     
}

In [7]:
# Using GridSearchCV to find the best model and parameters

scores = []

for model_name, model_p in model_params.items():
    clf =  GridSearchCV(model_p['model'], model_p['params'], cv=5, return_train_score=False)
    clf.fit(X_train, y_train)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    
scores_df = pd.DataFrame(scores,columns=['model','best_score','best_params'])
scores_df

Unnamed: 0,model,best_score,best_params
0,svm,0.975,"{'C': 1, 'kernel': 'rbf'}"
1,random_forest,0.958333,{'n_estimators': 10}
2,logistic_regression,0.975,{'C': 5}
3,naive_bayes_gaussian,0.966667,{}
4,naive_bayes_multinomial,0.7,{}
5,decision_tree,0.95,{'criterion': 'entropy'}


### From the above results, SVM performs best on this dataset

In [14]:
# check score on test data

svmmodel = svm.SVC(kernel='rbf',C=1,gamma='auto')
svmmodel.fit(X_train,y_train)
svmmodel.score(X_test, y_test)

1.0

In [15]:
rf_model = RandomForestClassifier(n_estimators=10)
rf_model.fit(X_train,y_train)
rf_model.score(X_test,y_test)

0.9333333333333333

In [16]:
Log_model = LogisticRegression(C=5)
Log_model.fit(X_train,y_train)
Log_model.score(X_test,y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.9666666666666667

In [11]:
gau_nb = GaussianNB()
gau_nb.fit(X_train,y_train)
gau_nb.score(X_test,y_test)

0.9333333333333333

In [17]:

MultiNB = MultinomialNB()
MultiNB.fit(X_train,y_train)
MultiNB.score(X_test,y_test)

0.5666666666666667

In [18]:
DT_model = DecisionTreeClassifier(criterion= 'entropy')
DT_model.fit(X_train,y_train)
DT_model.score(X_test,y_test)

0.9333333333333333