In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split, GridSearchCV

In [2]:
#Load the Iris dataset and unpack the feature matrix and the class labels
iris = load_iris()
X_iris, y_iris = iris.data, iris.target

In [3]:
#Show the dimensions of the feature matrix and of the label vector, one per line
print(X_iris.shape, y_iris.shape, sep='\n')

(150, 4)
(150,)


In [4]:
#Check whether the classes are balanced.
#No train/test split exists at this point, so the counts describe the whole
#dataset; they are keyed by the actual label values found in y_iris.
labels, counts = np.unique(y_iris, return_counts=True)
print ('Class counts (full dataset): ' + ', '.join('{}: {}'.format(label, count) for label, count in zip(labels, counts)))

Test  1: 50, 2: 50, 3: 50
Train 1: 50, 2: 50, 3: 50


In [5]:
#Split the feature matrix into one 1-D array per feature (column)
a, b, c, d = X_iris.T


#Scale X data
#fit_transform already returns the transformed array, so a second transform
#pass is unnecessary; the scaler takes no labels, so y is not passed either.
#NOTE(review): the scaler is fitted on every row of X_iris, before any
#train/test split. If X_scaled is later evaluated on a held-out split, fit on
#the training part only to avoid leaking test statistics.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_iris)


#Make modified features set with squares of each feature -> shape (n_samples, 4)
X_squares = np.vstack((a**2, b**2, c**2, d**2)).T

#Make modified features set with multiplied pairs of each feature -> shape (n_samples, 6)
X_multi = np.vstack((a*b, a*c, a*d, b*c, b*d, c*d)).T

#Make polynomial transformation n = 10
#A single fit_transform call; the expansion is wide, so computing it twice is wasteful
transform = PolynomialFeatures(10)
X_poly = transform.fit_transform(X_iris)

In [6]:
#Make a stratified 70/30 split of the original data
#random_state pins the shuffle so that re-running the notebook reproduces the
#same split (and therefore the same scores downstream)
(X_train, X_test, y_train, y_test) = train_test_split(
    X_iris, y_iris, stratify=y_iris, test_size=0.3, random_state=1)



In [14]:
#Create estimator class
#The grid below includes the 'l1' penalty, which the 'lbfgs' solver (the
#default in newer scikit-learn releases) does not accept; pin 'liblinear',
#which supports both 'l1' and 'l2', so every grid candidate can be fitted.
estimator = LogisticRegression(solver='liblinear')

#Create param grid: inverse regularisation strength C crossed with penalty type
paramgrid = {'C': [0.01, 0.05, 0.1, 0.5, 1, 5, 10], 'penalty': ['l1', 'l2']}

#Create GridSearchCV optimizer (10-fold cross-validation on the training set)
optimizer = GridSearchCV(estimator, paramgrid, cv=10)

#Fit it for original data, then score the best model on the held-out test set
optimizer.fit(X_train, y_train)
predict = optimizer.best_estimator_.predict(X_test)
pre = accuracy_score(y_test, predict)




In [15]:
#Report the held-out accuracy stored in `pre` by the grid-search cell
#NOTE(review): the stored output below (0.9166... = 55/60) cannot come from the
#45-sample test set produced by the 0.3 split above; judging by the execution
#counters, this cell was last run after the 0.4 split further down.
#Restart the kernel and Run All to refresh the number.
print('Accuracy score for original: {}'.format(pre))


Accuracy score for original: 0.9166666666666666


# Naive Bayes

In [9]:

from sklearn.model_selection import train_test_split 
#Hold out 40% of the samples for testing, with a fixed seed for repeatability
#(this rebinds X_train / X_test / y_train / y_test from the earlier split)
X_train, X_test, y_train, y_test = train_test_split(
    X_iris,
    y_iris,
    test_size=0.4,
    random_state=1,
)

In [11]:
from sklearn.naive_bayes import GaussianNB 
#Gaussian Naive Bayes classifier trained on the training part of the split
gnb = GaussianNB()
#fit() is left as the last expression so the cell displays the fitted estimator
gnb.fit(X_train, y_train)

GaussianNB(priors=None)

In [12]:
#Predict the held-out labels and compute the accuracy
#(`pre` is reused here and now holds the Naive Bayes score)
y_pred = gnb.predict(X_test)
pre = accuracy_score(y_test, y_pred)

In [13]:
#Report the held-out accuracy of the Gaussian Naive Bayes model (stored in `pre`)
print('Accuracy score for original: {}'.format(pre))

Accuracy score for original: 0.95
