import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import SGD
# Function to create model, required for KerasClassifier
def create_model(learn_rate=0.01, momentum=0):
	# create model
	model = Sequential()
	model.add(Dense(12, input_dim=29, activation='relu'))
	model.add(Dense(1, activation='sigmoid'))
	# Compile model
	optimizer = SGD(lr=learn_rate, momentum=momentum)
	model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
	return model
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load dataset
dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10, verbose=0)
# define the grid search parameters
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
param_grid = dict(learn_rate=learn_rate, momentum=momentum)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [1]:
# Use scikit-learn to grid search the batch size and epochs
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.cluster import KMeans
from keras.optimizers import SGD

Using TensorFlow backend.


In [2]:
# Function to create model, required for KerasClassifier
def create_model(learn_rate=0.01, momentum=0):
	# create model
    model = Sequential()
    model.add(Dense(12, input_dim=29, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    optimizer = SGD(lr=learn_rate, momentum=momentum)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [3]:
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

In [4]:
df_full = pd.read_csv("/home/dinesh/Downloads/creditcard.csv")

In [5]:
df_full.sort_values(by='Class', ascending=False, inplace=True) 
df_full.drop('Time', axis=1,  inplace = True)

In [6]:
df_cluster = df_full.iloc[493:,:]

In [7]:
kmeans=KMeans(n_clusters=10)

In [8]:
kmeans.fit(df_cluster.drop("Class",axis=1))

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=10, n_init=10, n_jobs=1, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

In [9]:
k=10
labels = pd.Series(kmeans.labels_)
centroids = kmeans.cluster_centers_

In [10]:
df_cluster['clust']=labels

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [11]:
df_cluster.clust.value_counts()

3.0    203273
0.0     50401
7.0     18190
6.0      7153
2.0      3044
8.0      1182
1.0       380
5.0       167
9.0        28
4.0         3
Name: clust, dtype: int64

In [12]:
df_cluster_0=df_cluster.loc[df_cluster['clust'] == 0.0]
df_cluster_1=df_cluster.loc[df_cluster['clust'] == 1.0]
df_cluster_2=df_cluster.loc[df_cluster['clust'] == 2.0]
df_cluster_3=df_cluster.loc[df_cluster['clust'] == 3.0]
df_cluster_4=df_cluster.loc[df_cluster['clust'] == 4.0]
df_cluster_5=df_cluster.loc[df_cluster['clust'] == 5.0]
df_cluster_6=df_cluster.loc[df_cluster['clust'] == 6.0]
df_cluster_7=df_cluster.loc[df_cluster['clust'] == 7.0]
df_cluster_8=df_cluster.loc[df_cluster['clust'] == 8.0]
df_cluster_9=df_cluster.loc[df_cluster['clust'] == 9.0]

In [13]:
df_sample_cluster=pd.concat([df_cluster_3.sample(frac=0.01),df_cluster_0.sample(frac=0.01),df_cluster_7.sample(frac=0.01),df_cluster_6.sample(frac=0.01),df_cluster_2.sample(frac=0.1),df_cluster_8.sample(frac=0.1),df_cluster_1,df_cluster_5,df_cluster_9,df_cluster_4])

In [14]:
df_sample_cluster.clust.value_counts()

3.0    2033
0.0     504
1.0     380
2.0     304
7.0     182
5.0     167
8.0     118
6.0      72
9.0      28
4.0       3
Name: clust, dtype: int64

In [15]:
df_sample_cluster=df_sample_cluster.drop('clust',axis=1)

In [16]:
df_sample=pd.concat([df_full.iloc[:492,:],df_sample_cluster])

In [17]:
feature = np.array(df_sample.values[:,0:29])
label = np.array(df_sample.values[:,-1])

In [18]:
from sklearn.model_selection import train_test_split
df_train, df_test = train_test_split(df_sample, test_size=0.2, random_state=40)

In [19]:
train_feature = np.array(df_train.values[:,0:29])
train_label = np.array(df_train.values[:,-1])
test_feature = np.array(df_test.values[:,0:29])
test_label = np.array(df_test.values[:,-1])

In [20]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

scaler.fit(train_feature)
train_feature_trans = scaler.transform(train_feature)
test_feature_trans = scaler.transform(test_feature)

In [21]:
# create model
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=20, verbose=0)


In [22]:

# define the grid search parameters
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
param_grid = dict(learn_rate=learn_rate, momentum=momentum)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(train_feature_trans, train_label )


In [23]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.980152 using {'learn_rate': 0.01, 'momentum': 0.9}
0.885873 (0.005414) with: {'learn_rate': 0.001, 'momentum': 0.0}
0.902510 (0.014763) with: {'learn_rate': 0.001, 'momentum': 0.2}
0.915645 (0.007162) with: {'learn_rate': 0.001, 'momentum': 0.4}
0.932283 (0.005461) with: {'learn_rate': 0.001, 'momentum': 0.6}
0.968768 (0.000413) with: {'learn_rate': 0.001, 'momentum': 0.8}
0.976065 (0.002977) with: {'learn_rate': 0.001, 'momentum': 0.9}
0.975773 (0.002707) with: {'learn_rate': 0.01, 'momentum': 0.0}
0.977233 (0.003783) with: {'learn_rate': 0.01, 'momentum': 0.2}
0.978109 (0.002477) with: {'learn_rate': 0.01, 'momentum': 0.4}
0.979568 (0.003224) with: {'learn_rate': 0.01, 'momentum': 0.6}
0.979860 (0.003575) with: {'learn_rate': 0.01, 'momentum': 0.8}
0.980152 (0.003302) with: {'learn_rate': 0.01, 'momentum': 0.9}
0.978984 (0.004465) with: {'learn_rate': 0.1, 'momentum': 0.0}
0.979860 (0.004953) with: {'learn_rate': 0.1, 'momentum': 0.2}
0.976649 (0.004761) with: {'learn_rate': 