In [1]:
from time import time
import warnings
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE, RFECV
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC, SVR
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.linear_model import SGDClassifier, SGDRegressor
from sklearn.metrics import f1_score, r2_score, accuracy_score
print('Import functions')

Import functions


In [2]:
t0 = time()
warnings.filterwarnings('ignore')

print('Load Data')
data = pd.read_csv('creditcard.csv', sep=',', header=0)
colofans = ['Amount', 'Class']
labels = data[colofans]
data = data.drop(colofans, axis=1)

Load Data


In [3]:
print('Split Data')
Xtrain, Xtest, ytrain, ytest = train_test_split(data, labels, test_size=0.25)
train_class = ytrain['Class']
test_class = ytest['Class']
train_reg = ytrain['Amount']
test_reg = ytest['Amount']

print('Process data for Regressor')
scale = MinMaxScaler()
Xtrain = scale.fit_transform(Xtrain)
Xtest = scale.transform(Xtest)

train_reg = scale.fit_transform(np.reshape(train_reg, (-1, 1)))
test_reg = scale.transform(np.reshape(test_reg, (-1, 1)))
print('Xtrain_shape: {}'.format(Xtrain.shape))

Split Data
Process data for Regressor
Xtrain_shape: (213605, 29)


In [4]:
knr = KNeighborsRegressor()
knc = KNeighborsClassifier()
svr = SVR()
svm = SVC()
mlr = MLPRegressor(hidden_layer_sizes=(32, 8, ))
mlc = MLPClassifier(hidden_layer_sizes=(32, 8, ))
rtc = RandomForestClassifier()
rtr = RandomForestRegressor()

regressors = {'Kneighbor_regressor': knr, 'SGD_regressor': svr, 
              'MLP_regressor': mlr, 'RTree_regressor': rtr}
classifiers = {'Kneighbor_classifier': knc, 'SV_classifier': svm, 
               'MLP_classifier': mlc, 'RTree_classifier': rtc}

estimator_class = SGDClassifier()
estimator_reg = SGDRegressor()


In [5]:
refer = RFECV(estimator_reg, cv=5)
refer.fit(Xtrain, train_reg)
print('RFE score = ', refer.score(Xtrain, train_reg))
X_train = refer.transform(Xtrain)
X_test = refer.transform(Xtest)
print('No. of features = ', refer.n_features_)

for name in regressors:
    reg = regressors[name]
    reg.fit(X_train, train_reg)
    y = reg.predict(X_test)
    print('\t', name, '\tR2_score = ', r2_score(test_reg, y))
    

RFE score =  0.142300381222
No. of features =  26
	 Kneighbor_regressor 	R2_score =  0.690273741414
	 SGD_regressor 	R2_score =  -50.8950559499
	 MLP_regressor 	R2_score =  0.634448817622
	 RTree_regressor 	R2_score =  0.969184583031


In [6]:
refec = RFECV(estimator_class, cv=5)
refec.fit(Xtrain, train_class)
print('RFE score = ', refec.score(Xtrain, train_class))
X_train = refec.transform(Xtrain)
X_test = refec.transform(Xtest)
print('No. of features = ', refec.n_features_)


for clame in classifiers:
    clf = classifiers[clame]
    clf.fit(X_train, train_class)
    y_ = clf.predict(X_test)
    print('\t{0}\tF_score = {1}\tAccuracy = {2}'.format(clame, f1_score(test_class, y_), 
                                                        accuracy_score(test_class, y_)))



print('\n Time take for the code to run ==> %.2f sec' % (time()-t0))

RFE score =  0.998895156949
No. of features =  29
	Kneighbor_classifier	F_score = 0.8436018957345971	Accuracy = 0.9995365298727564
	SV_classifier	F_score = 0.6934673366834171	Accuracy = 0.9991432824920649
	MLP_classifier	F_score = 0.8	Accuracy = 0.9993679952810315
	RTree_classifier	F_score = 0.8229665071770336	Accuracy = 0.9994803516755147

 Time take for the code to run ==> 628.35 sec
