In [1]:
import pandas as pd
import numpy as np
from functools import partial
from sklearn import ensemble
from sklearn import metrics
from sklearn import model_selection
from skopt import gp_minimize
from skopt import space

In [2]:
x=pd.read_csv('finalx_train.csv')
y=pd.read_csv('y_train.csv')

In [3]:
x['Allcard'].head()

0    13181
1    11303
2     7917
3    13140
4     9648
Name: Allcard, dtype: int64

In [4]:
x.shape

(590540, 112)

In [5]:
X=x.values
Y=y.values.ravel()

In [6]:
classifier = ensemble.RandomForestClassifier(n_jobs=-1)

In [7]:
param_grid = {
 "n_estimators": np.arange(100, 1500, 100),
 "max_depth": np.arange(1, 31),
 "criterion": ["gini", "entropy"]
 }

In [10]:
model = model_selection.RandomizedSearchCV(
 estimator=classifier,
 param_distributions=param_grid,
 n_iter=4,
 scoring="accuracy",
 verbose=10,
 n_jobs=1,
 cv=5
 )

In [11]:
model.fit(X,Y)

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5; 1/4] START criterion=gini, max_depth=14, n_estimators=900..............
[CV 1/5; 1/4] END criterion=gini, max_depth=14, n_estimators=900; total time= 3.6min
[CV 2/5; 1/4] START criterion=gini, max_depth=14, n_estimators=900..............
[CV 2/5; 1/4] END criterion=gini, max_depth=14, n_estimators=900; total time= 3.5min
[CV 3/5; 1/4] START criterion=gini, max_depth=14, n_estimators=900..............
[CV 3/5; 1/4] END criterion=gini, max_depth=14, n_estimators=900; total time= 3.5min
[CV 4/5; 1/4] START criterion=gini, max_depth=14, n_estimators=900..............
[CV 4/5; 1/4] END criterion=gini, max_depth=14, n_estimators=900; total time= 3.3min
[CV 5/5; 1/4] START criterion=gini, max_depth=14, n_estimators=900..............
[CV 5/5; 1/4] END criterion=gini, max_depth=14, n_estimators=900; total time= 3.3min
[CV 1/5; 2/4] START criterion=entropy, max_depth=25, n_estimators=200...........
[CV 1/5; 2/4] END criterion=e

RandomizedSearchCV(cv=5, estimator=RandomForestClassifier(n_jobs=-1), n_iter=4,
                   n_jobs=1,
                   param_distributions={'criterion': ['gini', 'entropy'],
                                        'max_depth': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]),
                                        'n_estimators': array([ 100,  200,  300,  400,  500,  600,  700,  800,  900, 1000, 1100,
       1200, 1300, 1400])},
                   scoring='accuracy', verbose=10)

In [12]:
best_param=model.best_estimator_.get_params()
print(best_param)

{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 19, 'max_features': 'auto', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 900, 'n_jobs': -1, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}


In [13]:
df=pd.DataFrame.from_records([best_param])
df.head()

Unnamed: 0,bootstrap,ccp_alpha,class_weight,criterion,max_depth,max_features,max_leaf_nodes,max_samples,min_impurity_decrease,min_impurity_split,min_samples_leaf,min_samples_split,min_weight_fraction_leaf,n_estimators,n_jobs,oob_score,random_state,verbose,warm_start
0,True,0.0,,gini,19,auto,,,0.0,,1,2,0.0,900,-1,False,,0,False


In [14]:
df.to_csv('updatedbest_param.csv')

In [15]:
model2=ensemble.RandomForestClassifier(**best_param)

In [16]:
model2.fit(X,Y)

RandomForestClassifier(max_depth=19, n_estimators=900, n_jobs=-1)

In [17]:
tst_out=pd.read_csv('final_tst.csv')
tran_id=pd.read_csv('trans_id.csv')

In [18]:
ti=tran_id.values

In [19]:
to=tst_out.values

In [20]:
tp=model2.predict_proba(to)

In [21]:
submission3 = pd.DataFrame({
    'TransactionID' : ti.ravel() ,
    'isFraud' : tp[:,1]
})

In [22]:
tp[:5]

array([[0.89894595, 0.10105405],
       [0.92842352, 0.07157648],
       [0.98273377, 0.01726623],
       [0.93102252, 0.06897748],
       [0.74192999, 0.25807001]])

In [23]:
submission3.to_csv('updatedsubRFhpt.csv', index=False)

In [24]:
submission3.head()

Unnamed: 0,TransactionID,isFraud
0,3663586,0.101054
1,3663588,0.071576
2,3663597,0.017266
3,3663601,0.068977
4,3663602,0.25807
