In [1]:
import pandas as pd 
import numpy as np 

from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn import model_selection

from sklearn import decomposition
from sklearn import preprocessing
from sklearn import pipeline


In [2]:
df = pd.read_csv('train.csv')
df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [3]:
X = df.drop('price_range',axis=1).values
y = df.price_range.values

In [4]:
scl = preprocessing.StandardScaler()
pca = decomposition.PCA()
rf = RandomForestClassifier(n_jobs=-1)

In [5]:
classifier = pipeline.Pipeline(
    [
        ('scaling',scl),
        ('pca',pca),
        ('rf',rf)
        
    ])

In [6]:
params_grid = {
    'pca__n_components':np.arange(5,10),
    'rf__n_estimators':np.arange(100,1500,100),
    'rf__max_depth':np.arange(1,20,1),
    'rf__criterion':['gini','entropy'],
}

In [7]:
model = model_selection.RandomizedSearchCV(
    estimator=classifier,
    param_distributions=params_grid,
    scoring='accuracy',
    verbose=10,
    n_iter=10,
    n_jobs=1,
    cv=5

)


In [8]:
model.fit(X,y)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] rf__n_estimators=1400, rf__max_depth=18, rf__criterion=entropy, pca__n_components=7 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  rf__n_estimators=1400, rf__max_depth=18, rf__criterion=entropy, pca__n_components=7, score=0.388, total=   5.8s
[CV] rf__n_estimators=1400, rf__max_depth=18, rf__criterion=entropy, pca__n_components=7 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.8s remaining:    0.0s


[CV]  rf__n_estimators=1400, rf__max_depth=18, rf__criterion=entropy, pca__n_components=7, score=0.407, total=   3.1s
[CV] rf__n_estimators=1400, rf__max_depth=18, rf__criterion=entropy, pca__n_components=7 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    8.9s remaining:    0.0s


[CV]  rf__n_estimators=1400, rf__max_depth=18, rf__criterion=entropy, pca__n_components=7, score=0.378, total=   3.3s
[CV] rf__n_estimators=1400, rf__max_depth=18, rf__criterion=entropy, pca__n_components=7 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   12.2s remaining:    0.0s


[CV]  rf__n_estimators=1400, rf__max_depth=18, rf__criterion=entropy, pca__n_components=7, score=0.425, total=   3.2s
[CV] rf__n_estimators=1400, rf__max_depth=18, rf__criterion=entropy, pca__n_components=7 


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   15.5s remaining:    0.0s


[CV]  rf__n_estimators=1400, rf__max_depth=18, rf__criterion=entropy, pca__n_components=7, score=0.415, total=   3.2s
[CV] rf__n_estimators=700, rf__max_depth=11, rf__criterion=gini, pca__n_components=7 


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   18.6s remaining:    0.0s


[CV]  rf__n_estimators=700, rf__max_depth=11, rf__criterion=gini, pca__n_components=7, score=0.407, total=   1.3s
[CV] rf__n_estimators=700, rf__max_depth=11, rf__criterion=gini, pca__n_components=7 


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   19.9s remaining:    0.0s


[CV]  rf__n_estimators=700, rf__max_depth=11, rf__criterion=gini, pca__n_components=7, score=0.422, total=   1.3s
[CV] rf__n_estimators=700, rf__max_depth=11, rf__criterion=gini, pca__n_components=7 


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:   21.3s remaining:    0.0s


[CV]  rf__n_estimators=700, rf__max_depth=11, rf__criterion=gini, pca__n_components=7, score=0.385, total=   1.3s
[CV] rf__n_estimators=700, rf__max_depth=11, rf__criterion=gini, pca__n_components=7 


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:   22.5s remaining:    0.0s


[CV]  rf__n_estimators=700, rf__max_depth=11, rf__criterion=gini, pca__n_components=7, score=0.432, total=   1.3s
[CV] rf__n_estimators=700, rf__max_depth=11, rf__criterion=gini, pca__n_components=7 


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:   23.9s remaining:    0.0s


[CV]  rf__n_estimators=700, rf__max_depth=11, rf__criterion=gini, pca__n_components=7, score=0.420, total=   1.3s
[CV] rf__n_estimators=200, rf__max_depth=5, rf__criterion=entropy, pca__n_components=6 
[CV]  rf__n_estimators=200, rf__max_depth=5, rf__criterion=entropy, pca__n_components=6, score=0.407, total=   0.5s
[CV] rf__n_estimators=200, rf__max_depth=5, rf__criterion=entropy, pca__n_components=6 
[CV]  rf__n_estimators=200, rf__max_depth=5, rf__criterion=entropy, pca__n_components=6, score=0.438, total=   0.5s
[CV] rf__n_estimators=200, rf__max_depth=5, rf__criterion=entropy, pca__n_components=6 
[CV]  rf__n_estimators=200, rf__max_depth=5, rf__criterion=entropy, pca__n_components=6, score=0.385, total=   0.5s
[CV] rf__n_estimators=200, rf__max_depth=5, rf__criterion=entropy, pca__n_components=6 
[CV]  rf__n_estimators=200, rf__max_depth=5, rf__criterion=entropy, pca__n_components=6, score=0.405, total=   0.5s
[CV] rf__n_estimators=200, rf__max_depth=5, rf__criterion=entropy, pca

[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed:  1.5min finished


RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=Pipeline(memory=None,
                                      steps=[('scaling',
                                              StandardScaler(copy=True,
                                                             with_mean=True,
                                                             with_std=True)),
                                             ('pca',
                                              PCA(copy=True,
                                                  iterated_power='auto',
                                                  n_components=None,
                                                  random_state=None,
                                                  svd_solver='auto', tol=0.0,
                                                  whiten=False)),
                                             ('rf',
                                              RandomForestClassifier(bootstrap=True,
                 

In [9]:
model.best_score_

0.42699999999999994

In [10]:
model.best_estimator_

Pipeline(memory=None,
         steps=[('scaling',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('pca',
                 PCA(copy=True, iterated_power='auto', n_components=8,
                     random_state=None, svd_solver='auto', tol=0.0,
                     whiten=False)),
                ('rf',
                 RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                        class_weight=None, criterion='entropy',
                                        max_depth=5, max_features='auto',
                                        max_leaf_nodes=None, max_samples=None,
                                        min_impurity_decrease=0.0,
                                        min_impurity_split=None,
                                        min_samples_leaf=1, min_samples_split=2,
                                        min_weight_fraction_leaf=0.0,
                                        n_estimators=1300, n_job