This project 
- create a random forest model to predict the mobile phone price range 
- use grid search to find the best combination of parameters

In [1]:
import numpy as np 
import pandas as pd
from sklearn import ensemble
from sklearn import metrics
from sklearn import model_selection

In [2]:
# load data
df = pd.read_csv('mobile_train.csv')
df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [3]:
# define features and label
X = df.drop('price_range', axis=1).values
y = df.price_range.values

In [4]:
# random forest model
classifier = ensemble.RandomForestClassifier()

In [5]:
# define a grid of parameters
param_grid = {"n_estimators": [100, 200, 250, 300, 400, 500], 
              "max_depth": [1, 2, 5, 7, 11, 15],
              "criterion": ["gini", "entropy"]
             }

In [6]:
model = model_selection.GridSearchCV(
    estimator=classifier, # the model we difined
    param_grid=param_grid, # grid of parameters
    scoring="accuracy", 
    verbose=10, # higher value of verbose implies a lot of details are printed
    cv=5 
)

In [7]:
# fit the model and extract best score
model.fit(X, y)
print(f"Best score: {model.best_score_}")

Fitting 5 folds for each of 72 candidates, totalling 360 fits
[CV 1/5; 1/72] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 1/5; 1/72] END criterion=gini, max_depth=1, n_estimators=100;, score=0.570 total time=   0.1s
[CV 2/5; 1/72] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 2/5; 1/72] END criterion=gini, max_depth=1, n_estimators=100;, score=0.595 total time=   0.1s
[CV 3/5; 1/72] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 3/5; 1/72] END criterion=gini, max_depth=1, n_estimators=100;, score=0.603 total time=   0.1s
[CV 4/5; 1/72] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 4/5; 1/72] END criterion=gini, max_depth=1, n_estimators=100;, score=0.610 total time=   0.1s
[CV 5/5; 1/72] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 5/5; 1/72] END criterion=gini, max_depth=1, n_estimators=100;, score=0.575 total time=   0.1s
[CV 1/5; 2/72] START criterion=gini, max_de

In [9]:
print("Best parameters set:")
best_parameters = model.best_estimator_.get_params() 
for param_name in sorted(param_grid.keys()):
    print(f"\t{param_name}: {best_parameters[param_name]}")

Best parameters set:
	criterion: entropy
	max_depth: 11
	n_estimators: 500
