# Mobile Classification Case Study - Random Forest

In [1]:
import pandas as pd

In [2]:
import os

# Location of the dataset. The default is the original author's local file;
# set the MOBILE_DATA_PATH environment variable to point at your own copy so
# the notebook can run on another machine without editing this cell.
DATA_PATH = os.environ.get(
    'MOBILE_DATA_PATH',
    r'C:\Users\Laxman\OneDrive\Desktop\Data Files_1\Mobile Classification.csv',
)

# Load the CSV into a DataFrame and preview the first five rows.
mobile = pd.read_csv(DATA_PATH)
mobile.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [3]:
# Print a summary of the frame: index range, column names, non-null counts and dtypes.
mobile.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   battery_power  2000 non-null   int64  
 1   blue           2000 non-null   int64  
 2   clock_speed    2000 non-null   float64
 3   dual_sim       2000 non-null   int64  
 4   fc             2000 non-null   int64  
 5   four_g         2000 non-null   int64  
 6   int_memory     2000 non-null   int64  
 7   m_dep          2000 non-null   float64
 8   mobile_wt      2000 non-null   int64  
 9   n_cores        2000 non-null   int64  
 10  pc             2000 non-null   int64  
 11  px_height      2000 non-null   int64  
 12  px_width       2000 non-null   int64  
 13  ram            2000 non-null   int64  
 14  sc_h           2000 non-null   int64  
 15  sc_w           2000 non-null   int64  
 16  talk_time      2000 non-null   int64  
 17  three_g        2000 non-null   int64  
 18  touch_sc

In [4]:
# Flag every missing cell, then count the flags per column.
missing_mask = mobile.isnull()
missing_mask.sum()

battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64

In [5]:
# Count the distinct price-range labels (missing values are not counted).
mobile['price_range'].nunique(dropna=True)

4

In [6]:
# List the distinct labels that occur in the price_range column.
price_labels = mobile['price_range']
price_labels.unique()

array([1, 2, 3, 0], dtype=int64)

Assumption: If the price range is 0, the mobiles are the cheapest, and if the price range is 3, the mobiles are the costliest.

To validate the above assumption, we group the mobiles by price range and compare the average RAM of each group.

In [7]:
# Average RAM per price range, with price_range restored as a regular column.
grouped_by_price = mobile.groupby('price_range')
ram_by_price = grouped_by_price[['ram']].mean()
ram_by_price.reset_index()

Unnamed: 0,price_range,ram
0,0,785.314
1,1,1679.49
2,2,2582.816
3,3,3449.232


In [8]:
# Separate the target (output) from the features (inputs).
target_col = 'price_range'

# y is kept as a one-column DataFrame: a later cell adds a 'Prediction'
# column to its test split.
y = mobile.loc[:, [target_col]]

# x holds every remaining column.
x = mobile.drop(columns=[target_col])

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
splits = train_test_split(x, y, test_size=0.2, random_state=0)
x_train, x_test, y_train, y_test = splits

# Row counts of (x_train, x_test, y_train, y_test) as a sanity check.
tuple(len(part) for part in splits)

(1600, 400, 1600, 400)

In [10]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 1000 trees; the fixed seed makes the fit reproducible.
rf = RandomForestClassifier(n_estimators=1000, random_state=42)

# y_train is a one-column DataFrame. Passing the column itself gives the
# classifier the 1-D target it expects and avoids scikit-learn's
# DataConversionWarning ("A column-vector y was passed when a 1d array was
# expected"). The fitted model is the same.
# fit() returns the estimator itself, so `model` and `rf` are the same object.
model = rf.fit(x_train, y_train['price_range'])
print('The model has been built successfuly!! yeah!')

  model=rf.fit(x_train,y_train)


The model has been built successfuly!! yeah!


In [11]:
# Predict on the held-out features and attach the result next to the true labels.
# assign() returns a new frame instead of writing a column into the frame
# produced by the split. That avoids pandas' SettingWithCopyWarning and keeps
# the cell safe to re-run. y_test still exposes both 'price_range' and
# 'Prediction' for the evaluation cell.
y_test = y_test.assign(Prediction=model.predict(x_test))

In [12]:
from sklearn.metrics import accuracy_score, confusion_matrix

# True labels and model predictions, both stored on the test-target frame.
actual = y_test['price_range']
predicted = y_test['Prediction']

# Confusion matrix first (true labels passed first, predictions second),
# then the overall accuracy.
print(confusion_matrix(actual, predicted))
print(accuracy_score(actual, predicted))

[[ 92   3   0   0]
 [  6  74  12   0]
 [  0  16  74   9]
 [  0   0   8 106]]
0.865


In [13]:
# Preview the first rows of the feature frame (price_range has already been dropped).
x.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi
0,842,0,2.2,0,1,0,7,0.6,188,2,2,20,756,2549,9,7,19,0,0,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,6,905,1988,2631,17,3,7,1,1,0
2,563,1,0.5,1,2,1,41,0.9,145,5,6,1263,1716,2603,11,2,9,1,1,0
3,615,1,2.5,0,0,0,10,0.8,131,6,9,1216,1786,2769,16,8,11,1,0,0
4,1821,1,1.2,0,13,1,44,0.6,141,2,14,1208,1212,1411,8,2,15,1,1,0


In [14]:
# List the feature names; the hand-built sample in the following cells uses the same column order.
x.columns

Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
       'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
       'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
       'touch_screen', 'wifi'],
      dtype='object')

In [15]:
# Specification of a hypothetical new mobile coming to the market, one value
# per feature. Keys are listed in the same order as the training columns.
new_mobile_specs = {
    'battery_power': 700,
    'blue': 1,
    'clock_speed': 1.0,
    'dual_sim': 1,
    'fc': 0,
    'four_g': 1,
    'int_memory': 10,
    'm_dep': 0.5,
    'mobile_wt': 150,
    'n_cores': 6,
    'pc': 10,
    'px_height': 1200,
    'px_width': 1800,
    'ram': 3000,
    'sc_h': 15,
    'sc_w': 7,
    'talk_time': 15,
    'three_g': 0,
    'touch_screen': 0,
    'wifi': 0,
}

# Wrap each value in a one-element list so the dict describes a single-row table.
val_dict = {feature: [value] for feature, value in new_mobile_specs.items()}

In [16]:
# Turn the single-sample dict into a one-row DataFrame (one column per key) and display it.
val_df = pd.DataFrame(data=val_dict)
val_df

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi
0,700,1,1.0,1,0,1,10,0.5,150,6,10,1200,1800,3000,15,7,15,0,0,0


In [17]:
# Predict the price range of the single hand-built mobile held in val_df.
model.predict(val_df)

array([2], dtype=int64)

In [18]:
# Importance scores of the fitted forest, one value per feature (paired with x.columns in the next cell).
model.feature_importances_

array([0.07581347, 0.00673886, 0.02846209, 0.00678636, 0.02521794,
       0.00647936, 0.03678177, 0.0244546 , 0.03930989, 0.02447744,
       0.02948185, 0.05698109, 0.05755553, 0.4756392 , 0.02752109,
       0.0291623 , 0.03017181, 0.00537195, 0.00699168, 0.00660173])

In [19]:
# Label each importance score with its feature name by using x.columns as the row index.
pd.DataFrame(data=model.feature_importances_, index=x.columns)

Unnamed: 0,0
battery_power,0.075813
blue,0.006739
clock_speed,0.028462
dual_sim,0.006786
fc,0.025218
four_g,0.006479
int_memory,0.036782
m_dep,0.024455
mobile_wt,0.03931
n_cores,0.024477
