## Mobile Price Prediction Model

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the training and test CSV files from the working directory.
df_test = pd.read_csv("test.csv")
df = pd.read_csv('train.csv')

In [5]:
# Preview the first three rows of the training frame.
df.head(n=3)

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2


In [6]:
# Preview the first three rows of the test frame.
df_test.head(n=3)

Unnamed: 0,id,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,...,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi
0,1,1043,1,1.8,1,14,0,5,0.1,193,...,16,226,1412,3476,12,7,2,0,1,0
1,2,841,1,0.5,1,4,1,61,0.8,191,...,12,746,857,3895,6,0,7,1,0,0
2,3,1807,1,2.8,0,1,0,27,0.9,186,...,4,1270,1366,2396,17,10,10,0,1,1


### BASE MODEL WITHOUT SCALING & FEATURE SELECTION

In [10]:
# Target: the price_range column.
y = df['price_range']

# Features: the twenty remaining columns of df, used without selection or scaling.
X = df[[
    'battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc',
    'four_g', 'int_memory', 'm_dep', 'mobile_wt', 'n_cores',
    'pc', 'px_height', 'px_width', 'ram', 'sc_h',
    'sc_w', 'talk_time', 'three_g', 'touch_screen', 'wifi',
]]

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score

# Parallel result lists: entry i of every list belongs to the i-th
# successful price_model() call, so model_names[i] labels the scores at i.
model_names = []
cv = []
r2_train = []
r2_test = []

def price_model(model, model_name):
    """Fit ``model`` on the current training split and report its R2 scores.

    The function reads the notebook-level ``X_train``, ``X_test``, ``y_train``
    and ``y_test`` at call time, so it evaluates against whichever split was
    assigned most recently.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit`` and ``predict``; it is fitted in place.
    model_name : str
        Label stored in ``model_names`` next to the recorded scores.

    Returns
    -------
    None
        Results are printed and appended (rounded to two decimals) to the
        module-level lists ``r2_train``, ``r2_test`` and ``cv``.
    """
    model.fit(X_train, y_train)

    # R2 on the rows the model was fitted on.
    r2_train_model = r2_score(y_train, model.predict(X_train))

    # R2 on the held-out rows.
    r2_test_model = r2_score(y_test, model.predict(X_test))

    # 5-fold cross-validation on the training split only. No `scoring` is
    # passed, so the estimator's own default scorer is used (R2 for
    # scikit-learn regressors).
    cross_val = cross_val_score(model, X_train, y_train, cv = 5)
    cv_mean = cross_val.mean()

    # Record everything only after all metrics were computed, so a failure
    # above cannot leave the parallel lists with different lengths.
    model_names.append(model_name)
    r2_train.append(round(r2_train_model, 2))
    r2_test.append(round(r2_test_model, 2))
    cv.append(round(cv_mean, 2))

    # Summary of the three evaluations.
    print("Train R2_Score :", round(r2_train_model, 2))
    print("Test R2_Score :", round(r2_test_model, 2))
    print("Train CV Scores :", cross_val)
    print("Train CV Mean :", round(cv_mean, 2))

#### Linear Regression Base Model

In [13]:
from sklearn.linear_model import LinearRegression

# Baseline linear model evaluated on the current split.
lr = LinearRegression()
price_model(model=lr, model_name="Linear Regression")

Train R2_Score : 0.92
Test R2_Score : 0.92
Train CV Scores : [0.91745106 0.91517033 0.9143163  0.91349809 0.91795702]
Train CV Mean : 0.92


#### Decision Tree Regression Base Model

In [14]:
from sklearn.tree import DecisionTreeRegressor

# Seed the tree so its scores are repeatable across runs; an unseeded
# DecisionTreeRegressor can give different results on every execution.
dtree = DecisionTreeRegressor(random_state = 42)

# Label corrected to match the section heading ("Decision Tree Regression").
price_model(dtree, "Decision Tree Regression")

Train R2_Score : 1.0
Test R2_Score : 0.88
Train CV Scores : [0.83153813 0.86862442 0.85951833 0.84793556 0.82901001]
Train CV Mean : 0.85


#### Random Forest Regression Base Model

In [15]:
from sklearn.ensemble import RandomForestRegressor

# Seed the forest: bootstrap sampling and feature choice are random, so an
# unseeded model yields different scores on every run.
ranreg = RandomForestRegressor(random_state = 42)

price_model(ranreg, "Random Forest Regression")

Train R2_Score : 0.99
Test R2_Score : 0.95
Train CV Scores : [0.92775205 0.93964735 0.94168088 0.92970183 0.93303721]
Train CV Mean : 0.93


#### Support Vector Regressor Base Model

In [16]:
from sklearn.svm import SVR

# Support vector regressor with library-default settings.
svr = SVR()
price_model(model=svr, model_name="Support Vector Regressor")

Train R2_Score : 0.94
Test R2_Score : 0.95
Train CV Scores : [0.93852036 0.93940417 0.94185807 0.94551241 0.94627523]
Train CV Mean : 0.94


#### Ridge Regressor Base Model

In [17]:
from sklearn.linear_model import Ridge

# Ridge regression with library-default settings.
ridge = Ridge()
price_model(model=ridge, model_name="Ridge Regressor")

Train R2_Score : 0.92
Test R2_Score : 0.92
Train CV Scores : [0.9174572  0.91516848 0.91432045 0.91351036 0.91795554]
Train CV Mean : 0.92


#### XGBoost Regressor Base Model

In [18]:
from xgboost import XGBRegressor

# Gradient-boosted regressor with library-default settings.
xgbreg = XGBRegressor()
price_model(model=xgbreg, model_name="Xgb Regressor")

Train R2_Score : 1.0
Test R2_Score : 0.94
Train CV Scores : [0.92770141 0.93363497 0.93842835 0.92573391 0.92936045]
Train CV Mean : 0.93


### BASE MODEL WITH FEATURE SELECTION

In [21]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

# Score every feature against the target with the chi-squared statistic and
# list the ten highest-scoring ones.
# NOTE(review): the selector is fitted on the full X / y, i.e. including rows
# that later end up in the test split — consider fitting on the training rows
# only to avoid leaking test information into feature selection.
bestfeats = SelectKBest(score_func=chi2, k=10)
fit = bestfeats.fit(X, y)

# One single-column frame for the feature names and one for their scores.
dfcolumns = pd.DataFrame(X.columns)
df_scores = pd.DataFrame(fit.scores_)

# Place names and scores side by side, then label the two columns.
feature_scores = pd.concat([dfcolumns, df_scores], axis=1)
feature_scores.columns = ['Specs', 'Scores']

print(feature_scores.nlargest(10, 'Scores'))

            Specs         Scores
13            ram  931267.519053
11      px_height   17363.569536
0   battery_power   14129.866576
12       px_width    9810.586750
8       mobile_wt      95.972863
6      int_memory      89.839124
15           sc_w      16.480319
16      talk_time      13.236400
4              fc      10.135166
14           sc_h       9.614878


In [22]:
# Show the column labels of the training frame for reference when picking features.
df.columns

Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
       'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
       'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
       'touch_screen', 'wifi', 'price_range'],
      dtype='object')

In [23]:
# Target: the price_range column.
y = df['price_range']

# Features: the ten columns listed by the SelectKBest ranking.
X = df[[
    'battery_power', 'fc', 'int_memory', 'mobile_wt', 'px_height',
    'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time',
]]

In [24]:
from sklearn.model_selection import train_test_split

# Re-create the 80/20 split on the reduced feature set, using the same seed as before.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [25]:
from sklearn.linear_model import LinearRegression

# Linear model on the selected features.
lr_feat = LinearRegression()
price_model(model=lr_feat, model_name="Linear Regression")

Train R2_Score : 0.92
Test R2_Score : 0.92
Train CV Scores : [0.91821963 0.91499797 0.91520827 0.91518419 0.91788218]
Train CV Mean : 0.92


In [26]:
from sklearn.tree import DecisionTreeRegressor

# Seed the tree so its scores on the selected features are repeatable across
# runs; an unseeded DecisionTreeRegressor can differ on every execution.
dtree_feat = DecisionTreeRegressor(random_state = 42)

# Label corrected from "Decision Regression" to name the actual model.
price_model(dtree_feat, "Decision Tree Regression")

Train R2_Score : 1.0
Test R2_Score : 0.89
Train CV Scores : [0.86630011 0.8737764  0.88416423 0.86265147 0.83937304]
Train CV Mean : 0.87


In [27]:
from sklearn.ensemble import RandomForestRegressor

# Seed the forest: bootstrap sampling and feature choice are random, so an
# unseeded model yields different scores on every run.
ranreg_feat = RandomForestRegressor(random_state = 42)

price_model(ranreg_feat, "Random Forest Regression")

Train R2_Score : 0.99
Test R2_Score : 0.95
Train CV Scores : [0.93215827 0.94155538 0.9449021  0.93141256 0.93376444]
Train CV Mean : 0.94


In [28]:
from sklearn.svm import SVR

# Support vector regressor on the selected features.
svr_feat = SVR()
price_model(model=svr_feat, model_name="Support Vector Regressor")

Train R2_Score : 0.94
Test R2_Score : 0.95
Train CV Scores : [0.9386921  0.9392557  0.94166366 0.9454085  0.94647352]
Train CV Mean : 0.94


In [29]:
from xgboost import XGBRegressor

# Gradient-boosted regressor on the selected features.
xgbreg_feat = XGBRegressor()
price_model(model=xgbreg_feat, model_name="Xgb Regressor")

Train R2_Score : 1.0
Test R2_Score : 0.94
Train CV Scores : [0.92809112 0.93438482 0.93733827 0.92641318 0.93503531]
Train CV Mean : 0.93


### BASE MODEL WITH FEATURE SELECTION & SCALING

In [45]:
# Target: the price_range column.
y = df['price_range']

# Features: the same ten selected columns, taken fresh from df before scaling.
X = df[[
    'battery_power', 'fc', 'int_memory', 'mobile_wt', 'px_height',
    'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time',
]]

In [46]:
from sklearn.model_selection import train_test_split

# 80/20 split with the same seed as the earlier sections; scaling is applied afterwards.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [47]:
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler is fitted on the training split only
# and the same fitted scaler is then applied to both splits.
scaled = StandardScaler()
scaled.fit(X_train)

X_train = scaled.transform(X_train)
X_test = scaled.transform(X_test)

In [48]:
from sklearn.linear_model import LinearRegression

# Linear model on the selected, standardised features.
lr_feat_sc = LinearRegression()
price_model(model=lr_feat_sc, model_name="Linear Regression")

Train R2_Score : 0.92
Test R2_Score : 0.92
Train CV Scores : [0.91821963 0.91499797 0.91520827 0.91518419 0.91788218]
Train CV Mean : 0.92


In [49]:
from sklearn.tree import DecisionTreeRegressor

# Seed the tree so its scores on the scaled features are repeatable across
# runs; an unseeded DecisionTreeRegressor can differ on every execution.
dtree_feat_sc = DecisionTreeRegressor(random_state = 42)

# Label corrected from "Decision Regression" to name the actual model.
price_model(dtree_feat_sc, "Decision Tree Regression")

Train R2_Score : 1.0
Test R2_Score : 0.9
Train CV Scores : [0.85827811 0.86604843 0.88169964 0.86265147 0.84455455]
Train CV Mean : 0.86


In [50]:
from sklearn.ensemble import RandomForestRegressor

# Seed the forest: bootstrap sampling and feature choice are random, so an
# unseeded model yields different scores on every run.
ranreg_feat_sc = RandomForestRegressor(random_state = 42)

price_model(ranreg_feat_sc, "Random Forest Regression")

Train R2_Score : 0.99
Test R2_Score : 0.95
Train CV Scores : [0.93057473 0.9379423  0.94458713 0.93520362 0.93680132]
Train CV Mean : 0.94
