# Data Splitting

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw mobile-price dataset from the working directory.
data = pd.read_csv("train.csv")

In [3]:
# Preview the first rows of the freshly loaded frame.
data.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [4]:
# List every column label (the head() preview above elides the middle columns with "...").
data.columns

Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
       'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
       'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
       'touch_screen', 'wifi', 'price_range'],
      dtype='object')

Here are the attributes of our dataset:

       battery_power: Total energy a battery can store at one time, measured in mAh
       blue: Has bluetooth or not
       clock_speed: speed at which microprocessor executes instructions
       dual_sim: Has dual sim support or not
       fc: Front Camera mega pixels
       four_g: Has 4G or not
       int_memory: Internal Memory in Gigabytes
       m_dep: Mobile Depth in cm
       mobile_wt: Weight of mobile phone
       n_cores: Number of cores of processor
       pc: Primary Camera mega pixels
       px_height: Pixel Resolution Height
       px_width: Pixel Resolution Width
       ram: Random Access Memory in Megabytes
       sc_h: Screen Height of mobile in cm
       sc_w: Screen Width of mobile in cm
       talk_time: longest time that a single battery charge will last when you are on a call
       three_g: Has 3G or not
       touch_screen: Has touch screen or not
       wifi: Has wifi or not
       price_range: This is the target variable with value of 0 (low cost), 1 (medium cost), 2 (high cost) and 3 (very high cost)

In [6]:
# Collapse the four-level target into a binary label:
# values below 2 (0, 1) become "low"; everything else (2, 3) becomes "high".
# The dtype guard makes the cell idempotent: once the column already holds the
# string labels, re-running it is a no-op instead of raising TypeError on
# the `str < int` comparison.
if pd.api.types.is_numeric_dtype(data['price_range']):
    data['price_range'] = data['price_range'].apply(lambda x: "low" if x < 2 else "high")

In [7]:
# Preview the frame again to confirm price_range now holds "low"/"high" labels.
data.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,low
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,high
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,high
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,high
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,low


In [8]:
# Shuffle all rows before slicing them into subsets. A fixed seed keeps the
# resulting train/validation/test partition identical across kernel restarts;
# without it every "Restart & Run All" writes different CSV files.
SPLIT_SEED = 42
data = data.sample(frac=1.0, random_state=SPLIT_SEED)
rows, cols = data.shape
# Row offsets for the split: the first 10% of the shuffled rows go to the test
# set, the next 10% to validation, and the remaining 80% to training.
split_index_1 = int(rows * 0.1)
split_index_2 = int(rows * 0.2)

In [9]:
# Slice the shuffled frame positionally into three disjoint, contiguous pieces:
# [0, split_index_1) -> test, [split_index_1, split_index_2) -> validation,
# [split_index_2, end) -> train.
data_test: pd.DataFrame = data.iloc[:split_index_1]
data_validate: pd.DataFrame = data.iloc[split_index_1:split_index_2]
data_train: pd.DataFrame = data.iloc[split_index_2:]

In [10]:
# Persist each subset to its own CSV; the shuffled row index is dropped
# (index=False) so it is not written out as an extra column.
for subset, csv_path in (
    (data_test, "test.csv"),
    (data_validate, "valid.csv"),
    (data_train, "train1.csv"),
):
    subset.to_csv(csv_path, index=False)

# Data Preprocessing

In [11]:
# Reload the three subsets written by the splitting section so preprocessing
# starts from the files on disk rather than from in-memory state.
train_data, valid_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ("train1.csv", "valid.csv", "test.csv")
)

In [12]:
# Preview the first rows of the training subset.
train_data.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,1067,1,1.7,1,0,0,58,0.5,135,7,...,79,681,706,12,9,16,0,1,0,low
1,1224,1,1.6,0,9,0,33,1.0,157,1,...,522,563,3796,10,5,13,1,1,0,high
2,1125,1,2.7,0,10,0,23,0.4,117,1,...,774,939,1641,14,8,12,1,1,0,low
3,1867,0,2.3,0,0,1,9,0.1,191,6,...,712,1442,990,6,1,2,1,0,1,low
4,1841,1,1.3,1,6,0,14,0.6,167,8,...,593,672,2304,7,1,18,0,0,1,high


In [13]:
# Preview the first rows of the test subset.
test_data.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,921,0,0.6,1,1,0,60,0.1,83,2,...,1211,1229,1080,10,0,6,0,0,1,low
1,1866,1,2.5,1,3,1,47,0.8,89,5,...,358,1782,1444,7,4,20,1,0,0,low
2,757,1,0.9,1,4,0,64,0.5,157,7,...,114,819,3433,6,5,10,1,1,0,high
3,1310,1,2.3,1,6,0,57,0.9,135,8,...,449,889,1175,16,0,8,0,0,0,low
4,1261,1,0.5,1,0,1,11,0.2,90,4,...,858,1591,348,14,9,14,1,0,1,low


In [14]:
# Preview the first rows of the validation subset.
valid_data.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,633,1,2.2,0,0,1,49,0.1,139,8,...,529,1009,3560,11,1,16,1,1,1,high
1,1784,0,1.6,0,4,0,41,0.4,164,6,...,610,1437,2313,14,1,11,0,1,0,high
2,1139,1,0.9,1,6,1,58,0.5,161,2,...,742,999,1850,9,4,8,1,0,0,low
3,1514,1,1.4,0,3,1,12,0.6,138,3,...,306,538,1037,15,3,15,1,0,1,low
4,703,1,2.7,0,12,1,30,0.3,113,1,...,218,1065,1472,14,7,7,1,0,0,low
