In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# Globally ignore every warning issued through the `warnings` module from here on.
# NOTE(review): this is a blanket filter (no category/module restriction), so library
# deprecation or convergence warnings will not be shown either — consider narrowing it.
warnings.filterwarnings('ignore')

In [11]:
# Show up to 25 columns when a DataFrame is rendered, so the wide feature tables are not elided.
pd.options.display.max_columns = 25

In [6]:
# Folder holding the raw CSV files, relative to the notebook's working directory.
path = '../All_data_sets/practice/mobile_prices'
# os.listdir returns entries in arbitrary order; sort so the listing is identical on every run.
sorted(os.listdir(path))

['test.csv', 'train.csv']

In [7]:
# Read both CSV files from the data folder: train.csv first, then test.csv.
train_df, test_df = (
    pd.read_csv(os.path.join(path, csv_name))
    for csv_name in ('train.csv', 'test.csv')
)

## Description of columns
### battery_power -> Total energy the battery can store on a single charge, measured in mAh
### blue -> Has bluetooth or not
### clock_speed -> speed at which microprocessor executes instructions
### dual_sim -> Has dual sim support or not
### fc -> Front Camera mega pixels
### four_g -> Has 4G or not
### int_memory -> Internal Memory in Gigabytes
### m_dep -> Mobile Depth in cm
### mobile_wt -> Weight of mobile phone
### n_cores -> Number of cores of processor
### pc -> Primary Camera mega pixels
### px_height -> Pixel Resolution Height
### px_width -> Pixel Resolution Width
### ram -> Random Access Memory in Mega Bytes
### sc_h -> Screen Height of mobile in cm
### sc_w -> Screen Width of mobile in cm
### talk_time -> Longest time that a single battery charge will last when you are on a call
### three_g -> Has 3G or not
### touch_screen -> Has touch screen or not
### wifi -> Has wifi or not
### price_range -> Target variable, with values 0 (low cost), 1 (medium cost), 2 (high cost) and 3 (very high cost).

In [12]:
# Preview the first rows of the training data (features plus the price_range target).
train_df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,2,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,6,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,6,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,9,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,14,1208,1212,1411,8,2,15,1,1,0,1


In [13]:
# Preview the first rows of the test data; it has an `id` column and no price_range.
test_df.head()

Unnamed: 0,id,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi
0,1,1043,1,1.8,1,14,0,5,0.1,193,3,16,226,1412,3476,12,7,2,0,1,0
1,2,841,1,0.5,1,4,1,61,0.8,191,5,12,746,857,3895,6,0,7,1,0,0
2,3,1807,1,2.8,0,1,0,27,0.9,186,3,4,1270,1366,2396,17,10,10,0,1,1
3,4,1546,0,0.5,1,18,1,25,0.5,96,8,20,295,1752,3893,10,0,7,1,1,0
4,5,1434,0,1.4,0,11,1,49,0.5,108,6,18,749,810,1773,15,8,7,1,0,1


In [10]:
# Report (rows, columns) for each split.
train_shape, test_shape = train_df.shape, test_df.shape
print(f"Train Dataset shape:{train_shape}")
print(f"Test Dataset shape:{test_shape}")

Train Dataset shape:(2000, 21)
Test Dataset shape:(1000, 21)


In [14]:
# Missing-value count per training column (isna is the canonical spelling; isnull is its alias).
train_df.isna().sum()

battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64

In [42]:
# Missing-value count per test column.
test_df.isna().sum()

id               0
battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
dtype: int64

In [43]:
# Class balance of the target.
# NOTE(review): the recorded output shows string labels, which only appear once the
# label-mapping cell further down has been executed; on a fresh top-to-bottom run this
# cell would list the integer codes instead.
train_df['price_range'].value_counts()

price_range
Medium       500
High         500
Very High    500
Low          500
Name: count, dtype: int64

In [19]:
# Human-readable label for each integer price class; list position is the class code.
targets = dict(enumerate(['Low', 'Medium', 'High', 'Very High']))

In [20]:
# Inspect the target column (integer class codes in the recorded output).
train_df['price_range']

0       1
1       2
2       2
3       2
4       1
       ..
1995    0
1996    2
1997    3
1998    0
1999    3
Name: price_range, Length: 2000, dtype: int64

In [21]:
# Translate the integer price classes into readable labels.
# Series.replace leaves values that are not keys of `targets` untouched, so re-running
# this cell is a no-op — unlike .map(targets), which turns the already-mapped labels
# into NaN on a second run.
train_df['price_range'] = train_df['price_range'].replace(targets)

In [22]:
# Confirm that price_range now holds the text labels.
train_df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,2,20,756,2549,9,7,19,0,0,1,Medium
1,1021,1,0.5,1,0,1,53,0.7,136,3,6,905,1988,2631,17,3,7,1,1,0,High
2,563,1,0.5,1,2,1,41,0.9,145,5,6,1263,1716,2603,11,2,9,1,1,0,High
3,615,1,2.5,0,0,0,10,0.8,131,6,9,1216,1786,2769,16,8,11,1,0,0,High
4,1821,1,1.2,0,13,1,44,0.6,141,2,14,1208,1212,1411,8,2,15,1,1,0,Medium


In [32]:
# Separate the label column from the feature columns.
y_train = train_df['price_range']
X_train = train_df.drop(columns=['price_range'])

In [33]:
# Columns documented as "has X or not" flags; these are kept out of the numeric list.
cat_cols = ['blue', 'dual_sim', 'four_g', 'three_g', 'touch_screen', 'wifi']

# Every other feature column, in its original order, is treated as numeric.
num_cols = list(filter(lambda name: name not in cat_cols, X_train.columns))

print(cat_cols, num_cols, sep='\n')

['blue', 'dual_sim', 'four_g', 'three_g', 'touch_screen', 'wifi']
['battery_power', 'clock_speed', 'fc', 'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height', 'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time']


### scale the numerical features

In [28]:
from sklearn.preprocessing import StandardScaler

In [30]:
from sklearn.compose import ColumnTransformer

In [29]:
# Scaler instance with default settings; applied to the numeric columns via the ColumnTransformer.
scale = StandardScaler()

In [34]:
# Scale only the numeric columns; every column not listed is passed through unchanged.
numeric_step = ('scale', scale, num_cols)
ct = ColumnTransformer(transformers=[numeric_step], remainder='passthrough')

In [35]:
# Fit the column transformer on the training features and transform them in the same call.
X_train_transform = ct.fit_transform( X_train )

In [39]:
# test_df also carries an `id` column that the transformer never saw during fit.
# Pass exactly the training feature columns, in training order, so the result does
# not depend on how ColumnTransformer treats columns that were absent at fit time.
X_test_transform = ct.transform( test_df[X_train.columns] )

In [37]:
# Output column names; the prefix shows which step produced each column (scale__ / remainder__).
ct.get_feature_names_out()

array(['scale__battery_power', 'scale__clock_speed', 'scale__fc',
       'scale__int_memory', 'scale__m_dep', 'scale__mobile_wt',
       'scale__n_cores', 'scale__pc', 'scale__px_height',
       'scale__px_width', 'scale__ram', 'scale__sc_h', 'scale__sc_w',
       'scale__talk_time', 'remainder__blue', 'remainder__dual_sim',
       'remainder__four_g', 'remainder__three_g',
       'remainder__touch_screen', 'remainder__wifi'], dtype=object)

In [36]:
# Display the transformed training matrix.
X_train_transform

array([[-0.90259726,  0.83077942, -0.76249466, ...,  0.        ,
         0.        ,  1.        ],
       [-0.49513857, -1.2530642 , -0.99289039, ...,  1.        ,
         1.        ,  0.        ],
       [-1.5376865 , -1.2530642 , -0.53209893, ...,  1.        ,
         1.        ,  0.        ],
       ...,
       [ 1.53077336, -0.76274805, -0.76249466, ...,  1.        ,
         1.        ,  0.        ],
       [ 0.62252745, -0.76274805, -0.07130748, ...,  1.        ,
         1.        ,  1.        ],
       [-1.65833069,  0.58562134,  0.15908825, ...,  1.        ,
         1.        ,  1.        ]])

In [40]:
# Display the transformed test matrix.
X_test_transform

array([[-0.44505984,  0.34046327,  2.2326498 , ...,  0.        ,
         1.        ,  0.        ],
       [-0.90487356, -1.2530642 , -0.07130748, ...,  1.        ,
         0.        ,  0.        ],
       [ 1.29403758,  1.56625363, -0.76249466, ...,  0.        ,
         1.        ,  1.        ],
       ...,
       [-0.12182446, -0.14985287, -0.76249466, ...,  1.        ,
         0.        ,  0.        ],
       [ 0.67032987, -1.2530642 , -0.99289039, ...,  0.        ,
         1.        ,  0.        ],
       [ 0.07166151, -1.2530642 , -0.07130748, ...,  1.        ,
         0.        ,  1.        ]])

In [38]:
# Wrap the transformed matrix in a DataFrame so each column is shown under its output name.
feature_names = ct.get_feature_names_out()
pd.DataFrame(X_train_transform, columns=feature_names)

Unnamed: 0,scale__battery_power,scale__clock_speed,scale__fc,scale__int_memory,scale__m_dep,scale__mobile_wt,scale__n_cores,scale__pc,scale__px_height,scale__px_width,scale__ram,scale__sc_h,scale__sc_w,scale__talk_time,remainder__blue,remainder__dual_sim,remainder__four_g,remainder__three_g,remainder__touch_screen,remainder__wifi
0,-0.902597,0.830779,-0.762495,-1.380644,0.340740,1.349249,-1.101971,-1.305750,-1.408949,-1.146784,0.391703,-0.784983,0.283103,1.462493,0.0,0.0,0.0,0.0,0.0,1.0
1,-0.495139,-1.253064,-0.992890,1.155024,0.687548,-0.120059,-0.664768,-0.645989,0.585778,1.704465,0.467317,1.114266,-0.635317,-0.734267,1.0,1.0,1.0,1.0,1.0,0.0
2,-1.537686,-1.253064,-0.532099,0.493546,1.381165,0.134244,0.209639,-0.645989,1.392684,1.074968,0.441498,-0.310171,-0.864922,-0.368140,1.0,1.0,1.0,1.0,1.0,0.0
3,-1.419319,1.198517,-0.992890,-1.215274,1.034357,-0.261339,0.646842,-0.151168,1.286750,1.236971,0.594569,0.876859,0.512708,-0.002014,1.0,0.0,0.0,1.0,0.0,0.0
4,1.325906,-0.395011,2.002254,0.658915,0.340740,0.021220,-1.101971,0.673534,1.268718,-0.091452,-0.657666,-1.022389,-0.864922,0.730240,1.0,0.0,1.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,-1.011860,-1.253064,-0.992890,-1.656260,1.034357,-0.967737,0.646842,0.673534,1.300273,1.477661,-1.342799,0.164641,-0.405712,1.462493,1.0,1.0,1.0,1.0,1.0,0.0
1996,1.653694,1.321096,-0.992890,0.383299,-1.046495,1.320993,-0.227564,-1.140810,0.608317,1.651235,-0.085031,-0.310171,0.971917,0.913303,1.0,1.0,0.0,1.0,1.0,1.0
1997,1.530773,-0.762748,-0.762495,0.217930,0.687548,-0.911225,1.521249,-1.140810,0.502383,0.880565,0.860139,-0.784983,-1.094526,-1.100394,0.0,1.0,1.0,1.0,1.0,0.0
1998,0.622527,-0.762748,-0.071307,0.769162,-1.393304,0.134244,0.209639,-0.810929,-0.696707,-1.345816,-1.157454,1.351672,0.971917,1.462493,0.0,0.0,1.0,1.0,1.0,1.0


### Prepare the models

In [47]:
# Model metrics
from sklearn.metrics import accuracy_score , f1_score , precision_score , classification_report

In [41]:
from sklearn.linear_model import LogisticRegression

In [45]:
# Logistic regression classifier created with scikit-learn's default hyper-parameters.
model_lr = LogisticRegression( )

In [46]:
# Fit on the transformed training matrix; the labels are the text price categories.
model_lr.fit( X_train_transform , y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [48]:
# Predict on the same rows the model was fitted on (in-sample predictions).
# NOTE(review): no held-out split is used here, so metrics computed from these
# predictions do not estimate performance on unseen data.
y_train_pred = model_lr.predict( X_train_transform )

In [55]:
# In-sample scores as percentages; F1 and precision are weighted across the classes.
train_accuracy = accuracy_score(y_train, y_train_pred) * 100
train_f1 = f1_score(y_train, y_train_pred, average='weighted') * 100
train_precision = precision_score(y_train, y_train_pred, average='weighted') * 100

print(f"Training accuracy is:{train_accuracy}")
print(f"Training f1 score is:{train_f1}")
print(f"Training precision score is:{train_precision}")

Training accuracy is:97.75
Training f1 score is:97.74567897447396
Training precision score is:97.74752269275587


In [54]:
# Per-class precision / recall / F1 for the training-set predictions.
train_report = classification_report(y_train, y_train_pred)
print(train_report)

              precision    recall  f1-score   support

        High       0.97      0.96      0.97       500
         Low       0.98      0.99      0.99       500
      Medium       0.98      0.96      0.97       500
   Very High       0.98      0.99      0.98       500

    accuracy                           0.98      2000
   macro avg       0.98      0.98      0.98      2000
weighted avg       0.98      0.98      0.98      2000



In [56]:
# Predict a price category for every row of the transformed test matrix.
y_pre = model_lr.predict( X_test_transform )

In [63]:
# One-column frame of test predictions, named like the training target.
y_prediction = pd.DataFrame({'price_range': y_pre})
y_prediction

Unnamed: 0,price_range
0,Very High
1,Very High
2,High
3,Very High
4,Medium
...,...
995,High
996,Medium
997,Low
998,High


In [65]:
# Put the predicted label next to each test row; the column-wise concat aligns on the row index.
pd.concat(objs=[test_df, y_prediction], axis='columns')

Unnamed: 0,id,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,1,1043,1,1.8,1,14,0,5,0.1,193,3,16,226,1412,3476,12,7,2,0,1,0,Very High
1,2,841,1,0.5,1,4,1,61,0.8,191,5,12,746,857,3895,6,0,7,1,0,0,Very High
2,3,1807,1,2.8,0,1,0,27,0.9,186,3,4,1270,1366,2396,17,10,10,0,1,1,High
3,4,1546,0,0.5,1,18,1,25,0.5,96,8,20,295,1752,3893,10,0,7,1,1,0,Very High
4,5,1434,0,1.4,0,11,1,49,0.5,108,6,18,749,810,1773,15,8,7,1,0,1,Medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,996,1700,1,1.9,0,0,1,54,0.5,170,7,17,644,913,2121,14,8,15,1,1,0,High
996,997,609,0,1.8,1,0,0,13,0.9,186,4,2,1152,1632,1933,8,1,19,0,1,1,Medium
997,998,1185,0,1.4,0,1,1,8,0.5,80,1,12,477,825,1223,5,0,14,1,0,0,Low
998,999,1533,1,0.5,1,0,0,50,0.4,171,2,12,38,832,2509,15,11,6,0,1,0,High
