In [217]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [218]:
data = pd.read_csv('mobile_price_classification.csv')

In [219]:
data.head()

Unnamed: 0,battery_power,bluetooth,clock_speed,dual_sim,front_cam,4G,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [220]:
data.shape

(2000, 21)

In [221]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 21 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   battery_power   2000 non-null   int64  
 1   bluetooth       2000 non-null   int64  
 2   clock_speed     2000 non-null   float64
 3   dual_sim        2000 non-null   int64  
 4   front_cam       2000 non-null   int64  
 5   4G              2000 non-null   int64  
 6   int_memory      2000 non-null   int64  
 7   m_dep           2000 non-null   float64
 8   mobile_wt       2000 non-null   int64  
 9   n_cores         2000 non-null   int64  
 10  primary_camera  2000 non-null   int64  
 11  px_height       2000 non-null   int64  
 12  px_width        2000 non-null   int64  
 13  ram             2000 non-null   int64  
 14  sc_h            2000 non-null   int64  
 15  sc_w            2000 non-null   int64  
 16  talk_time       2000 non-null   int64  
 17  three_g         2000 non-null   i

In [222]:
data.duplicated().sum()

0

In [223]:
x = data.drop(['price_range'], axis=1)
y = data['price_range']

In [224]:
y = pd.get_dummies(y) # one-hot encoding the target column
y.value_counts()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count
0,1,2,3,Unnamed: 4_level_1
False,False,False,True,500
False,False,True,False,500
False,True,False,False,500
True,False,False,False,500


In [225]:
data['price_range'].value_counts()

Unnamed: 0_level_0,count
price_range,Unnamed: 1_level_1
1,500
2,500
3,500
0,500


In [226]:
print('x ->',x.shape)
print('y ->',y.shape)

x -> (2000, 20)
y -> (2000, 4)


In [227]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [228]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.fit_transform(x_test)

# ANN with hyperparameter tuning

In [229]:
pip install tensorflow keras-tuner



In [230]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import keras_tuner as kt

In [231]:
#Create a function to build the ANN model
def build_model(hp):
  model = Sequential()
  model.add(Dense(hp.Int('units',min_value=16,max_value=128,step=16),activation='relu',input_dim=20)) # input_dim=20 for 20 features
  for i in range(hp.Int('num_layers',1,3)):
    model.add(Dense(hp.Int(f'units_{i}',min_value=16,max_value=128,step=16),activation='relu'))
  model.add(Dense(4, activation='softmax'))# For multi-class classification with 4 classes
  model.compile(
      optimizer = tf.keras.optimizers.get({
          'class_name': hp.Choice('optimizer',values=['adam','rmsprop','sgd']),
          'config' : {'learning_rate':hp.Float('learning_rate',min_value=1e-4,max_value=1e-2,sampling= 'log')}
      }),
      loss = 'categorical_crossentropy',
      metrics = ['accuracy']
    )
  return model

In [232]:
# Define the tuner
tuner = kt.RandomSearch(
    build_model,
    objective = 'val_accuracy',
    max_trials = 5
)

Reloading Tuner from ./untitled_project/tuner0.json


In [233]:
# Training the tuner
tuner.search(
    x_train,y_train,
    epochs =20,
    validation_split = 0.2,
    verbose = 1
)

In [234]:
best_hps = tuner.get_best_hyperparameters(1)[0]
print('Units in 1st layer',best_hps.get('units'))
print('No of layers',best_hps.get('num_layers'))
for i in range(best_hps.get('num_layers')):
  print(f"units in layer {i+1}:{best_hps.get(f'units_{i}')}")
print('optimizer :',best_hps.get('optimizer'))
print('Learning rate :',best_hps.get('learning_rate'))

Units in 1st layer 80
No of layers 3
units in layer 1:64
units in layer 2:32
units in layer 3:128
optimizer : rmsprop
Learning rate : 0.002912350224944571


In [235]:
# Rebuild the model with the best hyperparameters
model = build_model(best_hps)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [236]:
#Training the model
model.fit(x_train,y_train,batch_size=10, epochs=100,
          callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)]  # Early stopping to avoid overfitting
          )

Epoch 1/100
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5911 - loss: 0.8840
Epoch 2/100


  current = self.get_monitor_value(logs)


[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8582 - loss: 0.3374
Epoch 3/100
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9144 - loss: 0.2253
Epoch 4/100
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9111 - loss: 0.2255
Epoch 5/100
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9150 - loss: 0.1879
Epoch 6/100
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9400 - loss: 0.1494
Epoch 7/100
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9561 - loss: 0.1347
Epoch 8/100
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9482 - loss: 0.1229
Epoch 9/100
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9727 - loss: 0.0913
Epoch 10/100
[1m160/160[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7fed03499810>

In [237]:
# Evaluate the model on the test dataset
test_loss, test_acc = model.evaluate(x_test, y_test)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9200 - loss: 0.9960  


In [238]:
# Make predictions
y_pred = model.predict(x_test)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


In [239]:
# Get predicted class labels (index of the highest probability)
predicted_classes = np.argmax(y_pred, axis=1)

# For multi-class classification, y_test should be in integer form (not one-hot encoded)
true_classes = np.argmax(y_test, axis=1)  # Convert one-hot encoded labels to class indices


In [240]:
# Calculate accuracy using sklearn's accuracy_score function
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(true_classes, predicted_classes)
accuracy

0.9