# Mobile price classification.

### Import the necessary libraries.

In [2]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

### Load the required dataset.

In [3]:
# Read the mobile-phone dataset from a CSV file (path is relative to the working directory).
data = pd.read_csv('mobile_prices.csv')

### Analyze the dataset.

In [4]:
# Preview the first 10 rows of the dataset.
data.head(10)

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1
5,1859,0,0.5,1,3,0,22,0.7,164,1,...,1004,1654,1067,17,1,10,1,0,0,1
6,1821,0,1.7,0,4,1,10,0.8,139,8,...,381,1018,3220,13,8,18,1,0,1,3
7,1954,0,0.5,1,0,0,24,0.8,187,4,...,512,1149,700,16,3,5,1,1,1,0
8,1445,1,0.5,0,0,0,53,0.7,174,7,...,386,836,1099,17,1,20,1,0,0,0
9,509,1,0.6,1,2,1,9,0.1,93,5,...,1137,1224,513,19,10,12,1,0,0,0


In [5]:
# List all column names in the dataset.
list(data.columns)

['battery_power',
 'blue',
 'clock_speed',
 'dual_sim',
 'fc',
 'four_g',
 'int_memory',
 'm_dep',
 'mobile_wt',
 'n_cores',
 'pc',
 'px_height',
 'px_width',
 'ram',
 'sc_h',
 'sc_w',
 'talk_time',
 'three_g',
 'touch_screen',
 'wifi',
 'price_range']

In [6]:
# Summarize each column's dtype and non-null count.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   battery_power  2000 non-null   int64  
 1   blue           2000 non-null   int64  
 2   clock_speed    2000 non-null   float64
 3   dual_sim       2000 non-null   int64  
 4   fc             2000 non-null   int64  
 5   four_g         2000 non-null   int64  
 6   int_memory     2000 non-null   int64  
 7   m_dep          2000 non-null   float64
 8   mobile_wt      2000 non-null   int64  
 9   n_cores        2000 non-null   int64  
 10  pc             2000 non-null   int64  
 11  px_height      2000 non-null   int64  
 12  px_width       2000 non-null   int64  
 13  ram            2000 non-null   int64  
 14  sc_h           2000 non-null   int64  
 15  sc_w           2000 non-null   int64  
 16  talk_time      2000 non-null   int64  
 17  three_g        2000 non-null   int64  
 18  touch_sc

### Check for any missing data.

In [8]:
# Count the missing values in each column (isna is the canonical name; isnull is its alias).
data.isna().sum()

battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64

Luckily we have no missing data to deal with!

### Let's have a look at the correlation between the features in the dataset.

In [12]:
# Compute the correlation matrix once and take the axis labels from the matrix
# itself, so the labels always line up with the values even if a non-numeric
# column is dropped by numeric_only=True.
corr = data.corr(numeric_only = True)

heatmap = go.Figure(data = go.Heatmap(x = corr.columns,
                                     y = corr.index,
                                     z = corr.values,
                                     colorscale = 'Plotly3'))
heatmap.update_layout(title = 'Correlation between the features.',
                      xaxis_title = 'Features - X',
                      yaxis_title = 'Features - Y')

heatmap.show()

![Correlation between the features](https://github.com/Paul1518/Mobile-price-classification/blob/main/Plots/newplot.png?raw=true)


## Data preparation.

### Every feature in this dataset is already numerical, so no categorical encoding is needed before modelling.

### Let's now standardize the features and split the data into training and test sets (80% / 20%).


In [13]:
# Extract features (x) and target (y) from the DataFrame.
# Features: all rows, every column except the last one.
x = data.iloc[:, :-1].values
# Target: all rows, only the last column (price_range).
y = data.iloc[:, -1]

# Split first (80% train / 20% test) so the test set stays unseen during preprocessing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20, random_state = 0)

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit the scaler on the training features only, then apply the same
# transformation to the test features. Fitting on the full dataset would leak
# test-set statistics (mean / standard deviation) into training.
# Note: x itself is left unscaled; use x_train / x_test for modelling.
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

### Now let's train the mobile price classification model. Since this is a classification problem, I'll use the Logistic Regression algorithm.

In [14]:
# Build a logistic-regression classifier with default settings and fit it on the
# training split (fit returns the estimator itself, so the calls can be chained).
lreg = LogisticRegression().fit(x_train, y_train)

# Predict the price range for every sample in the test split.
y_pred = lreg.predict(x_test)

### Now let's have a look at the accuracy of the model.

In [16]:
# Share of test samples whose predicted label matches the true label, as a percentage.
accuracy = accuracy_score(y_test, y_pred) * 100
print('Accuracy of the Logistic Regression Model is :', accuracy)

Accuracy pf the Logistic Regression Model is : 95.5


### So the model gives an accuracy of about 95.5% which is great.
### Now let's have a look at the predictions made by the model.

In [18]:
# Display the predicted price-range labels for the test split as a plain Python list.
y_pred.tolist()

[3,
 0,
 2,
 2,
 3,
 0,
 0,
 3,
 3,
 1,
 1,
 3,
 0,
 2,
 3,
 0,
 3,
 2,
 2,
 1,
 0,
 0,
 3,
 1,
 2,
 2,
 3,
 1,
 3,
 1,
 1,
 0,
 2,
 0,
 2,
 3,
 0,
 0,
 3,
 3,
 3,
 1,
 3,
 3,
 1,
 3,
 0,
 1,
 3,
 1,
 1,
 3,
 0,
 3,
 0,
 2,
 2,
 2,
 0,
 3,
 3,
 1,
 3,
 2,
 1,
 2,
 3,
 2,
 2,
 2,
 3,
 2,
 1,
 0,
 1,
 3,
 2,
 2,
 1,
 2,
 3,
 3,
 3,
 0,
 0,
 0,
 2,
 1,
 2,
 3,
 1,
 2,
 2,
 1,
 0,
 3,
 3,
 3,
 0,
 3,
 1,
 1,
 3,
 1,
 3,
 2,
 2,
 3,
 2,
 3,
 3,
 0,
 0,
 1,
 3,
 3,
 0,
 0,
 1,
 0,
 0,
 3,
 2,
 2,
 1,
 2,
 1,
 1,
 0,
 2,
 1,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 0,
 1,
 1,
 2,
 1,
 3,
 0,
 3,
 0,
 0,
 2,
 0,
 1,
 1,
 1,
 1,
 3,
 0,
 0,
 3,
 1,
 3,
 2,
 1,
 3,
 1,
 2,
 3,
 3,
 2,
 1,
 0,
 3,
 1,
 2,
 3,
 3,
 0,
 2,
 2,
 3,
 1,
 2,
 1,
 0,
 1,
 2,
 2,
 2,
 0,
 3,
 3,
 1,
 1,
 0,
 2,
 3,
 0,
 1,
 2,
 2,
 0,
 3,
 3,
 3,
 1,
 2,
 3,
 3,
 3,
 0,
 0,
 0,
 2,
 3,
 3,
 0,
 0,
 1,
 3,
 2,
 3,
 3,
 3,
 0,
 0,
 2,
 3,
 3,
 1,
 0,
 2,
 0,
 0,
 0,
 3,
 2,
 1,
 2,
 2,
 1,
 1,
 0,
 2,
 3,
 3,
 0,
 0,
 1,
 3,
 3,
 1,


### The above output shows the price range predicted by the model for each phone in the test set. Let's have a look at how many phones were assigned to each price range.

In [19]:
# Tally how many test samples were predicted for each price-range label.
unique, counts = np.unique(y_pred, return_counts = True)

# Stack labels and counts as two rows, then transpose into (label, count) pairs.
price_range = np.vstack([unique, counts]).T
price_range

array([[  0,  95],
       [  1,  90],
       [  2,  97],
       [  3, 118]], dtype=int64)

---