**CLASSIFYING MOBILE PRICE USING VARIOUS CLASSIFICATION MODELS**

**Dataset**: <a href="https://www.kaggle.com/datasets/iabhishekofficial/mobile-price-classification">Mobile Price Classification</a>, posted on Kaggle by user Abhishek Sharma.


In [None]:
#importing libraries 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#importing dataset
# Use the fully-qualified option name: the bare 'max_columns' pattern is
# ambiguous on pandas versions that also register 'styler.render.max_columns'
# and raises OptionError there.
pd.set_option('display.max_columns', None)
df = pd.read_csv('../input/mobile-price-classification/train.csv')
# preview the first rows of the frame
df.head()

In [3]:
# DataFrame.shape is a (number of rows, number of columns) tuple
df.shape

In [4]:
# per-column dtype and non-null count summary
df.info()

In [5]:
# per-column flag: True when the column holds at least one missing value
df.isna().any()

***NO NULL VALUES***

In [6]:
# list the column labels available for plotting and modelling
df.columns

In [8]:
# Count plot for each column listed in `col`, laid out on a 2x3 grid.
col = ['blue','dual_sim','four_g','three_g','touch_screen','price_range']
fig, axes = plt.subplots(2, 3, figsize=(12, 10))
# zip pairs each column with one panel, so no manual subplot counter is needed
for ax, i in zip(axes.flat, col):
    sns.countplot(x = i, data = df, ax = ax)
    ax.set_xlabel(i)
# a single layout pass once every panel is drawn
fig.tight_layout()

In [19]:
# relplot is a figure-level function that creates its own figure, so a preceding
# plt.figure() only leaves an empty figure behind. Size the plot through
# height/aspect instead (height 12 in, width 14 in).
sns.relplot(x = 'price_range',y = 'ram',data = df,kind = 'line',height = 12,aspect = 14/12)

In [10]:
# relplot is a figure-level function that creates its own figure, so a preceding
# plt.figure() only leaves an empty figure behind. Size the plot through
# height/aspect instead (height 12 in, width 14 in).
sns.relplot(x = 'price_range',y = 'battery_power',data = df,kind = 'line',height = 12,aspect = 14/12)

In [23]:
# relplot is a figure-level function that creates its own figure, so a preceding
# plt.figure() only leaves an empty figure behind. Size the plot through
# height/aspect instead (height 12 in, width 14 in).
sns.relplot(x = 'price_range',y = 'int_memory',data = df,kind="line",height = 12,aspect = 14/12)

In [24]:
# jittered strip plot of `ram` for each `price_range` value
plt.figure(figsize = (14, 12))
sns.stripplot(
    data = df,
    x = 'price_range',
    y = 'ram',
    jitter = True,
)

In [26]:
# heatmap of the pairwise correlation matrix of the frame's columns
corr_matrix = df.corr()
fig = plt.figure(figsize = (14, 12))
sns.heatmap(corr_matrix, cmap = "Blues")

**NO SIGNIFICANT CORRELATION**

In [27]:
# features: every column except the target; target: the `price_range` column
target_col = 'price_range'
x = df.drop(columns = [target_col])
y = df[target_col]

In [28]:
from sklearn.model_selection import train_test_split

# hold out 23% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size = 0.23,
    random_state = 0,
)

**APPLYING MODELS**

In [29]:
# Logistic Regression with default settings
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# NOTE(review): the features are fed in unscaled and the solver's iteration
# limit is left at its default - check the cell output for a ConvergenceWarning.
# fit() returns the estimator itself, so construction and training can be chained
lr = LogisticRegression().fit(X_train, y_train)

# evaluate on the held-out split
y_pred = lr.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
lra = accuracy_score(y_test, y_pred)

print('confusion matrix:\n',cm)
print('accuracy score = ',lra)

In [31]:
# K-Nearest Neighbours: 5 neighbours, Minkowski metric with p = 2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

knn = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
knn.fit(X_train, y_train)

# evaluate on the held-out split
y_pred = knn.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
knna = accuracy_score(y_test, y_pred)

print('confusion matrix:\n',cm)
print('accuracy score = ',knna)

In [32]:
# Support Vector Machine with a linear kernel
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

svc = SVC(kernel = 'linear', random_state = 0)
svc.fit(X_train, y_train)

# evaluate on the held-out split
y_pred = svc.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
sva = accuracy_score(y_test, y_pred)

print('confusion matrix:\n',cm)
print('accuracy score = ',sva)

In [33]:
# Support Vector Machine with an RBF kernel (rebinds `svc` from the linear model)
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

svc = SVC(kernel = 'rbf', random_state = 0)
svc.fit(X_train, y_train)

# evaluate on the held-out split
y_pred = svc.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
sva2 = accuracy_score(y_test, y_pred)

print('confusion matrix:\n',cm)
print('accuracy score = ',sva2)

In [34]:
#applying hyperparameter tuning on SVM
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# two sub-grids: linear kernel over C, and RBF kernel over C x gamma
parameters = [{'C': [0.25, 0.5, 0.75, 1], 'kernel': ['linear']},
              {'C': [0.25, 0.5, 0.75, 1], 'kernel': ['rbf'], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}]
# Search over a fresh estimator rather than whichever fitted `svc` the previous
# cell left behind; kernel, C and gamma are set by the grid itself.
grid_search = GridSearchCV(estimator = SVC(random_state = 0),
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)
grid_search.fit(X_train, y_train)
# best_score_ is the mean cross-validated accuracy of the best candidate on the
# training split - it is not an accuracy on X_test
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

In [35]:
# Gaussian Naive Bayes with default settings
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix

nb = GaussianNB()
nb.fit(X_train, y_train)

# evaluate on the held-out split
y_pred = nb.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
nba = accuracy_score(y_test, y_pred)

print('confusion matrix:\n',cm)
print('accuracy score = ',nba)

In [36]:
# Decision Tree using the entropy split criterion
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# fixed seed, as for the split, the SVMs and the random forest in this notebook,
# so the fitted tree and its score are repeatable from run to run
dt = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
dt.fit(X_train, y_train)

# evaluate on the held-out split
y_pred = dt.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
dta = accuracy_score(y_test, y_pred)

print('confusion matrix:\n',cm)
print('accuracy score = ',dta)

In [37]:
# Random Forest: 140 trees, entropy split criterion, fixed seed
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

rf = RandomForestClassifier(
    n_estimators = 140,
    criterion = 'entropy',
    random_state = 0,
)
rf.fit(X_train, y_train)

# evaluate on the held-out split
y_pred = rf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
rfa = accuracy_score(y_test, y_pred)

print('confusion matrix:\n',cm)
print('accuracy score = ',rfa)

In [38]:
# XGBoost classifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# NOTE(review): `use_label_encoder` looks version-dependent in xgboost -
# confirm it is still accepted by the installed release
xc = XGBClassifier(use_label_encoder = False)
xc.fit(X_train, y_train)

# evaluate on the held-out split
y_pred = xc.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
xca = accuracy_score(y_test, y_pred)

print('confusion matrix:\n',cm)
print('accuracy score = ',xca)

In [39]:
# CatBoost classifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# verbose = 0 suppresses the per-iteration training log, which would otherwise
# bury the confusion matrix and accuracy printed below
cb = CatBoostClassifier(verbose = 0)
cb.fit(X_train, y_train)

# evaluate on the held-out split
y_pred = cb.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
cba = accuracy_score(y_test, y_pred)

print('confusion matrix:\n',cm)
print('accuracy score = ',cba)

In [44]:
#comparing accuracies
from sklearn.metrics import accuracy_score

# `best_accuracy` is a cross-validated score computed on the training split.
# Score the refit tuned SVM on the held-out split instead, so that every bar
# is measured on the same data.
tuned_svm_acc = accuracy_score(y_test, grid_search.predict(X_test))

plt.figure(figsize= (8,7))
ac = [lra,knna,sva,sva2,nba,dta,rfa,xca,cba,tuned_svm_acc]
name = ['Logistic Regression','knn','svm','Kernel Svm','Naive Bayes','Decision Tree', 'Random Forest','XG Boost','Cat Boost','SVM (Hyperparameter Tuning)']
sns.barplot(x = ac,y = name,palette="tab10")
plt.xlabel("Test accuracy")
plt.title("Plotting the Model Accuracies", fontsize=16, fontweight="bold")

BEST ACCURACY IS FROM SVM

In [49]:
# Tabulate the held-out accuracy of every model.
from sklearn.metrics import accuracy_score

# `best_accuracy` is a cross-validated score computed on the training split.
# Score the refit tuned SVM on the held-out split instead, so that every row
# is measured on the same data.
tuned_svm_acc = accuracy_score(y_test, grid_search.predict(X_test))

model_scores = {
    'Logistic Regression': lra,
    'KNN': knna,
    'SVM': sva,
    'Kernel SVM': sva2,
    'Naive Bayes': nba,
    'Decision Tree': dta,
    'Random Forest': rfa,
    'XG Boost': xca,
    'Cat Boost': cba,
    'SVM (Hyperparameter Tuning)': tuned_svm_acc,
}
ds = pd.DataFrame({'models' : list(model_scores.keys()), 'Accuracy Score': list(model_scores.values())})

In [48]:
# models ordered from lowest to highest accuracy (returns a sorted copy)
ds.sort_values('Accuracy Score', ascending = True)