In [1]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm


In [2]:
data=pd.read_csv("/content/mobile_price_range_data.csv")
newdf=data.dropna()
#print(newdf)
x=newdf.iloc[:, :-1]
y=newdf.iloc[:,-1]


In [3]:
X_train, X_test,y_train, y_test = train_test_split(x,y,random_state=23,test_size=0.25, shuffle=True)


In [4]:
print('X_train : ')
print(X_train.head())
print('')
print('X_test : ')
print(X_test.head())
print('')
print('y_train : ')
print(y_train.head())
print('')
print('y_test : ')
print(y_test.head())


X_train : 
      battery_power  blue  clock_speed  dual_sim  fc  four_g  int_memory  \
1697           1152     1          2.1         1   0       1          26   
1260           1097     0          1.4         0   0       0          60   
610            1430     1          0.9         0   3       0          56   
1334           1039     0          0.5         1   8       0          47   
1550           1280     1          1.3         0   0       1          39   

      m_dep  mobile_wt  n_cores  pc  px_height  px_width   ram  sc_h  sc_w  \
1697    0.3         82        2   0       1304      1673  3284    15     2   
1260    0.8        181        1  17        405       519   509    15    12   
610     0.1        179        3   5        569       906   752    17     8   
1334    0.2        130        8   9        384      1878  1742    18    12   
1550    1.0        108        7   1        356       588  2335     6     4   

      talk_time  three_g  touch_screen  wifi  
1697          8 

## Logistic Regression

In [6]:
log_reg = LogisticRegression()
log_reg.fit(X_train,y_train)
log_y_pred = log_reg.predict(X_test)
accuracy_log_reg = accuracy_score(y_test, log_y_pred)*100
print ("Accuracy - Logistic Regression : "
, accuracy_log_reg)
print("Confusion Matrix: "
,confusion_matrix(y_test, log_y_pred))
print("Report : "
,classification_report(y_test, log_y_pred))


Accuracy - Logistic Regression :  65.0
Confusion Matrix:  [[ 95  27   0   0]
 [ 22  63  26   8]
 [  0  24  67  39]
 [  0   2  27 100]]
Report :                precision    recall  f1-score   support

           0       0.81      0.78      0.79       122
           1       0.54      0.53      0.54       119
           2       0.56      0.52      0.54       130
           3       0.68      0.78      0.72       129

    accuracy                           0.65       500
   macro avg       0.65      0.65      0.65       500
weighted avg       0.65      0.65      0.65       500



## KNN Classification

In [7]:
knn = KNeighborsClassifier(n_neighbors = 3)
knn.fit(X_train, y_train)
knn_y_pred = knn.predict(X_test)
accuracy_knn_cls = accuracy_score(y_test, knn_y_pred)*100
print ("Accuracy - KNN Classification : "
,accuracy_knn_cls )
print("Confusion Matrix: "
,confusion_matrix(y_test, knn_y_pred))
print("Report : "
,classification_report(y_test, knn_y_pred))

Accuracy - KNN Classification :  91.2
Confusion Matrix:  [[119   3   0   0]
 [  8  96  15   0]
 [  0   7 115   8]
 [  0   0   3 126]]
Report :                precision    recall  f1-score   support

           0       0.94      0.98      0.96       122
           1       0.91      0.81      0.85       119
           2       0.86      0.88      0.87       130
           3       0.94      0.98      0.96       129

    accuracy                           0.91       500
   macro avg       0.91      0.91      0.91       500
weighted avg       0.91      0.91      0.91       500



## Decision Tree classifier

In [8]:
dtc = DecisionTreeClassifier(criterion = "gini"
,random_state =
100,max_depth=3, min_samples_leaf=5)
dtc.fit(X_train, y_train)
dtc_y_pred = dtc.predict(X_test)
accuracy_dtc = accuracy_score(y_test, dtc_y_pred)*100
print ("Accuracy - Decision Tree classifier : "
,accuracy_dtc )
print("Confusion Matrix: "
,confusion_matrix(y_test, dtc_y_pred))
print("Report : "
,classification_report(y_test, dtc_y_pred))


Accuracy - Decision Tree classifier :  75.4
Confusion Matrix:  [[ 98  24   0   0]
 [ 12  89  18   0]
 [  0  24  67  39]
 [  0   0   6 123]]
Report :                precision    recall  f1-score   support

           0       0.89      0.80      0.84       122
           1       0.65      0.75      0.70       119
           2       0.74      0.52      0.61       130
           3       0.76      0.95      0.85       129

    accuracy                           0.75       500
   macro avg       0.76      0.76      0.75       500
weighted avg       0.76      0.75      0.75       500



## Random Forest classifier

In [9]:
rfc = RandomForestClassifier(n_estimators = 100)
rfc.fit(X_train, y_train)
rfc_y_pred = rfc.predict(X_test)
accuracy_rfc = accuracy_score(y_test, rfc_y_pred)*100
print ("Accuracy - Random Forest classifier : "
,accuracy_rfc )
print("Confusion Matrix: "
,confusion_matrix(y_test, rfc_y_pred))
print("Report : "
,classification_report(y_test, rfc_y_pred))


Accuracy - Random Forest classifier :  87.8
Confusion Matrix:  [[116   6   0   0]
 [ 10  95  14   0]
 [  0  13 105  12]
 [  0   0   6 123]]
Report :                precision    recall  f1-score   support

           0       0.92      0.95      0.94       122
           1       0.83      0.80      0.82       119
           2       0.84      0.81      0.82       130
           3       0.91      0.95      0.93       129

    accuracy                           0.88       500
   macro avg       0.88      0.88      0.88       500
weighted avg       0.88      0.88      0.88       500



## SVM Linear Kernel

In [10]:
svm_lin = svm.SVC(kernel = 'linear'
, random_state = 0, C=1.0)
svm_lin.fit(X_train, y_train)
svm_lin_y_pred = svm_lin.predict(X_test)
accuracy_svm_lin = accuracy_score(y_test, svm_lin_y_pred)*100
print ("Accuracy - SVM Linear Kernel : "
,accuracy_svm_lin)
print("Confusion Matrix: "
,confusion_matrix(y_test,
svm_lin_y_pred))
print("Report : "
,classification_report(y_test, svm_lin_y_pred))

Accuracy - SVM Linear Kernel :  97.2
Confusion Matrix:  [[121   1   0   0]
 [  4 111   4   0]
 [  0   1 128   1]
 [  0   0   3 126]]
Report :                precision    recall  f1-score   support

           0       0.97      0.99      0.98       122
           1       0.98      0.93      0.96       119
           2       0.95      0.98      0.97       130
           3       0.99      0.98      0.98       129

    accuracy                           0.97       500
   macro avg       0.97      0.97      0.97       500
weighted avg       0.97      0.97      0.97       500



## SVM rbf Kernel

In [11]:
svm_rbf = svm.SVC(kernel = 'rbf'
, random_state = 0, C=1.0)
svm_rbf.fit(X_train, y_train)
svm_rbf_y_pred = svm_rbf.predict(X_test)
accuracy_svm_rbf = accuracy_score(y_test, svm_rbf_y_pred)*100
print ("Accuracy - SVM rbf Kernel : "
,accuracy_svm_rbf)
print("Confusion Matrix: "
,confusion_matrix(y_test,
svm_rbf_y_pred))
print("Report : "
,classification_report(y_test, svm_rbf_y_pred))

Accuracy - SVM rbf Kernel :  95.39999999999999
Confusion Matrix:  [[120   2   0   0]
 [  5 108   6   0]
 [  0   3 120   7]
 [  0   0   0 129]]
Report :                precision    recall  f1-score   support

           0       0.96      0.98      0.97       122
           1       0.96      0.91      0.93       119
           2       0.95      0.92      0.94       130
           3       0.95      1.00      0.97       129

    accuracy                           0.95       500
   macro avg       0.95      0.95      0.95       500
weighted avg       0.95      0.95      0.95       500



## Best Accuracy Model

In [12]:
Acc_df = {'Model Name': ['Logistic Regression','KNN Classification','Decision Tree classifier',
'Random Forest classifier','SVM Linear Kernel','SVM RBF Kernel'],
'Accuracy': [accuracy_log_reg, accuracy_knn_cls,
accuracy_dtc, accuracy_rfc,accuracy_svm_lin,accuracy_svm_rbf]}
model_accuracy = pd.DataFrame(Acc_df)
print()
print("Best Accuracy Model: " ,model_accuracy.max())


Best Accuracy Model:  Model Name    SVM RBF Kernel
Accuracy                97.2
dtype: object
