In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split,GridSearchCV,KFold
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,roc_auc_score,classification_report

In [2]:
scaler = StandardScaler()

In [3]:
df = pd.read_csv('D:\Internship\Mobile_price_prediction_working\dataset\dataset.csv')

In [4]:
df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [5]:
num_col = ['battery_power','clock_speed','fc','int_memory','m_dep','mobile_wt','n_cores','pc','px_height','px_width','ram','sc_h','sc_w','talk_time']
cat_col = ['blue','dual_sim','four_g','three_g','touch_screen','wifi']
target = 'price_range'

In [6]:
X = df.drop('price_range',axis=1)
y = df['price_range']

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [8]:
preprocessor = ColumnTransformer(transformers=[
    ('Scaling',scaler,num_col)
])

In [9]:
classifiers = {
    "LogisticRegression":LogisticRegression(),
    "RandomForestClassifier":RandomForestClassifier(random_state=42,n_estimators=1000),
    "XGClassifier":XGBClassifier()
}

In [11]:
for name,clf in classifiers.items():
    model = Pipeline(steps=[
        ('Preprocessing',preprocessor),
        ('Classifier',clf)
    ])
    model.fit(X_train,y_train)
    y_pred = model.predict(X_test)

    print(f"\n-------{name}-------")
    print("\nAccuracy: ",accuracy_score(y_test,y_pred))
    print("\n",classification_report(y_test,y_pred))
    print("Confusion Matrix: \n",confusion_matrix(y_test,y_pred))


-------LogisticRegression-------

Accuracy:  0.985

               precision    recall  f1-score   support

           0       1.00      0.99      1.00       105
           1       0.96      1.00      0.98        91
           2       1.00      0.95      0.97        92
           3       0.98      1.00      0.99       112

    accuracy                           0.98       400
   macro avg       0.99      0.98      0.98       400
weighted avg       0.99      0.98      0.98       400

Confusion Matrix: 
 [[104   1   0   0]
 [  0  91   0   0]
 [  0   3  87   2]
 [  0   0   0 112]]

-------RandomForestClassifier-------

Accuracy:  0.8875

               precision    recall  f1-score   support

           0       0.94      0.95      0.95       105
           1       0.86      0.87      0.86        91
           2       0.80      0.84      0.82        92
           3       0.93      0.88      0.91       112

    accuracy                           0.89       400
   macro avg       0.88      