In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report , confusion_matrix 



In [3]:
# Load the dataset from CSV into a DataFrame
df = pd.read_csv("dataset.csv")

In [4]:
# Basic EDA: column dtypes / non-null counts, then summary statistics.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly -- wrapping it in print() emits a stray "None" line.
df.info()
print(df.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   battery_power  2000 non-null   int64  
 1   blue           2000 non-null   int64  
 2   clock_speed    2000 non-null   float64
 3   dual_sim       2000 non-null   int64  
 4   fc             2000 non-null   int64  
 5   four_g         2000 non-null   int64  
 6   int_memory     2000 non-null   int64  
 7   m_dep          2000 non-null   float64
 8   mobile_wt      2000 non-null   int64  
 9   n_cores        2000 non-null   int64  
 10  pc             2000 non-null   int64  
 11  px_height      2000 non-null   int64  
 12  px_width       2000 non-null   int64  
 13  ram            2000 non-null   int64  
 14  sc_h           2000 non-null   int64  
 15  sc_w           2000 non-null   int64  
 16  talk_time      2000 non-null   int64  
 17  three_g        2000 non-null   int64  
 18  touch_sc

In [5]:
# Missing-value check: number of null entries in each column
print(df.isna().sum())

battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64


In [6]:
# Separate the target column (price_range) from the predictor columns
y = df["price_range"]
X = df.drop(columns=["price_range"])

In [8]:
# Train/test split: 30% of the rows are held out for testing; the seed is
# fixed so the same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [9]:
# Feature scaling: the scaler is fitted on the training split only and the
# same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [11]:
# Step 7: Model Training
# Random forest with default hyper-parameters; only the seed is set.
clf = RandomForestClassifier(random_state=42)
clf.fit(X=X_train, y=y_train)


In [12]:
# Model evaluation: predict on the test split, then print the confusion
# matrix and the per-class precision/recall/F1 report.
y_pred = clf.predict(X_test)
print("confusion_matrix", confusion_matrix(y_test, y_pred), sep="\n")
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")


confusion_matrix
[[144   7   0   0]
 [ 10 120  16   0]
 [  0  21 115  12]
 [  0   0  13 142]]
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.95      0.94       151
           1       0.81      0.82      0.82       146
           2       0.80      0.78      0.79       148
           3       0.92      0.92      0.92       155

    accuracy                           0.87       600
   macro avg       0.87      0.87      0.87       600
weighted avg       0.87      0.87      0.87       600



In [None]:
# Step 9: Feature Importance
# Pair every predictor column with the fitted forest's importance score and
# list them from most to least important.
feature_importances = pd.DataFrame(
    {"Feature": X.columns, "Importance": clf.feature_importances_}
).sort_values(by="Importance", ascending=False)
print("Feature Importances:", feature_importances, sep="\n")


Feature Importances:
          Feature  Importance
13            ram    0.458098
0   battery_power    0.073734
12       px_width    0.059472
11      px_height    0.057745
6      int_memory    0.040384
8       mobile_wt    0.040155
10             pc    0.031049
16      talk_time    0.030855
2     clock_speed    0.030386
15           sc_w    0.029968
14           sc_h    0.029481
4              fc    0.027306
7           m_dep    0.024602
9         n_cores    0.023988
3        dual_sim    0.007676
19           wifi    0.007420
1            blue    0.007279
18   touch_screen    0.007040
5          four_g    0.006971
17        three_g    0.006391


In [14]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid for the random forest: 3 * 4 * 3 * 3 = 108 candidates,
# each scored by accuracy with 3-fold cross-validation on the training split.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)
grid_search = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    cv=3,
    scoring="accuracy",
    verbose=2,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)
print("Best Parameters:", grid_search.best_params_)


Fitting 3 folds for each of 108 candidates, totalling 324 fits
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 200}


In [17]:
# Logistic regression (default solver) trained on the scaled features
from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)
y_pred_log_reg = log_reg.predict(X_test)
print(
    "Logistic Regression Confusion Matrix:",
    confusion_matrix(y_test, y_pred_log_reg),
    sep="\n",
)
print(
    "Logistic Regression Classification Report:",
    classification_report(y_test, y_pred_log_reg),
    sep="\n",
)

Logistic Regression Confusion Matrix:
[[144   7   0   0]
 [  3 142   1   0]
 [  0   7 135   6]
 [  0   0   1 154]]
Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.95      0.97       151
           1       0.91      0.97      0.94       146
           2       0.99      0.91      0.95       148
           3       0.96      0.99      0.98       155

    accuracy                           0.96       600
   macro avg       0.96      0.96      0.96       600
weighted avg       0.96      0.96      0.96       600



In [18]:
from sklearn.svm import SVC

# Support vector classifier; only the seed is set, everything else is default.
svm_clf = SVC(random_state=42).fit(X_train, y_train)
y_pred_svm = svm_clf.predict(X_test)
print("SVM Confusion Matrix:", confusion_matrix(y_test, y_pred_svm), sep="\n")
print("SVM Classification Report:", classification_report(y_test, y_pred_svm), sep="\n")


SVM Confusion Matrix:
[[135  16   0   0]
 [ 14 123   9   0]
 [  0  19 122   7]
 [  0   0  13 142]]
SVM Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.89      0.90       151
           1       0.78      0.84      0.81       146
           2       0.85      0.82      0.84       148
           3       0.95      0.92      0.93       155

    accuracy                           0.87       600
   macro avg       0.87      0.87      0.87       600
weighted avg       0.87      0.87      0.87       600



In [19]:
# Neural network: multi-layer perceptron classifier, up to 1000 training iterations
from sklearn.neural_network import MLPClassifier

nn_clf = MLPClassifier(max_iter=1000, random_state=42).fit(X_train, y_train)
y_pred_nn = nn_clf.predict(X_test)
print(
    "Neural Network Confusion Matrix:",
    confusion_matrix(y_test, y_pred_nn),
    sep="\n",
)
print(
    "Neural Network Classification Report:",
    classification_report(y_test, y_pred_nn),
    sep="\n",
)


Neural Network Confusion Matrix:
[[138  13   0   0]
 [  9 134   3   0]
 [  0  11 128   9]
 [  0   0  14 141]]
Neural Network Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.91      0.93       151
           1       0.85      0.92      0.88       146
           2       0.88      0.86      0.87       148
           3       0.94      0.91      0.92       155

    accuracy                           0.90       600
   macro avg       0.90      0.90      0.90       600
weighted avg       0.90      0.90      0.90       600



In [20]:
# Step 10: Logistic regression fitted with the 'saga' solver, used here as the
# notebook's "gradient descent" example (the printed labels keep that name).
gd_clf = LogisticRegression(solver="saga", max_iter=1000, random_state=42).fit(
    X_train, y_train
)
y_pred_gd = gd_clf.predict(X_test)
print(
    "Gradient Descent Confusion Matrix:",
    confusion_matrix(y_test, y_pred_gd),
    sep="\n",
)
print(
    "Gradient Descent Classification Report:",
    classification_report(y_test, y_pred_gd),
    sep="\n",
)


Gradient Descent Confusion Matrix:
[[144   7   0   0]
 [  3 142   1   0]
 [  0   7 135   6]
 [  0   0   1 154]]
Gradient Descent Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.95      0.97       151
           1       0.91      0.97      0.94       146
           2       0.99      0.91      0.95       148
           3       0.96      0.99      0.98       155

    accuracy                           0.96       600
   macro avg       0.96      0.96      0.96       600
weighted avg       0.96      0.96      0.96       600



In [21]:
# Logistic regression (with the default solver and with 'saga') reached the
# highest test accuracy (0.96) of the models tried above, so the saga-fitted
# model is the one persisted here.

import joblib  # For saving models
import h5py  # not used by joblib.dump; kept in case other cells rely on it

# gd_clf was trained on standardized features, so the fitted scaler is saved
# as well: without it the stored model cannot be applied to raw inputs.
joblib.dump(scaler, 'scaler.joblib')

# NOTE: despite the .h5 suffix this file is a joblib pickle, not HDF5 -- load
# it with joblib.load(). The name is unchanged so existing consumers still
# find it. Kept as the last expression so the cell still displays the path.
joblib.dump(gd_clf, 'gradient_descent.h5')


['gradient_descent.h5']