# Import Libraries

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Load Data

In [2]:
# Read the labelled training set from the Kaggle input directory.
data = pd.read_csv('../input/mobile-price-classification/train.csv')

# Data Analysis

In [3]:
# Preview the first rows of the training frame.
data.head()

In [4]:
# Column dtypes and non-null counts of the training frame.
data.info()

In [5]:
# Summary statistics for the numeric columns of the training frame.
data.describe()

# Data Preprocessing

In [6]:
from sklearn.preprocessing import StandardScaler

# Columns that are standardised (zero mean, unit variance); every other
# column of `data` is left as loaded.
scaled_columns = ['battery_power', 'int_memory', 'mobile_wt', 'px_height', 'px_width', 'ram']

# Fit ONE scaler on all six columns in a single call. StandardScaler works
# column-wise, so the values written back are the same as scaling each column
# separately, but `stand` now keeps the mean/scale of every scaled column
# instead of only the last one processed, so it can later `transform` new data.
# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split performed further down, so held-out rows influence the statistics.
stand = StandardScaler()
data[scaled_columns] = stand.fit_transform(data[scaled_columns])

In [7]:
# Features are every column except the last one; the target is the last column.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

In [8]:
# Preview the feature matrix.
X.head()

In [9]:
# Preview the target column.
y.head()

#### Split Data

In [10]:
from sklearn.model_selection import train_test_split

# 75% of the rows go to training, the remainder to the held-out test set.
# A fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.75,
    random_state=0,
)

# Logistic Regression

In [11]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, cross_validate

# Baseline logistic regression, evaluated with 10-fold cross-validation
# over the complete X / y (not just the training split).
lg = LogisticRegression(random_state=42)
cross_validate(estimator=lg, X=X, y=y, cv=10)

In [12]:
# Average of the ten per-fold scores for the logistic regression baseline.
np.mean(cross_val_score(lg, X, y, cv=10))

In [13]:
from sklearn.model_selection import GridSearchCV

# Regularisation strengths 1e-3 ... 1e3, one per decade.
C_values = np.logspace(-3, 3, 7)

# The default 'lbfgs' solver only supports the 'l2' penalty; asking it for 'l1'
# makes every such fit fail, and GridSearchCV then records NaN scores for those
# candidates (the warning is hidden by the global warnings filter). The 'l1'
# candidates are therefore searched with 'liblinear', which supports them,
# while the 'l2' candidates keep the estimator's own solver.
grid = [
    {"C": C_values, "penalty": ["l2"]},
    {"C": C_values, "penalty": ["l1"], "solver": ["liblinear"]},
]

# 10-fold grid search on the training split only.
logreg_cv = GridSearchCV(lg, grid, cv=10)
logreg_cv.fit(X_train, y_train)
print("tuned hpyerparameters :(best parameters) ", logreg_cv.best_params_)
print("accuracy :", logreg_cv.best_score_)

# KNN

#### Cross Validation

In [14]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score, cross_validate

# k-nearest-neighbours with library defaults, 10-fold cross-validated on X / y.
kn = KNeighborsClassifier()
cross_validate(estimator=kn, X=X, y=y, cv=10)

In [15]:
# Average of the ten per-fold scores for the default KNN model.
np.mean(cross_val_score(kn, X, y, cv=10))

#### Grid Search

In [16]:
# Candidate neighbourhood sizes (1 through 49) and both weighting schemes.
k_range = [*range(1, 50)]
weight_options = ["uniform", "distance"]
param_grid = {"n_neighbors": k_range, "weights": weight_options}

# 10-fold accuracy-scored search over the KNN grid, fitted on the training split.
grid = GridSearchCV(estimator=kn, param_grid=param_grid, scoring='accuracy', cv=10)
grid.fit(X_train, y_train)
print(grid.best_score_)
print(grid.best_params_)

# SVM

#### Cross Validation

In [17]:
from sklearn.svm import SVC

# Support-vector classifier with library defaults, 10-fold cross-validated on X / y.
sv = SVC()
cross_validate(estimator=sv, X=X, y=y, cv=10)

In [18]:
# Average of the ten per-fold scores for the default SVC.
np.mean(cross_val_score(sv, X, y, cv=10))

#### Grid Search

In [19]:
# Two sub-grids: kernels that are searched over C only, and the RBF kernel,
# which is additionally searched over gamma.
# The gamma list previously contained 0.01 twice, which only repeated the same
# fits; the duplicate is removed (the winning parameters are unaffected).
# NOTE(review): the second 0.01 may have been intended as 0.001 - confirm.
params = [
    {'C': [1, 10, 100], 'kernel': ['linear', 'sigmoid', 'poly']},
    {'C': [1, 10, 100], 'kernel': ['rbf'], 'gamma': [0.5, 0.6, 0.7, 0.1, 0.01]},
]

# 10-fold accuracy-scored search, fitted on the training split only.
# `grid_search` is reused further down for the test-set predictions.
grid_search = GridSearchCV(estimator=sv,
                           param_grid=params,
                           scoring='accuracy',
                           cv=10)
grid_search.fit(X_train, y_train)
print(grid_search.best_score_)
print(grid_search.best_params_)

# Decision Tree

#### Cross Validation

In [20]:
from sklearn.tree import DecisionTreeClassifier

# This section is headed "Decision Tree", but the cell used to build a
# RandomForestClassifier identical to the one in the Random Forest section,
# so no decision tree was ever evaluated. Use a single tree with the same
# criterion and seed instead.
dt = DecisionTreeClassifier(criterion='entropy', random_state=0)

# 10-fold cross-validation on the complete X / y.
cross_validate(dt, X, y, cv=10)

In [21]:
# Average of the ten per-fold scores for `dt`.
np.mean(cross_val_score(dt, X, y, cv=10))

#### Grid Search

In [22]:
# Tune the split criterion and the maximum depth (1 through 49) of `dt`.
tree_para = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [*range(1, 50)],
}

# 5-fold grid search, fitted on the training split.
clf = GridSearchCV(estimator=dt, param_grid=tree_para, cv=5)
clf.fit(X_train, y_train)
print(clf.best_score_)
print(clf.best_params_)

# Random Forest

#### Cross Validation

In [23]:
from sklearn.ensemble import RandomForestClassifier

# Ten-tree random forest using the entropy criterion and a fixed seed,
# 10-fold cross-validated on the complete X / y.
rf = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
cross_validate(estimator=rf, X=X, y=y, cv=10)

In [24]:
# Average of the ten per-fold scores for the random forest.
np.mean(cross_val_score(rf, X, y, cv=10))

#### Grid Search

In [25]:
# Search both split criteria and forest sizes 10, 20, ..., 190.
params = [
    {
        'criterion': ['gini', 'entropy'],
        'n_estimators': list(range(10, 200, 10)),
    }
]

# 10-fold accuracy-scored search, fitted on the training split.
rfc = GridSearchCV(estimator=rf, param_grid=params, scoring='accuracy', cv=10)
rfc.fit(X_train, y_train)
print(rfc.best_score_)
print(rfc.best_params_)

# GaussianNB

#### Cross Validation

In [26]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with library defaults, 10-fold cross-validated on X / y.
nb = GaussianNB()
cross_validate(estimator=nb, X=X, y=y, cv=10)

In [28]:
# Average of the ten per-fold scores for Gaussian naive Bayes.
np.mean(cross_val_score(nb, X, y, cv=10))

# The Best Algorithm is SVM

In [30]:
# `grid_search` is the SVC grid search fitted earlier; predict the held-out
# test split with it.
y_pred = grid_search.predict(X_test)

In [31]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the held-out predictions
# (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

In [32]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the held-out predictions.
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

# Price Prediction for test.csv Using the Grid-Searched SVM

In [33]:
# Read the competition test set (the one to be predicted) from the Kaggle input directory.
df = pd.read_csv(filepath_or_buffer='../input/mobile-price-classification/test.csv')

In [34]:
# Preview the first rows of the test frame.
df.head()

In [35]:
# Model input: the test frame without its 'id' column (df itself is left intact).
df1 = df.drop(columns=['id'])

In [36]:
# Standardise the test features with the TRAINING-set statistics.
# Re-fitting a scaler on the test data (as this cell used to do) centres and
# scales the test columns with different means/variances than the ones the
# model was trained on, so the two feature spaces no longer line up.
# A dedicated scaler is fitted on the raw training columns (read again from
# disk, because `data` has already been overwritten with scaled values) and is
# then only *applied* to the test columns.
scaled_columns = ['battery_power', 'int_memory', 'mobile_wt', 'px_height', 'px_width', 'ram']
train_raw = pd.read_csv('../input/mobile-price-classification/train.csv')
test_scaler = StandardScaler().fit(train_raw[scaled_columns])
df1[scaled_columns] = test_scaler.transform(df[scaled_columns])

In [37]:
# Predict a class for every test row with the fitted SVC grid search and
# store the result as a new 'price range' column on the original test frame.
test_pred = grid_search.predict(X=df1)
df.loc[:, 'price range'] = test_pred

In [38]:
# Preview the test frame, now including the 'price range' predictions.
df.head()