# Mobile Price Classification

### Dataset description

1. id  ID
2. battery_power   Total energy a battery can store at one time, measured in mAh
3. blue    Has bluetooth or not
4. clock_speed speed at which microprocessor executes instructions
5. dual_sim    Has dual sim support or not
6. fc  Front Camera mega pixels
7. four_g  Has 4G or not
8. int_memory  Internal Memory in Gigabytes
9. m_dep   Mobile Depth in cm
10. mobile_wt   Weight of mobile phone
11. n_cores Number of cores of processor
12. pc  Primary Camera mega pixels
13. px_height   Pixel Resolution Height
14. px_width    Pixel Resolution Width
15. ram Random Access Memory in Megabytes
16. sc_h    Screen Height of mobile in cm
17. sc_w    Screen Width of mobile in cm
18. talk_time   Longest time that a single battery charge will last when you are talking
19. three_g Has 3G or not
20. touch_screen    Has touch screen or not
21. wifi    Has wifi or not


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

## Loading dataset

In [None]:
# Read the training CSV into a DataFrame; the trailing expression previews
# its first five rows in the notebook output.
train_csv = '../dataset/train.csv'
ds = pd.read_csv(train_csv)
ds.head(n=5)

In [None]:
# Separate the frame into the feature matrix (X) and the target column (y).
target_column = 'price_range'
X = ds.drop(target_column, axis=1)
y = ds[target_column]

# Summary statistics of the full frame (computed on ds, not on X).
ds.describe()

In [None]:
# Standardise every column of ds, then draw the pairwise correlation matrix
# of the standardised frame as a heatmap.
scaler = StandardScaler()
standardised = scaler.fit_transform(ds)
df_corr = pd.DataFrame(standardised, columns=ds.columns)
corr = df_corr.corr()

fig_width, fig_height = 25, 11
plt.figure(figsize=(fig_width, fig_height))
sns.heatmap(corr, cmap='inferno')
plt.show()

In [None]:
from sklearn import svm
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV

# Hold out part of the data for evaluation.  A fixed seed makes the split,
# and therefore every score derived from it, reproducible between runs.
# NOTE(review): test_size=0.7 keeps only 30% of the rows for training;
# confirm this is intended and not a swap of the usual 70/30 split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.7, random_state=42)

In [None]:
# Simple evaluation with the SVM's default C parameter (C=1)

#clf = svm.SVC(kernel = 'linear')
#clf = svm.SVC(kernel='poly', degree=8, gamma = 'scale')
#clf = svm.SVC(kernel='rbf', gamma = 'scale')
#clf = svm.SVC(kernel='sigmoid', gamma = 'scale')
#clf.fit(X_train, y_train)
#y_pred = clf.predict(X_test)

# Cross-validation

In [None]:
# Evaluation with a search for the best hyper-parameters

# Candidate values explored by the cross-validated grid search.
# Each C value appears exactly once: listing the same number twice
# (e.g. 0.10 and 0.1) only repeats identical fits.
# NOTE(review): 0.01 may have been the intended second value - confirm.
parametros = {'gamma': [1e-2, 1e-3, 1e-4, 1e-5],
              'C': [0.001, 0.1, 10, 25, 50, 100, 1000]}

# Alternative kernels, kept for quick switching:
#modelo = svm.SVC(kernel = 'linear')
#modelo = svm.SVC(kernel = 'poly')
modelo = svm.SVC(kernel='rbf')
#modelo = svm.SVC(kernel = 'sigmoid')

# 4-fold grid search on the training split, then predict the held-out split
# with the refitted best estimator.
clf = GridSearchCV(modelo, parametros, cv=4)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(clf.best_params_)

# Cross-validation results.
# clf is the GridSearchCV object itself, so every outer fold re-runs the
# whole grid search (nested cross-validation); this is slow by construction.
resultado = cross_val_score(clf, X_train, y_train, cv=4)
print(resultado)

# Plotting the kernels

In [None]:
def make_meshgrid(x, y, h=.02):
    """Build a rectangular evaluation grid around two coordinate arrays.

    Parameters
    ----------
    x : array-like exposing ``min()`` / ``max()``
        Values that determine the extent of the horizontal axis.
    y : array-like exposing ``min()`` / ``max()``
        Values that determine the extent of the vertical axis.
    h : float, optional
        Step between consecutive grid coordinates on both axes.

    Returns
    -------
    tuple of ndarray
        ``(xx, yy)`` coordinate matrices as produced by ``np.meshgrid``.
    """
    # The grid extends one unit beyond the data on every side.
    margin = 1
    horizontal = np.arange(x.min() - margin, x.max() + margin, h)
    vertical = np.arange(y.min() - margin, y.max() + margin, h)
    grid_x, grid_y = np.meshgrid(horizontal, vertical)
    return grid_x, grid_y


def plot_contours(ax, clf, xx, yy, **params):
    """Draw a classifier's predictions over a mesh grid as filled contours.

    Parameters
    ----------
    ax : object exposing ``contourf``
        Axes on which the contours are drawn.
    clf : object exposing ``predict``
        Classifier queried once with every grid point as a two-column array.
    xx, yy : ndarray
        Coordinate matrices of identical shape (e.g. from ``make_meshgrid``).
    **params
        Forwarded unchanged to ``ax.contourf``.

    Returns
    -------
    Whatever ``ax.contourf`` returns.
    """
    # Flatten both coordinate matrices into an (n_points, 2) sample array.
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    # Predict once, then fold the labels back into the grid's shape.
    labels = clf.predict(grid_points).reshape(xx.shape)
    return ax.contourf(xx, yy, labels, **params)


# Decision surfaces can only be drawn in 2-D, so the classifiers below are
# trained on the first two columns of the dataset only.
# NOTE: this rebinds the notebook-level X and y.
feature_names = ds.columns[:2]
X = ds.iloc[:, :2].values
y = ds['price_range']

# we create an instance of SVM and fit out data. We do not scale our
# data since we want to plot the support vectors
C = 1.0  # SVM regularization parameter
models = (svm.SVC(kernel='linear', C=C),
          svm.SVC(kernel='rbf', gamma=0.7, C=C),
          svm.SVC(kernel='poly', degree=3, C=C),
          # scikit-learn's kernel name is 'sigmoid'; `degree` is only used
          # by the polynomial kernel, so it is not passed here.
          svm.SVC(kernel='sigmoid', C=C))
# Lazy generator: each model is fitted when the plotting loop reaches it.
models = (model.fit(X, y) for model in models)

# title for the plots
titles = ('SVC with linear kernel',
          'SVC with RBF kernel',
          'SVC with polynomial (degree 3) kernel',
          'SVC with sigmoid kernel')

# Set-up 2x2 grid for plotting.
fig, sub = plt.subplots(2, 2)
plt.subplots_adjust(wspace=0.4, hspace=0.4)

X0, X1 = X[:, 0], X[:, 1]
# NOTE(review): make_meshgrid uses a 0.02 step by default; if a feature spans
# a wide numeric range the grid becomes very large and prediction slow -
# consider passing a coarser `h`.
xx, yy = make_meshgrid(X0, X1)

# The loop variable is deliberately not called `clf`, so the fitted
# grid-search object of that name is left untouched.
for model, title, ax in zip(models, titles, sub.flatten()):
    plot_contours(ax, model, xx, yy,
                  cmap=plt.cm.coolwarm, alpha=0.8)
    ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    # Label the axes with the names of the two plotted columns.
    ax.set_xlabel(feature_names[0])
    ax.set_ylabel(feature_names[1])
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)

plt.show()

# Confusion matrix

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix on the current matplotlib figure.

    Parameters
    ----------
    cm : ndarray of shape (n_classes, n_classes)
        Confusion matrix; rows are true labels, columns predicted labels.
    classes : sequence
        Tick labels, one per class, in the same order as the rows of `cm`.
    normalize : bool, optional
        When True every row is divided by its sum before printing/plotting.
        Rows that sum to zero are left as zeros instead of producing NaN.
    title : str, optional
        Title of the plot.
    cmap : matplotlib colormap, optional
        Colormap used for the image.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Guard against rows with no samples: dividing by zero would fill
        # the row with NaN and break the text colouring threshold below.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    # np.ndindex walks every (row, column) pair in row-major order and needs
    # no import beyond numpy, which this function already uses.
    for i, j in np.ndindex(*cm.shape):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix
import itertools

# Confusion matrix of the held-out predictions, drawn twice:
# raw counts first, then row-normalised.
class_names = ['very cheap','affordable','moderate','expensive']
cnf_matrix = confusion_matrix(y_test, y_pred)
np.set_printoptions(precision=2)

plot_variants = (
    (False, 'Confusion matrix, without normalization'),
    (True, 'Normalized confusion matrix'),
)
for normalize_flag, heading in plot_variants:
    # Each variant gets its own figure.
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names,
                          normalize=normalize_flag, title=heading)
plt.show()


In [None]:
# Accuracy of the predictions in y_pred against the true labels in y_test
# (displayed as the cell's output).
accuracy_score(y_test, y_pred)