# **Exploring the Effect of Phone Characteristics on Pricing**

### Data: https://www.kaggle.com/iabhishekofficial/mobile-price-classification#train.csv

### GitHub: https://github.com/ArielJosephCohen/Mod_5_P

### Presentation: https://docs.google.com/presentation/d/1_igi1IM-bah3LhsbrlclEXndclVpjCQVZg2Q0Xxwx-c/edit#slide=id.g35f391192_00

# **Notebook**

## Suppress warnings so they do not clutter the notebook output

In [1]:
import warnings
warnings.filterwarnings(action='ignore')

## Set a fixed random seed for reproducibility

In [2]:
# Single random seed shared by every later cell that takes a `seed` argument
# (feature reduction, the train/test split, the seeded models, the tree plot).
seed = 14

## Import custom functions

In [3]:
from helper_module import *
import helper_module as hm

Using TensorFlow backend.


## Load data

In [4]:
# No explicit pandas import is visible in this notebook: `pd` was only in scope
# as a side effect of `from helper_module import *`. Import it explicitly so
# this cell keeps working if helper_module stops exposing `pd` (for example by
# defining `__all__`).
import pandas as pd

# Read the training CSV; the path is relative to the notebook's working
# directory.
df = pd.read_csv('train.csv')

## Review correlation

In [5]:
# Ask the helper module for its column list (presumably the numeric feature
# names; confirm in helper_module), then run the correlation check on the raw
# frame. The printed flag is whatever check_correlation returns.
num_list = hm.create_num_list()
corr_check = hm.check_correlation(df, num_list)
print(f'no multicolinearity exists: {corr_check}')

no multicolinearity exists: True


## Recursive feature elimination (RFE)

In [6]:
# Feature reduction against the 'price_range' target. The trailing 10 is
# presumably the number of features to keep (TODO confirm in helper_module).
# NOTE(review): this rebinds `df`, so re-running the cell without reloading the
# data feeds the already-reduced frame back in. It also runs on the full
# dataset, before the train/test split further down.
df = hm.reduce_features(
    df,
    seed,
    'price_range',
    10,
)

## Filter outliers

In [7]:
# Outlier filtering with 'price_range' named as the target column. The 2.5 is
# presumably a cut-off such as a z-score threshold (TODO confirm in
# helper_module). `df` is rebound to the filtered frame.
df = hm.filter_outliers(
    df,
    2.5,
    'price_range',
)

## Separate predictors from target

In [8]:
# Split the frame into two objects, unpacked as predictors `X` and target `y`,
# with 'price_range' named as the target column.
X, y = hm.separate_x_and_y(df, 'price_range')

## Normalize features

In [9]:
# Apply the helper's feature transformation and rebind `X` to the result.
# NOTE(review): this runs on the full predictor set, before the train/test
# split. If transform_data fits anything to the data, test rows take part in
# that fit; confirm in helper_module.
X = hm.transform_data(
    X,
)

## Scale data

In [10]:
# Scale the (already transformed) predictors and rebind `X` to the result.
# NOTE(review): scaling happens before the train/test split below. If
# scale_data fits a scaler on its input, the held-out rows influence the
# scaling statistics; confirm in helper_module.
X = hm.scale_data(
    X,
)

## Train-Test-Split

In [11]:
# Seeded split of predictors and target into four pieces, unpacked in the
# order train-X, test-X, train-y, test-y.
(
    X_train,
    X_test,
    y_train,
    y_test,
) = hm.validation_split(X, y, seed)

## Training models

In [12]:
# Logistic regression with the training split in both argument pairs. The
# testing section passes X_test/y_test as the second pair, so the second pair
# is presumably the evaluation data (confirm in helper_module).
lr_scores_train, lr_cm_train = hm.models.logistic_regression_model(
    X_train, y_train,
    X_train, y_train,
    seed,
)

In [13]:
# Support vector machine, training split supplied as both argument pairs
# (presumably fit data, then evaluation data).
svc_scores_train, svc_cm_train = hm.models.support_vector_machine_model(
    X_train, y_train,
    X_train, y_train,
    seed,
)

In [14]:
# k-nearest neighbours, training split supplied as both argument pairs.
# This helper is called without a seed.
knn_scores_train, knn_cm_train = hm.models.knn_model(
    X_train, y_train,
    X_train, y_train,
)

In [15]:
# Gaussian naive Bayes, training split supplied as both argument pairs.
# This helper is called without a seed.
gnb_scores_train, gnb_cm_train = hm.models.gaussian_naive_bayes_model(
    X_train, y_train,
    X_train, y_train,
)

In [16]:
# Linear SVC, training split supplied as both argument pairs, seeded.
lsvc_scores_train, lsvc_cm_train = hm.models.linear_svc_model(
    X_train, y_train,
    X_train, y_train,
    seed,
)

In [17]:
# Stochastic gradient descent classifier, training split supplied as both
# argument pairs, seeded.
sgd_scores_train, sgd_cm_train = hm.models.stochastic_gradient_descent_model(
    X_train, y_train,
    X_train, y_train,
    seed,
)

In [18]:
# Decision tree, training split supplied as both argument pairs, seeded.
dt_scores_train, dt_cm_train = hm.models.decision_tree_model(
    X_train, y_train,
    X_train, y_train,
    seed,
)

In [19]:
# Random forest, training split supplied as both argument pairs, seeded.
rf_scores_train, rf_cm_train = hm.models.random_forest_model(
    X_train, y_train,
    X_train, y_train,
    seed,
)

In [20]:
# XGBoost, training split supplied as both argument pairs, seeded.
xgb_scores_train, xgb_cm_train = hm.models.XGBoost_model(
    X_train, y_train,
    X_train, y_train,
    seed,
)

## Testing models

In [21]:
# Logistic regression again, now with the held-out X_test/y_test as the second
# argument pair (presumably the evaluation data; confirm in helper_module).
# NOTE(review): this is a fresh helper call, so the model is presumably
# refitted here and not reused from the training-section cell.
lr_scores_test, lr_cm_test = hm.models.logistic_regression_model(
    X_train, y_train,
    X_test, y_test,
    seed,
)

In [22]:
# Support vector machine with the held-out split as the second argument pair.
svc_scores_test, svc_cm_test = hm.models.support_vector_machine_model(
    X_train, y_train,
    X_test, y_test,
    seed,
)

In [23]:
# k-nearest neighbours with the held-out split as the second argument pair.
# This helper is called without a seed.
knn_scores_test, knn_cm_test = hm.models.knn_model(
    X_train, y_train,
    X_test, y_test,
)

In [24]:
# Gaussian naive Bayes with the held-out split as the second argument pair.
# This helper is called without a seed.
gnb_scores_test, gnb_cm_test = hm.models.gaussian_naive_bayes_model(
    X_train, y_train,
    X_test, y_test,
)

In [25]:
# Linear SVC with the held-out split as the second argument pair, seeded.
lsvc_scores_test, lsvc_cm_test = hm.models.linear_svc_model(
    X_train, y_train,
    X_test, y_test,
    seed,
)

In [26]:
# Stochastic gradient descent classifier with the held-out split as the second
# argument pair, seeded.
sgd_scores_test, sgd_cm_test = hm.models.stochastic_gradient_descent_model(
    X_train, y_train,
    X_test, y_test,
    seed,
)

In [27]:
# Decision tree with the held-out split as the second argument pair, seeded.
dt_scores_test, dt_cm_test = hm.models.decision_tree_model(
    X_train, y_train,
    X_test, y_test,
    seed,
)

In [28]:
# Random forest with the held-out split as the second argument pair, seeded.
rf_scores_test, rf_cm_test = hm.models.random_forest_model(
    X_train, y_train,
    X_test, y_test,
    seed,
)

In [29]:
# XGBoost with the held-out split as the second argument pair, seeded.
xgb_scores_test, xgb_cm_test = hm.models.XGBoost_model(
    X_train, y_train,
    X_test, y_test,
    seed,
)

## Results summary

In [30]:
# Build one summary frame per evaluation set from the `test_results` and
# `train_results` attributes of hm.models. Those are presumably filled in by
# the model calls in the two sections above (confirm in helper_module); if so,
# re-running model cells before this one may change what is summarised.
test_results = hm.create_summary_dataframe(
    hm.models.test_results,
)
train_results = hm.create_summary_dataframe(
    hm.models.train_results,
)

In [31]:
# Feature-importance table computed from the training split only, seeded.
feature_importance_df = hm.feature_importance_dataframe(
    X_train,
    y_train,
    seed,
)

## Decision tree visual

In [37]:
# Draw a decision tree from the training split. The trailing 3 is presumably
# the maximum tree depth (TODO confirm in helper_module).
# NOTE(review): this cell's execution count is 37 while the previous cell is
# 31, so it was run out of sequence. Verify with Restart Kernel -> Run All.
decision_tree_visual = hm.draw_decision_tree(
    seed,
    X_train,
    y_train,
    3,
)