# Import Necessary Library

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import warnings
warnings.filterwarnings('ignore')

# 1. Data Collection

In [2]:
company = pd.read_csv('Company_Data (1).csv')
company

Unnamed: 0,Sales,CompPrice,Income,Advertising,Population,Price,ShelveLoc,Age,Education,Urban,US
0,9.50,138,73,11,276,120,Bad,42,17,Yes,Yes
1,11.22,111,48,16,260,83,Good,65,10,Yes,Yes
2,10.06,113,35,10,269,80,Medium,59,12,Yes,Yes
3,7.40,117,100,4,466,97,Medium,55,14,Yes,Yes
4,4.15,141,64,3,340,128,Bad,38,13,Yes,No
...,...,...,...,...,...,...,...,...,...,...,...
395,12.57,138,108,17,203,128,Good,33,14,Yes,Yes
396,6.14,139,23,3,37,120,Medium,55,11,No,Yes
397,7.41,162,26,12,368,159,Medium,40,18,Yes,Yes
398,5.94,100,79,7,284,95,Bad,50,12,Yes,Yes


# 3. Data Undersatnding

In [3]:
print(company.shape)
print(company.isna().sum())
print(company.dtypes)

(400, 11)
Sales          0
CompPrice      0
Income         0
Advertising    0
Population     0
Price          0
ShelveLoc      0
Age            0
Education      0
Urban          0
US             0
dtype: int64
Sales          float64
CompPrice        int64
Income           int64
Advertising      int64
Population       int64
Price            int64
ShelveLoc       object
Age              int64
Education        int64
Urban           object
US              object
dtype: object


In [4]:
company.ShelveLoc.unique()

array(['Bad', 'Good', 'Medium'], dtype=object)

# 4.Data Preparation

In [5]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
company['ShelveLoc']= le.fit_transform(company['ShelveLoc'])
company['Urban']=  le.fit_transform(company['Urban'])
company['US'] =  le.fit_transform(company['US'])

In [6]:
company.dtypes

Sales          float64
CompPrice        int64
Income           int64
Advertising      int64
Population       int64
Price            int64
ShelveLoc        int32
Age              int64
Education        int64
Urban            int32
US               int32
dtype: object

# 5.Model Building

In [7]:
X = company.drop('ShelveLoc',axis=1)
y = company[['ShelveLoc']]

In [8]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=.15,random_state=12,stratify=y)

In [9]:
print(X_train.shape,y_train.shape)
print(X_test.shape,y_test.shape)

(340, 10) (340, 1)
(60, 10) (60, 1)


# 6.Model training

In [12]:
from sklearn.model_selection import GridSearchCV
gs = GridSearchCV(estimator =rf_classifier,param_grid= {'criterion':['gini','entropy'],
                                                        'max_depth':[2,3,4,5,6,7,8,9,10],
                                                       'n_estimators':[10,20,30,40,50,60,70,80,90,100]},cv =5)
grid = gs.fit(X,y)

In [13]:
grid.best_params_

{'criterion': 'gini', 'max_depth': 10, 'n_estimators': 30}

In [14]:
grid.best_score_

0.665

In [16]:
from sklearn.ensemble import RandomForestClassifier
rf_classifier = RandomForestClassifier(criterion= 'gini', max_depth= 10, n_estimators= 30)
rf_classifier.fit(X_train,y_train)


RandomForestClassifier(max_depth=10, n_estimators=30)

# 7.Model Testing

##### Training data

In [17]:
y_pred_train = rf_classifier.predict(X_train)
y_pred_train

array([0, 0, 2, 0, 2, 2, 1, 1, 1, 1, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0,
       2, 2, 0, 2, 1, 1, 2, 2, 0, 1, 1, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 0,
       0, 2, 2, 2, 1, 2, 1, 1, 1, 2, 0, 2, 1, 2, 2, 0, 1, 0, 2, 2, 2, 2,
       0, 1, 0, 0, 2, 2, 2, 1, 2, 0, 2, 2, 0, 1, 2, 1, 0, 2, 0, 0, 2, 1,
       2, 1, 2, 0, 2, 0, 0, 2, 0, 1, 0, 2, 0, 2, 2, 2, 2, 0, 0, 0, 1, 1,
       2, 2, 1, 2, 2, 2, 2, 2, 1, 0, 2, 2, 2, 2, 1, 2, 1, 2, 0, 0, 2, 0,
       1, 0, 2, 1, 2, 0, 0, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 0, 2, 2, 2, 2,
       0, 2, 2, 2, 2, 1, 1, 2, 0, 0, 2, 0, 2, 1, 0, 2, 2, 2, 2, 2, 2, 2,
       0, 0, 1, 1, 2, 1, 2, 0, 2, 2, 2, 1, 0, 0, 2, 2, 1, 2, 2, 2, 2, 2,
       2, 2, 0, 2, 0, 0, 2, 2, 1, 2, 2, 0, 2, 2, 2, 1, 2, 0, 2, 2, 0, 2,
       1, 2, 0, 0, 0, 2, 2, 2, 2, 1, 2, 2, 0, 2, 2, 0, 2, 2, 1, 0, 2, 2,
       2, 2, 1, 1, 0, 2, 2, 0, 0, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2,
       0, 2, 2, 2, 0, 2, 1, 1, 2, 2, 2, 2, 2, 0, 1, 2, 2, 0, 2, 2, 2, 2,
       0, 0, 2, 1, 2, 2, 1, 2, 2, 0, 2, 0, 1, 0, 2,

##### Test data

In [18]:
y_pred_test = rf_classifier.predict(X_test)
y_pred_test

array([1, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 2, 1, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2, 2, 2, 0, 2, 2, 2, 1, 1, 2, 2,
       1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2])

## 7. Model Evaluation

In [19]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,r2_score,confusion_matrix,roc_auc_score,classification_report

##### Training data

In [21]:
print('accuracy score:',accuracy_score(y_train,y_pred_train))
print('classification_report:\n',classification_report(y_train,y_pred_train))
print('confusion matrix:\n',confusion_matrix(y_train,y_pred_train))


accuracy score: 0.9882352941176471
classification_report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99        82
           1       1.00      0.96      0.98        72
           2       0.98      1.00      0.99       186

    accuracy                           0.99       340
   macro avg       0.99      0.98      0.99       340
weighted avg       0.99      0.99      0.99       340

confusion matrix:
 [[ 81   0   1]
 [  0  69   3]
 [  0   0 186]]


##### Test data

In [22]:
print('accuracy score:',accuracy_score(y_test,y_pred_test))
print('classification_report:\n',classification_report(y_test,y_pred_test))
print('confusion matrix:\n',confusion_matrix(y_test,y_pred_test))

accuracy score: 0.6166666666666667
classification_report:
               precision    recall  f1-score   support

           0       0.43      0.21      0.29        14
           1       0.86      0.46      0.60        13
           2       0.61      0.85      0.71        33

    accuracy                           0.62        60
   macro avg       0.63      0.51      0.53        60
weighted avg       0.62      0.62      0.59        60

confusion matrix:
 [[ 3  0 11]
 [ 0  6  7]
 [ 4  1 28]]


# Using Entropy criteria

In [23]:
from sklearn.ensemble import RandomForestClassifier
rf_classifier = RandomForestClassifier(criterion= 'entropy', max_depth= 10, n_estimators= 30)
rf_classifier.fit(X_train,y_train)

RandomForestClassifier(criterion='entropy', max_depth=10, n_estimators=30)

# 7.Model Testing

##### Training data

In [24]:
y_pred_train_en = rf_classifier.predict(X_train)
y_pred_train_en

array([0, 0, 2, 0, 2, 2, 1, 1, 1, 1, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0,
       2, 2, 0, 2, 1, 1, 2, 2, 0, 1, 1, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 0,
       0, 2, 2, 2, 1, 2, 1, 1, 1, 2, 0, 2, 1, 2, 2, 0, 1, 0, 2, 2, 2, 2,
       0, 1, 0, 0, 2, 2, 2, 1, 2, 0, 2, 2, 0, 1, 2, 1, 0, 2, 0, 0, 2, 1,
       2, 1, 2, 0, 2, 0, 0, 2, 0, 1, 0, 2, 0, 2, 2, 2, 2, 0, 0, 0, 1, 1,
       2, 2, 1, 2, 2, 2, 2, 2, 1, 0, 2, 2, 2, 2, 1, 2, 1, 2, 0, 0, 2, 0,
       1, 0, 2, 1, 2, 0, 0, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       0, 2, 2, 0, 2, 1, 1, 2, 0, 0, 2, 0, 2, 1, 0, 2, 2, 2, 2, 2, 2, 2,
       0, 0, 1, 1, 2, 1, 2, 0, 2, 2, 2, 1, 0, 0, 2, 2, 1, 2, 2, 2, 2, 2,
       2, 2, 0, 2, 0, 0, 2, 2, 1, 2, 2, 0, 2, 2, 2, 1, 2, 0, 2, 2, 0, 2,
       1, 2, 0, 0, 0, 2, 2, 1, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 1, 0, 2, 2,
       2, 2, 1, 1, 0, 2, 2, 0, 0, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2,
       0, 2, 2, 2, 0, 2, 1, 2, 2, 2, 2, 2, 2, 0, 1, 2, 2, 0, 2, 2, 2, 2,
       0, 0, 2, 1, 2, 2, 1, 2, 2, 0, 2, 0, 1, 0, 2,

##### Test data

In [25]:
y_pred_test_en = rf_classifier.predict(X_test)
y_pred_test_en

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 2, 2, 2,
       2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 0, 2, 2, 2, 0, 2, 2, 2, 1, 1, 2, 2,
       1, 0, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 0, 0, 2, 2])

## 7. Model Evaluation

In [19]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,r2_score,confusion_matrix,roc_auc_score,classification_report

##### Training data

In [26]:
print('accuracy score:',accuracy_score(y_train,y_pred_train_en))
print('classification_report:\n',classification_report(y_train,y_pred_train_en))
print('confusion matrix:\n',confusion_matrix(y_train,y_pred_train_en))


accuracy score: 0.9911764705882353
classification_report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99        82
           1       1.00      0.97      0.99        72
           2       0.98      1.00      0.99       186

    accuracy                           0.99       340
   macro avg       0.99      0.99      0.99       340
weighted avg       0.99      0.99      0.99       340

confusion matrix:
 [[ 81   0   1]
 [  0  70   2]
 [  0   0 186]]


##### Test data

In [27]:
print('accuracy score:',accuracy_score(y_test,y_pred_test_en))
print('classification_report:\n',classification_report(y_test,y_pred_test_en))
print('confusion matrix:\n',confusion_matrix(y_test,y_pred_test_en))

accuracy score: 0.65
classification_report:
               precision    recall  f1-score   support

           0       0.50      0.21      0.30        14
           1       0.86      0.46      0.60        13
           2       0.64      0.91      0.75        33

    accuracy                           0.65        60
   macro avg       0.67      0.53      0.55        60
weighted avg       0.65      0.65      0.61        60

confusion matrix:
 [[ 3  1 10]
 [ 0  6  7]
 [ 3  0 30]]
