In [30]:
import numpy as np 
import pandas as pd 
from sklearn.metrics import confusion_matrix 
from sklearn.model_selection import train_test_split 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, recall_score
from sklearn.metrics import classification_report 

In [3]:
#importing data
def importing():
    df = pd.read_csv( 
'https://archive.ics.uci.edu/ml/machine-learning-'+
'databases/balance-scale/balance-scale.data')
    print(df.shape)
    print(df.columns)
    return df

In [4]:
df = importing()
df.head()

(624, 5)
Index(['B', '1', '1.1', '1.2', '1.3'], dtype='object')


Unnamed: 0,B,1,1.1,1.2,1.3
0,R,1,1,1,2
1,R,1,1,1,3
2,R,1,1,1,4
3,R,1,1,1,5
4,R,1,1,2,1


In [5]:
#splitting data
def splitting(df):
        X = df.drop('B',axis=1).values
        y = df['B'].values
        
        #train_test
        X_train,X_test,y_train, y_test = train_test_split(X,y,test_size=0.3, random_state = 42, stratify=y)
        
        return X,y,X_train,X_test,y_train, y_test 


In [6]:
#training using the gini
def train_gini(X_train,y_train):
    # Classification object
    clf_gini = DecisionTreeClassifier(criterion='gini', max_depth =3 , min_samples_leaf = 5 , random_state = 42)
    
    # Fitting the data
    clf_gini.fit(X_train,y_train)
    
    return clf_gini

In [7]:
def train_entropy(X_train, y_train):
    #classifier making
    clf_ent = DecisionTreeClassifier(criterion='entropy', max_depth=3, min_samples_leaf=5,random_state=42)
    
    clf_ent.fit(X_train,y_train)
    
    return clf_ent

In [8]:
#making predictions
def make_pred(classifier, X_test):
    y_pred= classifier.predict(X_test)
    print(y_pred)
    return y_pred

In [9]:
#calculationg Accuracy
def cal_accuracy(y_test,y_pred):
    print('Confusion_matrix: \n', 
          confusion_matrix(y_test,y_pred))
    print('Accuracy_score : ', 
          accuracy_score(y_test,y_pred))
    print('classification report:\n ', 
          classification_report(y_test,y_pred))

In [10]:
def main():
    data = importing()
    X,y,X_train,X_test,y_train, y_test = splitting(data)
    clf_gini = train_gini(X_train,y_train)
    clf_entropy = train_entropy(X_train,y_train)
    
    print('Gini')
    y_pred_gini = make_pred(clf_gini,X_test)
    cal_accuracy(y_test, y_pred_gini)
    
    print('Entropy')
    y_pred_ent = make_pred(clf_entropy,X_test)
    cal_accuracy(y_test,y_pred_ent)
    

In [12]:
main()

(624, 5)
Index(['B', '1', '1.1', '1.2', '1.3'], dtype='object')
Gini
['R' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'R' 'R' 'R' 'L' 'R' 'R' 'L' 'R'
 'L' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'R' 'L'
 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'R' 'L' 'L' 'L'
 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'L' 'L'
 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'L'
 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'L' 'R' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'R' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'L' 'L'
 'R' 'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'L' 'L' 'L' 'L' 'R'
 'L' 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'R' 'L' 'L' 'L' 'R' 'R' 'R' 'R' 'R' 'L'
 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'L' 'R' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'L' 'R' 'L' 'L' 'L' 'R' 'R']
Confusion_matrix: 
 [[ 0  5  9]
 [ 0 58 29]
 [ 0 16 71]]
Accuracy_score :  0.6861702127659575
classification report:
                precision    recall  f1-score   su

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [21]:
#using ensembling with KNN, Logistic and Decision Tree

lr = LogisticRegression()
knn = KNeighborsClassifier()
dt = DecisionTreeClassifier(criterion='gini', max_depth=3, min_samples_leaf=5, random_state=42)

classifier = [('KNN', knn),
            ('Logistic',lr),
            ('Decision tree', dt)]

In [32]:
for cl, clf in classifier:
    data = importing()
    X,y,X_train,X_test,y_train, y_test = splitting(data)
    clf.fit(X_train,y_train)
    y_pred = clf.predict(X_test)
    print('for :' + cl.upper())
    print('accuracy_score :', accuracy_score(y_test,y_pred))
   # print('recall_score :', recall_score(y_test,y_pred))
    print('Confusion_matrix \n :', confusion_matrix(y_test,y_pred))
    print('classification report\n:',classification_report(y_test,y_pred))
    

(624, 5)
Index(['B', '1', '1.1', '1.2', '1.3'], dtype='object')
for :KNN
accuracy_score : 0.8563829787234043
Confusion_matrix 
 : [[ 2  5  7]
 [ 5 80  2]
 [ 5  3 79]]
classification report
:               precision    recall  f1-score   support

           B       0.17      0.14      0.15        14
           L       0.91      0.92      0.91        87
           R       0.90      0.91      0.90        87

   micro avg       0.86      0.86      0.86       188
   macro avg       0.66      0.66      0.66       188
weighted avg       0.85      0.86      0.85       188

(624, 5)
Index(['B', '1', '1.1', '1.2', '1.3'], dtype='object')
for :LOGISTIC
accuracy_score : 0.8723404255319149
Confusion_matrix 
 : [[ 0  4 10]
 [ 0 79  8]
 [ 0  2 85]]
classification report
:               precision    recall  f1-score   support

           B       0.00      0.00      0.00        14
           L       0.93      0.91      0.92        87
           R       0.83      0.98      0.89        87

   micro avg  

  'precision', 'predicted', average, warn_for)


(624, 5)
Index(['B', '1', '1.1', '1.2', '1.3'], dtype='object')
for :DECISION TREE
accuracy_score : 0.6861702127659575
Confusion_matrix 
 : [[ 0  5  9]
 [ 0 58 29]
 [ 0 16 71]]
classification report
:               precision    recall  f1-score   support

           B       0.00      0.00      0.00        14
           L       0.73      0.67      0.70        87
           R       0.65      0.82      0.72        87

   micro avg       0.69      0.69      0.69       188
   macro avg       0.46      0.49      0.47       188
weighted avg       0.64      0.69      0.66       188



  'precision', 'predicted', average, warn_for)


In [27]:
from sklearn.ensemble import VotingClassifier 


In [28]:
data = importing()
X,y,X_train,X_test,y_train, y_test = splitting(data)
vc = VotingClassifier(classifier)
vc.fit(X_train,y_train) 
y_pred = clf.predict(X_test)

print('vc ACCURACY SCORE :' , accuracy_score(y_test,y_pred))


(624, 5)
Index(['B', '1', '1.1', '1.2', '1.3'], dtype='object')
vc ACCURACY SCORE : 0.6861702127659575


