# Implementation of voting classifier with 3 base learners from sklearn library: LogisticRegression, DecisionTreeClassifier, GaussianNB.

In [430]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from collections import Counter
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import numpy as np
import pandas as pd

# Drink Quality Dataset
Drink Quality Dataset contains of 700 samples, 11 features and 1 column for 4 quality class labels. https://archive.ics.uci.edu/ml/datasets/wine+quality

In [449]:
df= pd.read_csv("F:\\Data Science MSC\\Winter 2020\\Assignment3\\drink_quality.csv")
df.shape
df.head()
df.shape


(700, 12)

In [447]:
df.quality.unique()
df.isnull().any()

fixed acidity           False
volatile acidity        False
citric acid             False
residual sugar          False
chlorides               False
free sulfur dioxide     False
total sulfur dioxide    False
density                 False
pH                      False
sulphates               False
alcohol                 False
quality                 False
dtype: bool

In [432]:
def prepare_train_test_data(input_dataframe):
    train_data, test_data = train_test_split(input_dataframe, test_size=0.2)    
    return train_data, test_data

In [433]:
def prepare_list_data(unformatted_data):
    data_list = []
    for i in range(len(unformatted_data)):
        data_list.append(list(unformatted_data[i]))
    return data_list

## MyHardVoting used for Voting Classifier 

In [359]:
#MyHardVoting used for myVotingClassifier
def myHardVoting(prediction_data_list):
    n_estimators = len(prediction_data_list)
    num_test_sample = len(prediction_data_list[0])
    
    y_pred = []
    for i in range(num_test_sample):
        temp_y_list = []
        for j in range(n_estimators):
            estimator_pred_list = prediction_data_list[j]
            #print(estimator_pred_list)
            temp_y_list.append(estimator_pred_list[i])
        #print(temp_y_list) #all posible outcome        
        major_vote = Counter(temp_y_list).most_common(1)[0][0]
        y_pred.append(major_vote)
    return y_pred

In [361]:
def calculate_accuracy(y_test, y_pred):
    return  (accuracy_score(y_test, y_pred))*100.0

In [435]:
def myVotingClassfier(model_list, scaled_x_train, scaled_x_test, y_train, y_test, voting='hard'):    
    prediction_list = []
    for model_tuple in model_list:
        model = model_tuple[1]
        model.fit(scaled_x_train, y_train)
        y_pred = model.predict(scaled_x_test)
        prediction_list.append(list(y_pred))
        #acc = accuracy_score(y_pred, y_test)
        #model_accuracy= acc*100
        #print(f"{model_tuple[0]}'s accuracy: {model_accuracy}")
    y_pred = myHardVoting(prediction_list) #Use Hard Voting, as same used for my Bagging Classifier       
    return y_pred

In [443]:
###Question 2(b) : Voting Classifier
train_data, test_data = prepare_train_test_data(df)

x_train = train_data.iloc[:, :-1]
y_train = train_data.iloc[:,-1]
x_test = test_data.iloc[:, :-1]
y_test = test_data.iloc[:,-1]

scaler = StandardScaler()
scaled_x_train = scaler.fit_transform(x_train) #array
scaled_x_test = scaler.transform(x_test) #array

print("Actual result : ", y_test_list)
print("\n")

#3 base learners from sklearn library
model_list = [('Decision tree', DecisionTreeClassifier(max_depth=3)),
        ('Logistic regression', LogisticRegression(solver='liblinear')),
        ('Naive bayes classifier', GaussianNB())]

#Using myVotingClassifier
y_pred = myVotingClassfier(model_list, scaled_x_train, scaled_x_test, y_train, y_test, voting='hard')
print("Predicted result using myVotingClassifier with hard voting: ", list(y_test))
    
accuracy_myvoting_classifier = calculate_accuracy( y_pred,list(y_test))
print("MyVotingClassifier Accuracy :", accuracy_myvoting_classifier)
print("\n")

Actual result :  [4, 5, 5, 4, 5, 5, 4, 5, 4, 4, 5, 5, 4, 4, 7, 7, 4, 7, 4, 8, 8, 8, 5, 7, 7, 5, 8, 8, 8, 4, 4, 5, 5, 4, 5, 5, 5, 5, 4, 4, 4, 8, 7, 5, 4, 4, 7, 5, 4, 8, 5, 5, 5, 4, 7, 4, 4, 8, 5, 7, 7, 8, 4, 5, 8, 7, 8, 8, 4, 5, 8, 7, 8, 7, 5, 5, 4, 7, 5, 4, 5, 8, 7, 8, 8, 7, 8, 7, 4, 8, 8, 7, 7, 4, 8, 5, 7, 5, 4, 8, 7, 5, 4, 4, 7, 8, 4, 8, 5, 8, 8, 8, 8, 5, 4, 5, 5, 8, 7, 8, 5, 8, 4, 4, 7, 7, 7, 8, 5, 5, 7, 8, 4, 8, 5, 7, 4, 5, 4, 8]


Predicted result using myVotingClassifier with hard voting:  [7, 4, 5, 7, 8, 7, 8, 7, 4, 8, 5, 5, 8, 4, 5, 5, 5, 5, 4, 8, 4, 7, 4, 8, 5, 7, 4, 5, 7, 5, 8, 7, 7, 4, 5, 7, 8, 4, 7, 7, 7, 4, 4, 7, 5, 4, 8, 7, 8, 5, 5, 4, 7, 4, 5, 7, 8, 7, 8, 4, 5, 4, 5, 8, 8, 4, 7, 8, 5, 4, 5, 4, 7, 8, 5, 5, 7, 8, 5, 8, 4, 7, 8, 5, 4, 7, 8, 8, 5, 4, 7, 5, 4, 5, 4, 4, 8, 8, 4, 4, 5, 4, 8, 7, 5, 4, 5, 8, 8, 4, 7, 8, 7, 8, 8, 5, 7, 5, 4, 5, 7, 4, 8, 4, 8, 8, 5, 7, 4, 5, 8, 5, 4, 4, 8, 8, 7, 5, 7, 7]
MyVotingClassifier Accuracy : 64.28571428571429




## Using sklearn library function VotingClassifier

In [444]:
#Using library VotingClassifier
voting_clf = VotingClassifier(model_list, voting='hard')
voting_clf.fit(scaled_x_train, y_train)
y_pred = voting_clf.predict(scaled_x_test)

print("Predicted result using library Voting Classifer: ", y_pred)
accuracy_library_votingClassifier = calculate_accuracy(y_test, y_pred)
print("Library VotingClassifier Accuracy:", accuracy_library_votingClassifier)

Predicted result using library Voting Classifer:  [8 4 5 8 8 7 8 7 4 8 4 5 8 4 5 5 8 5 5 8 7 7 4 8 5 8 8 5 7 7 7 8 8 8 5 7 4
 5 7 8 8 7 4 8 5 4 8 7 8 5 7 4 5 7 5 7 8 7 8 4 7 4 5 8 8 4 5 4 5 4 4 4 7 8
 5 5 4 5 5 5 4 8 8 4 4 7 8 8 5 5 7 7 5 5 4 4 5 8 4 4 7 4 8 7 5 4 4 8 8 5 8
 8 5 8 8 5 8 5 5 4 8 4 8 4 8 8 4 7 5 5 8 5 4 4 8 8 7 5 8 7]
Library VotingClassifier Accuracy: 66.42857142857143
