In [7]:
# conventional way to import pandas
import pandas as pd
import numpy as np

# Download the UCI Wine dataset directly from the archive URL. The raw file
# carries no header row, so the 14 column names are supplied explicitly.
wine_columns = [
    'Class label',
    'Alcohol',
    'Malic acid',
    'Ash',
    'Alcalinity of ash',
    'Magnesium',
    'Total phenols',
    'Flavanoids',
    'Nonflavanoid phenols',
    'Proanthocyanins',
    'Color intensity',
    'Hue',
    'OD280/OD315 of diluted wines',
    'Proline',
]
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data',
    header=None,
    names=wine_columns,
)

In [8]:
# Separate the class label (target) from the remaining columns (features).
# Every column other than the label is a feature, kept in its original order.
target_column = 'Class label'
feature_columns = [name for name in df.columns if name != target_column]

X = df[feature_columns]
Y = df[target_column]

In [10]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 30% of the samples as the test set; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=1
)

# Standardize the features. The scaler is fitted on the training split only,
# and that same fitted transformation is then applied to both splits.
sc = StandardScaler()
X_train_std = sc.fit(X_train).transform(X_train)
X_test_std = sc.transform(X_test)

# 1. Perceptron Model

from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

# `max_iter` replaces the `n_iter` argument, which newer scikit-learn releases
# no longer accept. `tol=None` switches off the tolerance-based stopping
# criterion so that training still runs for the full 40 epochs.
ppn = Perceptron(max_iter=40, tol=None, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)

# Make Classification on Test Set
y_pred = ppn.predict(X_test_std)
print('Misclassified Samples: %d' % (y_test != y_pred).sum())

# Score once and reuse the value for both the printout and the summary lists.
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy of Perceptron Model: %.2f' % accuracy)

# Parallel lists consumed by the comparison chart: model names and accuracies.
mods = []
vals = []
mods.append('Perceptron Model')
# Built-in round() works whether the metric is a NumPy scalar or a plain
# Python float (which has no .round() method).
vals.append(round(accuracy, 2))

# 2. Logistic Regression

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# A large C means weak regularization.
lr = LogisticRegression(C=1000.0, random_state=0)
lr.fit(X_train_std, y_train)

# Classify the held-out samples and report the error count.
y_pred = lr.predict(X_test_std)
print('Misclassified Samples: %d' % (y_test != y_pred).sum())

# Score once and reuse the value for both the printout and the summary lists.
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy of Logistic Regression Model: %.2f' % accuracy)

mods.append('Logistic Regression Model')
# Built-in round() works whether the metric is a NumPy scalar or a plain
# Python float (which has no .round() method).
vals.append(round(accuracy, 2))

# 3. Support Vector Machine

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Linear-kernel SVM on the standardized features.
svm = SVC(kernel='linear', random_state=0, C=1.0)
svm.fit(X_train_std, y_train)

# Classify the held-out samples and report the error count.
y_pred = svm.predict(X_test_std)
print('Misclassified Samples: %d' % (y_test != y_pred).sum())

# Score once and reuse the value for both the printout and the summary lists.
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy of Support Vector Machine Model: %.2f' % accuracy)

mods.append('Support Vector Machine')
# Built-in round() works whether the metric is a NumPy scalar or a plain
# Python float (which has no .round() method).
vals.append(round(accuracy, 2))


from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# RBF-kernel variant of the SVM. Note that `svm` is rebound to this new model,
# and that its score is only printed: it is not added to `mods`/`vals`.
svm = SVC(kernel='rbf', C=1.0, gamma=0.1, random_state=0)
svm.fit(X_train_std, y_train)

y_pred = svm.predict(X_test_std)
misclassified = (y_test != y_pred).sum()
print('Misclassified Samples: %d' % misclassified)

rbf_accuracy = accuracy_score(y_test, y_pred)
print('Accuracy of Support Vector Machine Model with gamma: %.2f' % rbf_accuracy)


# 4. Decision Tree Classifier

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Entropy-based tree limited to depth 3.
tree = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
tree.fit(X_train_std, y_train)

# Classify the held-out samples and report the error count.
y_pred = tree.predict(X_test_std)
print('Misclassified Samples: %d' % (y_test != y_pred).sum())

# Score once and reuse the value for both the printout and the summary lists.
# The label names only the model: a decision tree has no gamma setting.
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy of Decision Tree Classifier: %.2f' % accuracy)

mods.append('Decision Tree Classifier')
# Built-in round() works whether the metric is a NumPy scalar or a plain
# Python float (which has no .round() method).
vals.append(round(accuracy, 2))

# 5. Random Forest Classifier

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Ensemble of 10 entropy-based trees, fitted using 2 parallel jobs.
forest = RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=1, n_jobs=2)
forest.fit(X_train_std, y_train)

# Classify the held-out samples and report the error count.
y_pred = forest.predict(X_test_std)
print('Misclassified Samples: %d' % (y_test != y_pred).sum())

# Score once and reuse the value for both the printout and the summary lists.
# The label names only the model: a random forest has no gamma setting.
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy of Random Forest Classifier: %.2f' % accuracy)

mods.append('Random Forest Classifier')
# Built-in round() works whether the metric is a NumPy scalar or a plain
# Python float (which has no .round() method).
vals.append(round(accuracy, 2))

# 6. KNN CLASSIFIER

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# 5 nearest neighbours; Minkowski metric with p=2.
knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
knn.fit(X_train_std, y_train)

# Classify the held-out samples and report the error count.
y_pred = knn.predict(X_test_std)
print('Misclassified Samples: %d' % (y_test != y_pred).sum())

# Score once and reuse the value for both the printout and the summary lists.
# The label names only the model: KNN has no gamma setting.
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy of KNN Classifier: %.2f' % accuracy)

mods.append('KNN Classifier')
# Built-in round() works whether the metric is a NumPy scalar or a plain
# Python float (which has no .round() method).
vals.append(round(accuracy, 2))

# For visualization
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.palettes import Spectral6, brewer
from bokeh.transform import factor_cmap

# One row per recorded model: its display name and its rounded test accuracy.
ser_df = pd.DataFrame({'Model': mods, 'Accuracy_Value': vals})

source = ColumnDataSource(ser_df)

# `width`/`height` replace the `plot_width`/`plot_height` arguments, which
# newer Bokeh releases no longer accept.
p = figure(x_range=mods, width=1000, height=700)

# Colour each bar by model name; Spectral6 supplies six colours, matching the
# six models appended to `mods` above.
color_map = factor_cmap(field_name='Model', palette=Spectral6, factors=mods)
p.vbar(x='Model', top='Accuracy_Value', source=source, width=0.70, color=color_map)

p.title.text = 'Comparison of Models'
p.xaxis.axis_label = 'Model Types'
p.yaxis.axis_label = "Accuracy value of different models"

show(p)



Misclassified Samples: 1
Accuracy of Perceptron Model: 0.98
Misclassified Samples: 1
Accuracy of Logistic Regression Model: 0.98
Misclassified Samples: 1
Accuracy of Support Vector Machine Model: 0.98
Misclassified Samples: 1
Accuracy of Support Vector Machine Model with gamma: 0.98
Misclassified Samples: 2
Accuracy of Decision Tree Classifier with gamma: 0.96
Misclassified Samples: 0
Accuracy of Random Forest Classifier with gamma: 1.00
Misclassified Samples: 1
Accuracy of KNN Classifier with gamma: 0.98
