In [2]:
# In this exercise we will look at a
# small wine database which carries a categorical label 
# for each wine along with several continuous-valued features.
# Sila. 18 Nov. 2022.

In [3]:
%matplotlib inline
from sklearn import datasets
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

In [4]:
# Load scikit-learn's bundled wine dataset. Later cells read fields of the
# returned object such as data, target, target_names and DESCR.
raw_data = datasets.load_wine()

In [5]:
# Show the human-readable description text that ships with the dataset.
print(raw_data.DESCR)

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0

In [6]:
# There are apparently 3 classes (creatively named 'class_0', 'class_1', and 'class_2'). 
# Probably... these correspond to some typical wine varietals like Pinot Noir, or Cabernet, or Merlot...

In [7]:
# raw_data behaves like a dictionary, so walk its keys and dump every entry;
# seeing the raw fields helps decide how to structure the data for our needs.
for field in raw_data:
    print(field, '\n', raw_data[field], '\n')

data 
 [[1.423e+01 1.710e+00 2.430e+00 ... 1.040e+00 3.920e+00 1.065e+03]
 [1.320e+01 1.780e+00 2.140e+00 ... 1.050e+00 3.400e+00 1.050e+03]
 [1.316e+01 2.360e+00 2.670e+00 ... 1.030e+00 3.170e+00 1.185e+03]
 ...
 [1.327e+01 4.280e+00 2.260e+00 ... 5.900e-01 1.560e+00 8.350e+02]
 [1.317e+01 2.590e+00 2.370e+00 ... 6.000e-01 1.620e+00 8.400e+02]
 [1.413e+01 4.100e+00 2.740e+00 ... 6.100e-01 1.600e+00 5.600e+02]] 

target 
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] 

frame 
 None 

target_names 
 ['class_0' 'class_1' 'class_2'] 

DESCR 
 .. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in 

In [8]:
# Report the dimensions of the feature matrix and of the label vector.
print(
    'data.shape\t', raw_data.data.shape,
    '\ntarget.shape \t', raw_data.target.shape,
)

data.shape	 (178, 13) 
target.shape 	 (178,)


In [9]:
# We have 178 samples (rows) and 13 features (columns).

In [11]:
# Print the dataset's bundled description text (the DESCR field).
# NOTE(review): an earlier cell already prints this same field, so one of the
# two printouts is likely redundant.
print(raw_data.DESCR)

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0

In [12]:
# Feature matrix (X) and class labels (y) consumed by the train/test split below.
X, y = raw_data['data'], raw_data['target']

In [14]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; random_state pins the shuffle so
# the same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [15]:
from sklearn.svm import SVC

In [16]:
# --- Support vector classifier, kernel = 'linear' ---
# fit() returns the estimator itself, so construction and training are chained.
classification1 = SVC(kernel='linear').fit(x_train, y_train)
linear = classification1.predict(x_test)
print(linear)

[2 1 0 1 0 2 1 0 2 1 0 0 1 0 1 1 2 0 1 0 0 1 1 0 0 2 0 0 0 2 1 2 2 0 1 1]


In [19]:
# We will use confusion_matrix and classification_report to evaluate the models
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [20]:
# Confusion matrix for the kernel = 'linear' SVC on the held-out test split
# (rows: true class, columns: predicted class).
print(confusion_matrix(y_true=y_test, y_pred=linear))

[[14  0  0]
 [ 1 12  0]
 [ 0  1  8]]


In [21]:
# Class names used to label the rows of this and the later classification reports.
target = raw_data['target_names']
print(classification_report(y_true=y_test, y_pred=linear, target_names=target))

              precision    recall  f1-score   support

     class_0       0.93      1.00      0.97        14
     class_1       0.92      0.92      0.92        13
     class_2       1.00      0.89      0.94         9

    accuracy                           0.94        36
   macro avg       0.95      0.94      0.94        36
weighted avg       0.95      0.94      0.94        36



In [43]:
# --- Support vector classifier, kernel = 'rbf' ---
# NOTE(review): C=100000 means very weak regularisation; presumably picked by
# hand for this dataset -- confirm.
classification2 = SVC(kernel='rbf', C=100_000).fit(x_train, y_train)
non_linear_rbf = classification2.predict(x_test)
print(non_linear_rbf)

[2 1 0 1 0 2 1 0 2 1 0 0 1 0 1 1 2 0 1 0 0 1 1 1 0 2 0 0 0 2 1 2 2 0 1 1]


In [44]:
# Confusion matrix for the kernel = 'rbf' SVC on the held-out test split
# (rows: true class, columns: predicted class).
print(confusion_matrix(y_true=y_test, y_pred=non_linear_rbf))

[[14  0  0]
 [ 0 13  0]
 [ 0  1  8]]


In [45]:
# Per-class precision / recall / F1 for the RBF-kernel SVC predictions.
print(
    classification_report(
        y_true=y_test,
        y_pred=non_linear_rbf,
        target_names=target,
    )
)

              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00        14
     class_1       0.93      1.00      0.96        13
     class_2       1.00      0.89      0.94         9

    accuracy                           0.97        36
   macro avg       0.98      0.96      0.97        36
weighted avg       0.97      0.97      0.97        36



In [61]:
from sklearn.neural_network import MLPClassifier

# No estimator is built in this cell: the classifier bound to `clf` is
# constructed in the following cell, which is the one that gets fitted.
# (A (10, 10, 10)-layer instance previously created here was overwritten
# before ever being trained, so it was dead code.)

In [66]:
# Multi-layer perceptron with a single hidden layer of 6 units, trained with
# the L-BFGS solver; random_state fixes the initialisation and max_iter caps
# the solver at 10000 iterations.
clf = MLPClassifier(
    hidden_layer_sizes=(6,),
    solver='lbfgs',
    alpha=1e-5,
    max_iter=10_000,
    random_state=1,
)

In [67]:
# Train the network on the training split; the fitted estimator is the cell's
# last expression, so its repr is echoed as the cell output.
clf.fit(X=x_train, y=y_train)

MLPClassifier(alpha=1e-05, hidden_layer_sizes=(6,), max_iter=10000,
              random_state=1, solver='lbfgs')

In [68]:
from sklearn.metrics import accuracy_score

# Accuracy of the fitted MLP on the data it was trained on and on the
# held-out test split.
predictions_train = clf.predict(x_train)
predictions_test = clf.predict(x_test)

# accuracy_score is documented as accuracy_score(y_true, y_pred): pass the
# ground truth first, consistent with the confusion_matrix and
# classification_report calls elsewhere in this notebook. Accuracy is the
# fraction of matching labels, so the printed values are unchanged.
train_score = accuracy_score(y_train, predictions_train)
print("score on train data: ", train_score)

test_score = accuracy_score(y_test, predictions_test)
print("score on test data: ", test_score)

score on train data:  1.0
score on test data:  0.9444444444444444


In [69]:
# Confusion matrix for the MLPClassifier on the held-out test split
# (rows: true class, columns: predicted class).
print(confusion_matrix(y_true=y_test, y_pred=predictions_test))

[[14  0  0]
 [ 0 12  1]
 [ 0  1  8]]


In [70]:
# Per-class precision / recall / F1 for the MLPClassifier test predictions.
print(
    classification_report(
        y_true=y_test,
        y_pred=predictions_test,
        target_names=target,
    )
)

              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00        14
     class_1       0.92      0.92      0.92        13
     class_2       0.89      0.89      0.89         9

    accuracy                           0.94        36
   macro avg       0.94      0.94      0.94        36
weighted avg       0.94      0.94      0.94        36

