In [1]:
from sklearn.datasets import load_wine

In [2]:
data = load_wine()

In [3]:
print(data.DESCR)

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

:Number of Instances: 178
:Number of Attributes: 13 numeric, predictive attributes and the class
:Attribute Information:
    - Alcohol
    - Malic acid
    - Ash
    - Alcalinity of ash
    - Magnesium
    - Total phenols
    - Flavanoids
    - Nonflavanoid phenols
    - Proanthocyanins
    - Color intensity
    - Hue
    - OD280/OD315 of diluted wines
    - Proline
    - class:
        - class_0
        - class_1
        - class_2

:Summary Statistics:

                                Min   Max   Mean     SD
Alcohol:                      11.0  14.8    13.0   0.8
Malic Acid:                   0.74  5.80    2.34  1.12
Ash:                          1.36  3.23    2.36  0.27
Alcalinity of Ash:            10.6  30.0    19.5   3.3
Magnesium:                    70.0 162.0    99.7  14.3
Total Phenols:                0.98  3.88    2.29  0.63
Flavanoids:                   0.34  5.08    2.03  1.00

In [4]:
data.feature_names

['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

In [5]:
data.data

array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
        1.065e+03],
       [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
        1.050e+03],
       [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
        1.185e+03],
       ...,
       [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
        8.350e+02],
       [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
        8.400e+02],
       [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
        5.600e+02]])

In [6]:
from sklearn.decomposition import PCA

In [7]:
data.data.shape

(178, 13)

In [8]:
pca = PCA(n_components = 4)

In [9]:
pca.fit(data.data)

In [10]:
pca.explained_variance_

array([9.92017895e+04, 1.72535266e+02, 9.43811370e+00, 4.99117861e+00])

In [11]:
pca.explained_variance_.shape

(4,)

In [12]:
data.target_names

array(['class_0', 'class_1', 'class_2'], dtype='<U7')

In [13]:
from sklearn.linear_model import LogisticRegression

In [14]:
model = LogisticRegression ()

In [15]:
model.fit(data.data, data.target)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [16]:
y_pred = model.predict(data.data)

In [17]:
y_pred

array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2])

In [18]:
from sklearn.metrics import classification_report

In [19]:
print(classification_report(data.target, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.95      0.97        59
           1       0.95      0.97      0.96        71
           2       0.96      0.98      0.97        48

    accuracy                           0.97       178
   macro avg       0.97      0.97      0.97       178
weighted avg       0.97      0.97      0.97       178



In [20]:
new_values = pca.transform(data.data)

In [21]:
new_values

array([[ 3.18562979e+02,  2.14921307e+01, -3.13073470e+00,
         2.50113758e-01],
       [ 3.03097420e+02, -5.36471768e+00, -6.82283550e+00,
         8.64034749e-01],
       [ 4.38061133e+02, -6.53730945e+00,  1.11322298e+00,
        -9.12410681e-01],
       [ 7.33240139e+02,  1.92729032e-01,  9.17257016e-01,
         5.41250645e-01],
       [-1.15714285e+01,  1.84899946e+01,  5.54422076e-01,
        -1.36089609e+00],
       [ 7.03231192e+02, -3.32158674e-01, -9.49375334e-01,
         3.59993827e-01],
       [ 5.42971581e+02, -1.35189666e+01, -2.12694283e+00,
        -5.55661430e-02],
       [ 5.48401860e+02,  1.14494324e+01, -4.04924202e-02,
        -1.34945380e+00],
       [ 2.98036863e+02, -8.18015784e+00, -3.88097517e+00,
         9.10643037e-01],
       [ 2.98049553e+02, -7.10154294e+00, -1.55845533e+00,
         1.67704202e+00],
       [ 7.63079712e+02, -8.33431723e+00,  1.88629037e+00,
        -1.59831835e+00],
       [ 5.32943228e+02, -1.42876338e+01, -1.30335240e-01,
      

In [22]:
model.fit(new_values, data.target)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [23]:
y_pred = model.predict(new_values)

In [24]:
print(classification_report(data.target, y_pred))

              precision    recall  f1-score   support

           0       0.96      0.93      0.95        59
           1       0.93      0.96      0.94        71
           2       0.96      0.96      0.96        48

    accuracy                           0.95       178
   macro avg       0.95      0.95      0.95       178
weighted avg       0.95      0.95      0.95       178



In [52]:
data.iloc[:,3:5]

AttributeError: iloc