# TASK-1: PCA
- use the sklearn utility to load the *wine dataset* (`sklearn.datasets.load_wine`)
- apply PCA to reduce the dimensionality of the dataset and to enable its visualization
- visualize the reduced dataset using a scatter plot, representing instances from different classes with different colors.

In [None]:
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.datasets import load_wine
import pandas as pd
import numpy as np

In [None]:
# Load the wine dataset via scikit-learn's `load_wine` helper; the returned
# object is used below through its data, feature_names, target and DESCR fields.
dataset = load_wine()

In [None]:
# List the attribute names available on the loaded dataset object.
dir(dataset)

In [None]:
# Print the dataset's DESCR field (presumably its textual description).
print(dataset.DESCR)

In [None]:
# Tabular view of the dataset: one column per feature, with the class label
# appended as a trailing 'Class' column.
df = (
    pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
    .assign(Class=dataset.target)
)
df

In [None]:
# Summary statistics of the features only (label column excluded),
# transposed so that each feature occupies one row.
df.drop(columns='Class').describe().transpose()

In [None]:
# Feature matrix as a plain NumPy array, without the 'Class' label column.
X = df.drop(columns='Class').to_numpy()

### Apply PCA on unscaled data

In [None]:
# Fit a 2-component PCA directly on the raw (unscaled) feature matrix and keep
# the projected samples; they are plotted in the comparison figure below.
pca_unscaled = PCA(n_components = 2)
X_unscaled_PCA = pca_unscaled.fit_transform(X)

In [None]:
# Show the weight of each original feature in the two unscaled principal
# components, rounded to three decimals.
pca_unscaled.components_.round(3)

Note that:
- the first PC roughly corresponds to the last original feature (`proline`)
- the second PC roughly corresponds to the fifth original feature (`magnesium`, column index 4)

In [None]:
# Display the explained-variance ratio of each of the two components fitted
# on the unscaled data.
pca_unscaled.explained_variance_ratio_

###  Apply PCA on scaled data

In [None]:
# Standardize the features first, then project the standardized data onto
# two principal components.
X_standardized = StandardScaler().fit_transform(X)
pca_scaled = PCA(n_components=2)
X_scaled_PCA = pca_scaled.fit_transform(X_standardized)

In [None]:
# Show the weight of each original feature in the two principal components
# fitted on standardized data, rounded to three decimals.
pca_scaled.components_.round(3)

In [None]:
# Display the explained-variance ratio of each of the two components fitted
# on the standardized data.
pca_scaled.explained_variance_ratio_

###  Visualize the transformed dataset on a scatter plot

In [None]:
# Compare three 2-D views of the wine data side by side: two raw features,
# PCA on the unscaled data, and PCA on the standardized data.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))

# Per-class styling: class i is drawn with colors[i] and markers[i].
target_classes = range(0, 3)
colors = ("blue", "red", "green")
markers = ("^", "s", "o")

# Class label of every sample, as an array usable for boolean masking.
class_labels = df['Class'].to_numpy()

# One entry per panel: (axes, x values, y values, x label, y label, title).
panels = (
    (ax1, X[:, -1], X[:, 4],
     'proline', 'magnesium', 'Subspace original data'),
    (ax2, X_unscaled_PCA[:, 0], X_unscaled_PCA[:, 1],
     'Principal Component #1', 'Principal Component #2', 'PCA without scaling'),
    (ax3, X_scaled_PCA[:, 0], X_scaled_PCA[:, 1],
     'Principal Component #1', 'Principal Component #2', 'PCA with scaling'),
)

for ax, xs, ys, xlabel, ylabel, title in panels:
    # Draw each class separately so it gets its own color, marker and legend
    # entry, instead of an unlabeled colormap value.
    for target_class, color, marker in zip(target_classes, colors, markers):
        in_class = class_labels == target_class
        ax.scatter(xs[in_class], ys[in_class],
                   color=color, marker=marker,
                   label=f'class {target_class}')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()

plt.tight_layout()
plt.show()

Notebook adapted from https://scikit-learn.org/stable/auto_examples/preprocessing/plot_scaling_importance.html
    