In [1]:
from sklearn.datasets import load_iris
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

iris = load_iris()
columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
irisDF = pd.DataFrame(iris.data, columns=columns)
irisDF['target'] = iris.target
irisDF.head(3)



Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0


# StandardScaler

In [3]:
from sklearn.preprocessing import StandardScaler

iris_scaled = StandardScaler().fit_transform(irisDF.iloc[:, :-1])

## PCA with 2 components 

In [4]:
from sklearn.decomposition import PCA

pca = PCA(n_components=2)

pca.fit(iris_scaled)
iris_pca = pca.transform(iris_scaled)
print(iris_pca.shape)

(150, 2)


## Create a DF

In [6]:
pca_columns = ['pca_component_1','pca_component_2'] # pca components name (new features)
irisDF_pca = pd.DataFrame(iris_pca, columns=pca_columns) # DF, iris_pca ndarray + column nmaes
irisDF_pca['target'] = iris.target # add target series
irisDF_pca.head(3)

Unnamed: 0,pca_component_1,pca_component_2,target
0,-2.264703,0.480027,0
1,-2.080961,-0.674134,0
2,-2.364229,-0.341908,0


In [18]:
explainable = pca.explained_variance_ratio_
print('PCA result can be explained by {0:.2f}, + {1:.2f} = {2}'.format(explainable[0], explainable[1], explainable))


PCA result can be explained by 0.73, + 0.23 = [0.72962445 0.22850762]


## BEFORE PCA, Random Forest result

In [21]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
import numpy as np

rcf = RandomForestClassifier(random_state=42)
scores = cross_val_score(rcf, iris.data, iris.target, scoring = 'accuracy', cv=3)

In [22]:
print(scores)
print(np.mean(scores))

[0.98 0.94 0.98]
0.9666666666666667


## After PCA, Random Forest result

In [24]:
pca_X = irisDF_pca[['pca_component_1', 'pca_component_2']]
scores_pca = cross_val_score(rcf, pca_X, iris.target, scoring='accuracy', cv=3)
print(scores_pca)
print(np.mean(scores_pca))

[0.88 0.88 0.9 ]
0.8866666666666667
