In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import FactorAnalysis as FA

In [2]:
# Load the Iris data directly from the UCI repository.
# Column labels come from `names`; header=None states explicitly that no line
# of the file is treated as a header (pandas already behaves this way whenever
# `names` is supplied).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = [
    'sepal-length',
    'sepal-width',
    'petal-length',
    'petal-width',
    'Class',
]
dataset = pd.read_csv(url, header=None, names=names)
dataset.head()

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,Class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Separate the measurement columns (X) from the species label column (y).
X = dataset.drop(columns='Class')
y = dataset['Class']


In [4]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=8,
)

In [5]:
# Step 8: Standardise the features.
# The scaler is fitted on the training split only and then applied to both
# splits, so the test rows do not influence the fitted statistics.
# NOTE: X_train / X_test are rebound to the scaled output, so the unscaled
# splits are not kept after this cell.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [6]:
# Step 9: Baseline model - fit a shallow random forest (max_depth=2, fixed
# seed) on the scaled features and predict the held-out rows.
# The accuracy score and confusion matrix are printed in the next cell.
classifier = RandomForestClassifier(max_depth=2, random_state=8).fit(X_train, y_train)
y_pred = classifier.predict(X_test)

In [7]:
# Score the current predictions against the true test labels.
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
print(f'confusion matrix:\n{cm}')
print(f'Accuracy: {accuracy}')

confusion matrix:
[[15  0  0]
 [ 0 15  1]
 [ 0  4 10]]
Accuracy: 0.8888888888888888


In [8]:
# Step 10: Reduce the scaled features to a single latent factor.
# FactorAnalysis is unsupervised: its fit / fit_transform accept `y` only for
# API compatibility and ignore it, so the labels are deliberately not passed.
# The factor model is estimated on the training split only and then applied
# unchanged to the test split.
# NOTE: X_train / X_test are rebound to the one-column factor scores, so the
# scaled arrays are no longer available after this cell.
fa = FA(n_components=1)
X_train = fa.fit_transform(X_train)
X_test = fa.transform(X_test)

In [9]:
# Step 11: Fit a shallow random forest (max_depth=2) on the scaled and
# factor-analysed features and predict the held-out rows; the accuracy score
# and confusion matrix are printed in the next cell.
# NOTE(review): this forest uses random_state=0 whereas the baseline forest
# uses random_state=8, so the two results differ in seed as well as in input
# features - confirm this is intended.
classifier = RandomForestClassifier(max_depth=2, random_state=0).fit(X_train, y_train)
y_pred = classifier.predict(X_test)
y_pred

array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-virginica',
       'Iris-versicolor', 'Iris-setosa', 'Iris-setosa', 'Iris-virginica',
       'Iris-virginica', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-setosa', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-setosa', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-setosa',
       'Iris-virginica', 'Iris-setosa', 'Iris-setosa', 'Iris-virginica',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-virginica',
       'Iris-versicolor', 'Iris-virginica', 'Iris-virginica',
       'Iris-versicolor', 'Iris-setosa', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-setosa', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [10]:
# Score the current predictions against the true test labels; accuracy is
# reported to two decimal places.
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
print(f'confusion_matrix: \n{cm}\n')
print(f'Accuracy {accuracy:.2f}')

confusion_matrix: 
[[15  0  0]
 [ 0 13  3]
 [ 0  1 13]]

Accuracy 0.91
