In [46]:
import numpy as np
import os
import pandas as pd
import sklearn.model_selection as skmc
import sklearn.svm as svm
import sklearn.metrics as skmet
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

In [47]:
directory = os.path.dirname(os.getcwd())
iris_path = os.path.join(directory, 'Data/iris.data')
iris_df = pd.read_csv(iris_path)
features_df = iris_df.values[:, :-1]
target_df = iris_df.values[:, -1:]

In [48]:
classes, counts = np.unique(target_df, return_counts = True)
distribution_of_classes = dict(zip(classes, counts))
display(distribution_of_classes)

{'Iris-setosa': 49, 'Iris-versicolor': 50, 'Iris-virginica': 50}

In [49]:
def iris_gaussian_classifier(X, y):
    X_train, X_test, y_train, y_test = skmc.train_test_split(X, y, test_size = 0.2)
    iris_classifier = SVC(kernel = 'rbf')
    iris_classifier.fit(X_train, y_train.ravel())
    prediction = iris_classifier.predict(X_test)
    accuracy = skmet.accuracy_score(y_test, prediction)
    return prediction, accuracy

In [50]:
def iris_decision_tree_classifier(X,y):
    X_train, X_test, y_train, y_test = skmc.train_test_split(X, y, test_size = 0.2)
    classifier = DecisionTreeClassifier()
    classifier.fit(X_train, y_train)
    prediction = classifier.predict(X_test)
    accuracy = skmet.accuracy_score(y_test, prediction)
    return prediction, accuracy

In [51]:
def iris_NN_classifier(X,y):
    X_train, X_test, y_train, y_test = skmc.train_test_split(X, y, test_size = 0.2)
    classifier = MLPClassifier(hidden_layer_sizes = (100,), max_iter = 1000)
    classifier.fit(X_train, y_train.ravel())
    prediction = classifier.predict(X_test)
    accuracy = skmet.accuracy_score(y_test, prediction)
    return prediction, accuracy

In [52]:
#Data manipulation
scalar = StandardScaler()
pca = PCA()
features_scaled = scalar.fit_transform(features_df)
features_PCA = pca.fit_transform(features_df)
features_scaled_PCA = pca.fit_transform(features_scaled)
features_pca_scaled = scalar.fit_transform(features_PCA)
features_scaled_df = pd.DataFrame(features_scaled)
features_pca_df = pd.DataFrame(features_PCA)
features_scaled_PCA_df = pd.DataFrame(features_scaled_PCA)
features_PCA_scaled_df = pd.DataFrame(features_pca_scaled)

In [53]:
def repeated_testing_tree(target):
    count = 0
    avg_ori = 0
    avg_scaled = 0
    avg_pca = 0
    avg_scaled_and_pca = 0
    avg_pca_and_scaled = 0
    while (count < target):
        _, acc = iris_decision_tree_classifier(features_df, target_df)
        _, acc1 = iris_decision_tree_classifier(features_scaled_df, target_df)
        _, acc2 = iris_decision_tree_classifier(features_pca_df, target_df)
        _, acc3 = iris_decision_tree_classifier(features_scaled_PCA_df, target_df)
        _, acc4 = iris_decision_tree_classifier(features_scaled_PCA_df, target_df)
        avg_ori  = avg_ori + acc/target
        avg_scaled = avg_scaled + acc1/target
        avg_pca = avg_pca + acc2/target
        avg_scaled_and_pca = avg_scaled_and_pca + acc3/target
        avg_pca_and_scaled = avg_pca_and_scaled + acc4/target
        count = count + 1
    
    print(avg_ori, avg_scaled, avg_pca, avg_scaled_and_pca, avg_pca_and_scaled)

def repeated_testing_gaussian(target):
    count = 0
    avg_ori = 0
    avg_scaled = 0
    avg_pca = 0
    avg_scaled_and_pca = 0
    avg_pca_and_scaled = 0
    while (count < target):
        _, acc = iris_gaussian_classifier(features_df, target_df)
        _, acc1 = iris_gaussian_classifier(features_scaled_df, target_df)
        _, acc2 = iris_gaussian_classifier(features_pca_df, target_df)
        _, acc3 = iris_gaussian_classifier(features_scaled_PCA_df, target_df)
        _, acc4 = iris_gaussian_classifier(features_scaled_PCA_df, target_df)
        avg_ori  = avg_ori + acc/target
        avg_scaled = avg_scaled + acc1/target
        avg_pca = avg_pca + acc2/target
        avg_scaled_and_pca = avg_scaled_and_pca + acc3/target
        avg_pca_and_scaled = avg_pca_and_scaled + acc4/target
        count = count + 1
    
    print(avg_ori, avg_scaled, avg_pca, avg_scaled_and_pca, avg_pca_and_scaled)

def repeated_testing_NN(target):
    count = 0
    avg_ori = 0
    avg_scaled = 0
    avg_pca = 0
    avg_scaled_and_pca = 0
    avg_pca_and_scaled = 0
    while (count < target):
        _, acc = iris_NN_classifier(features_df, target_df)
        _, acc1 = iris_NN_classifier(features_scaled_df, target_df)
        _, acc2 = iris_NN_classifier(features_pca_df, target_df)
        _, acc3 = iris_NN_classifier(features_scaled_PCA_df, target_df)
        _, acc4 = iris_NN_classifier(features_scaled_PCA_df, target_df)
        avg_ori  = avg_ori + acc/target
        avg_scaled = avg_scaled + acc1/target
        avg_pca = avg_pca + acc2/target
        avg_scaled_and_pca = avg_scaled_and_pca + acc3/target
        avg_pca_and_scaled = avg_pca_and_scaled + acc4/target
        count = count + 1
    
    print(avg_ori, avg_scaled, avg_pca, avg_scaled_and_pca, avg_pca_and_scaled)