In [1]:
# pip install python-mnist will install the required package
from mnist import MNIST
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [2]:
np.random.seed(60) # reproducability
mndata = MNIST('Datasets/MNIST')

# read training images and corresponding labels
tr_images, tr_labels = mndata.load_training()
# read test images and corresponding labels
tt_images, tt_labels = mndata.load_testing()

# convert lists into numpy format and apply normalization
tr_images = np.array(tr_images) / 255. # shape (60000, 784)
tr_labels = np.array(tr_labels)         # shape (60000,)
tt_images = np.array(tt_images) / 255. # shape (10000, 784)
tt_labels = np.array(tt_labels)         # shape (10000,)

columns_images = ['p{}'.format(i+1) for i in range(784)]
tr_df_images = pd.DataFrame(data=tr_images, columns=columns_images)
tr_df_labels = pd.DataFrame(data=tr_labels, columns=['label'])
tt_df_images = pd.DataFrame(data=tt_images, columns=columns_images)
tt_df_labels = pd.DataFrame(data=tt_labels, columns=['label'])

In [3]:
X_train, X_test, y_train, y_test = train_test_split(tr_df_images, tr_df_labels, test_size=0.2, random_state=0)

In [4]:
dtc = DecisionTreeClassifier(random_state=0, max_depth = 40, min_samples_split = 2, min_samples_leaf=2, max_leaf_nodes=1000, min_impurity_decrease=0.00003)

In [5]:
# Train Decision Tree Classifer
dtc = dtc.fit(X_train,y_train)
y_pred = dtc.predict(X_test)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.8724166666666666


In [6]:
from sklearn.ensemble import RandomForestClassifier
rf=RandomForestClassifier(n_estimators=20)
rf.fit(X_train,y_train)
pred=rf.predict(X_test)
print(metrics.accuracy_score(y_test, pred))

  rf.fit(X_train,y_train)


0.9595833333333333


In [7]:
N=20

In [8]:
trained_trees = []
for i in range(N):
    X_train_subtree = X_train.sample(frac=1, replace=True, random_state=i)
    Y_train_subtree = y_train.sample(frac=1, replace=True, random_state=i)
    dtc = DecisionTreeClassifier(random_state=0)
    dtc = dtc.fit(X_train_subtree,Y_train_subtree)
    y_pred = dtc.predict(X_test)
    print("Accuracy of",i+1,"th Tree:",metrics.accuracy_score(y_test, y_pred))
    trained_trees.append(dtc)


Accuracy of 1 th Tree: 0.8490833333333333
Accuracy of 2 th Tree: 0.8491666666666666
Accuracy of 3 th Tree: 0.852
Accuracy of 4 th Tree: 0.85275
Accuracy of 5 th Tree: 0.8564166666666667
Accuracy of 6 th Tree: 0.8475833333333334
Accuracy of 7 th Tree: 0.8441666666666666
Accuracy of 8 th Tree: 0.84775
Accuracy of 9 th Tree: 0.8550833333333333
Accuracy of 10 th Tree: 0.855
Accuracy of 11 th Tree: 0.8511666666666666
Accuracy of 12 th Tree: 0.8513333333333334
Accuracy of 13 th Tree: 0.8544166666666667
Accuracy of 14 th Tree: 0.8525833333333334
Accuracy of 15 th Tree: 0.8515833333333334
Accuracy of 16 th Tree: 0.8515
Accuracy of 17 th Tree: 0.8479166666666667
Accuracy of 18 th Tree: 0.8450833333333333
Accuracy of 19 th Tree: 0.8544166666666667
Accuracy of 20 th Tree: 0.85


In [9]:
total_predictions = trained_trees[0].predict(X_test)
for i in range(1, N):
    total_predictions = np.vstack([total_predictions, trained_trees[i].predict(X_test)])
print(total_predictions)

[[3 6 6 ... 5 1 6]
 [3 6 6 ... 5 1 6]
 [3 6 6 ... 5 1 6]
 ...
 [3 6 6 ... 5 1 6]
 [3 6 6 ... 5 1 6]
 [3 6 6 ... 5 1 6]]


In [10]:
total_predictions = np.transpose(total_predictions)
total_predictions

array([[3, 3, 3, ..., 3, 3, 3],
       [6, 6, 6, ..., 6, 6, 6],
       [6, 6, 6, ..., 6, 6, 6],
       ...,
       [5, 5, 5, ..., 5, 5, 5],
       [1, 1, 1, ..., 1, 1, 1],
       [6, 6, 6, ..., 6, 6, 6]], dtype=uint8)

In [11]:
predicted_values = []
for row in total_predictions:
    majority_vote = np.bincount(row).argmax()
    predicted_values.append(majority_vote)

In [12]:
predicted_values = np.asarray(predicted_values)

In [13]:
print("Accuracy of Random Forest Tree:",metrics.accuracy_score(y_test, predicted_values))

Accuracy of Random Forest Tree: 0.951


In [14]:
# Genetic algorithm
def evolve(trees):
    return

def crossover(tree1, tree2):
    return

def mutate(tree):
    return

def tournament(trees):
    return best
