In [32]:
#all imports
#this will be ran first to get all necessary libraries
import numpy as np
import pandas as pd

#used for random forest 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

#used for Support Vector Machine
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split #will be used for other methods, only need to import once
from sklearn.metrics import accuracy_score

#used for AdaBoost
from sklearn.ensemble import AdaBoostClassifier

#used for knn
from sklearn.neighbors import KNeighborsClassifier

#used for neural network
from sklearn.neural_network import MLPClassifier

#image libraries
from skimage.transform import resize
from skimage.io import imread

#To navigate the files and save pathways
import os
import matplotlib.pyplot as plt

#to store the parameters found for later
import pickle

import warnings #these two lines git rid of the big red warnings
warnings.simplefilter('ignore')

In [27]:
#next, we will go ahead and bring in the test images

datadir = 'data/' #I named the file storing the pictures as data
categories = ['ChickenHead', 'ElephantHead']

flat_data_arr = []
target_array = []

for i in categories:
    print('loading category: ', i) #just to ensure its working
    path = os.path.join(datadir, i) #path begins to store file path to images
    print(path) #again, just to ensure its working
    for img in os.listdir(path):
        img_array = imread(os.path.join(path, img)) #reads from the file path
        img_resized = resize (img_array, (150,150,3)) #resizes the pictures found in the file path
        flat_data_arr.append(img_resized.flatten())
        target_array.append(categories.index(i)) #adds the edited image to the array
    print('Loaded category: ', i)

loading category:  ChickenHead
data/ChickenHead
Loaded category:  ChickenHead
loading category:  ElephantHead
data/ElephantHead
Loaded category:  ElephantHead


In [28]:
flat_data = np.array(flat_data_arr)
target = np.array(target_array)

df = pd.DataFrame(flat_data)
df['Target'] = target

df.head() #should display a numeric version of the data. 

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,67491,67492,67493,67494,67495,67496,67497,67498,67499,Target
0,0.258824,0.258824,0.219608,0.231373,0.231373,0.192157,0.168627,0.168627,0.129412,0.180392,...,0.321569,0.321569,0.290196,0.301961,0.301961,0.270588,0.290196,0.290196,0.258824,0
1,0.121569,0.227451,0.105882,0.129412,0.227451,0.113725,0.176471,0.254902,0.14902,0.219608,...,0.784314,0.670588,0.513725,0.772549,0.658824,0.501961,0.768627,0.654902,0.498039,0
2,0.415686,0.698039,0.258824,0.384314,0.666667,0.235294,0.34902,0.619608,0.203922,0.321569,...,0.423529,0.615686,0.266667,0.435294,0.627451,0.270588,0.380392,0.572549,0.215686,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,0.784314,0.729412,0.623529,0.776471,0.721569,0.615686,0.737255,0.682353,0.576471,0.690196,...,0.717647,0.643137,0.517647,0.72549,0.65098,0.52549,0.721569,0.647059,0.521569,0


In [30]:
#next, we will go ahead and bring in the test images

filename = 'test/chicken1.png'

img_array = imread(filename)
img_resized = resize(img_array, (150,150,3))
test_data = np.array (img_resized.flatten())
test_data1 = test_data.reshape(1, -1)


filename = 'test/chicken2.jpg'

img_array = imread(filename)
img_resized = resize(img_array, (150,150,3))
test_data = np.array (img_resized.flatten())
test_data2 = test_data.reshape(1, -1)


filename = 'test/chicken3.jpg'

img_array = imread(filename)
img_resized = resize(img_array, (150,150,3))
test_data = np.array (img_resized.flatten())
test_data3 = test_data.reshape(1, -1)


filename = 'test/elephent1.jpg'

img_array = imread(filename)
img_resized = resize(img_array, (150,150,3))
test_data = np.array (img_resized.flatten())
test_data4 = test_data.reshape(1, -1)


filename = 'test/elephent2.jpg'

img_array = imread(filename)
img_resized = resize(img_array, (150,150,3))
test_data = np.array (img_resized.flatten())
test_data5 = test_data.reshape(1, -1)

In [4]:
X = df.values[:, :-1]
Y = df.values[:, -1]

xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size = .2, random_state = 47)

In [39]:
#random forest
bestTest = 0
bestTestAt = 0
bestTestTrain = 0

for i in range (3, 20, 1):
    dt = DecisionTreeClassifier (criterion = 'entropy', max_depth =i)
    dt.fit (xtrain, ytrain)
    ypred = dt.predict(xtest)
    if(accuracy_score(ypred, ytest) > bestTest):
        bestTest = accuracy_score(ypred, ytest)
        bestTestAt = i
        bestTestTrain = accuracy_score(ypred_train, ytrain)

#printing the best test score and the train score of it
print("The best test score was depth ", bestTestAt, " with a score of ", bestTest)
print("That depth value had a train score of ", bestTestTrain)

model = DecisionTreeClassifier (criterion = 'entropy', max_depth =bestTestAt)
pickle.dump(model, open('FinalForest.p', 'wb'))

The best test score was depth  4  with a score of  0.85
That depth value had a train score of  1.0


In [40]:
#using pickle to retrieve the RandomForest model
model = pickle.load (open('FinalForest.p', 'rb'))
model.fit(xtrain, ytrain)

p1 = model.predict(test_data1)
p2 = model.predict(test_data2)
p3 = model.predict(test_data3)
p4 = model.predict(test_data4)
p5 = model.predict(test_data5)

if p1 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p2 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p3 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p4 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p5 == 0: print ('Chicken')
else: 
    print('Elephant')

Chicken
Chicken
Elephant
Chicken
Elephant


In [9]:
#SVM
bestTest = 0
bestTestTrain = 0
bestTrain = 0
bestTrainTest = 0
bestTestAt = 0
bestTrainAt = 0

for i in range(1, 10): 
    s = SVC(kernel = 'linear', C = i)
    s.fit(xtrain, ytrain)
    ypred_test = s.predict(xtest)
    ypred_train = s.predict(xtrain)
    if (accuracy_score(ypred_test, ytest) > bestTest):
        bestTest = accuracy_score(ypred_test, ytest)
        bestTestTrain = accuracy_score(ypred_train, ytrain)
        bestTestAt = i
    #still have this code in, but ended up not using it
    #test accuracy is more important than train accuracy
    if (accuracy_score(ypred_train, ytrain) > bestTrain):
        bestTrain = accuracy_score(ypred_train, ytrain)
        bestTrainTest = accuracy_score(ypred_test, ytest)
        bestTrainAt = i

#printing the best test score and the train score of it
print("The best test score was at ", bestTestAt, " with a score of ", bestTest)
print("That C value had a train score of ", bestTestTrain)

model = SVC(kernel = 'linear', C = bestTestAt) #saving the best test score
pickle.dump(model, open('FinalSVM.p', 'wb'))

The best test score was at  1  with a score of  0.95
That C value had a train score of  1.0


In [31]:
#using pickle to retrieve the SVM model
model = pickle.load (open('FinalSVM.p', 'rb'))
model.fit(xtrain, ytrain)

p1 = model.predict(test_data1)
p2 = model.predict(test_data2)
p3 = model.predict(test_data3)
p4 = model.predict(test_data4)
p5 = model.predict(test_data5)

if p1 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p2 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p3 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p4 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p5 == 0: print ('Chicken')
else: 
    print('Elephant')
    
    

Chicken
Chicken
Chicken
Elephant
Elephant


In [44]:
#Adaboost
bestTest = 0
bestTestAt = 0
bestTestTrain = 0

for e in range (5, 50):
    clf = AdaBoostClassifier(base_estimator = None, n_estimators = e, learning_rate = 1.0)
    clf.fit(xtrain, ytrain)
    if(clf.score(xtest, ytest) > bestTest):
        bestTest = clf.score(xtest, ytest)
        bestTestAt = e
        bestTestTrain = clf.score(xtrain, ytrain)

#printing the best test score and the train score of it
print("The best test score was depth ", bestTestAt, " with a score of ", bestTest)
print("That depth value had a train score of ", bestTestTrain)

model = AdaBoostClassifier(base_estimator = None, n_estimators = bestTestAt, learning_rate = 1.0) #saving the best test score
pickle.dump(model, open('FinalAda.p', 'wb'))

The best test score was depth  22  with a score of  0.95
That depth value had a train score of  1.0


In [None]:
#using pickle to retrieve the Ada model
model = pickle.load (open('FinalAda.p', 'rb'))
model.fit(xtrain, ytrain)

p1 = model.predict(test_data1)
p2 = model.predict(test_data2)
p3 = model.predict(test_data3)
p4 = model.predict(test_data4)
p5 = model.predict(test_data5)

if p1 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p2 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p3 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p4 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p5 == 0: print ('Chicken')
else: 
    print('Elephant')

In [41]:
#KNN
bestTest = 0
bestTestAt = 0
bestTestTrain = 0

for k in range(1, 41):
    knn = KNeighborsClassifier (n_neighbors = k)
    knn.fit (xtrain, ytrain)
    
    ypred = dt.predict(xtest)
    if(accuracy_score(ypred, ytest) > bestTest):
        bestTest = accuracy_score(ypred, ytest)
        bestTestAt = i
        bestTestTrain = accuracy_score(ypred_train, ytrain)
        
#printing the best test score and the train score of it
print("The best test score was depth ", bestTestAt, " with a score of ", bestTest)
print("That depth value had a train score of ", bestTestTrain)

model = KNeighborsClassifier (n_neighbors = bestTestAt) #saving the best test score
pickle.dump(model, open('FinalKNN.p', 'wb'))

The best test score was depth  19  with a score of  0.8
That depth value had a train score of  1.0


In [None]:
#using pickle to retrieve the KNN model
model = pickle.load (open('FinalKNN.p', 'rb'))
model.fit(xtrain, ytrain)

p1 = model.predict(test_data1)
p2 = model.predict(test_data2)
p3 = model.predict(test_data3)
p4 = model.predict(test_data4)
p5 = model.predict(test_data5)

if p1 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p2 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p3 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p4 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p5 == 0: print ('Chicken')
else: 
    print('Elephant')

In [45]:
#Neural Network
bestTest = 0
bestTestAt = 0
bestTestTrain = 0

for s in range(1, 5):
    r = MLPClassifier(hidden_layer_size = (s,), activation = 'relu')
    r.fit (xtrain, ytrain)
    
    ypred = r.predict(xtest)
    if(accuracy_score(ypred, ytest) > bestTest):
        bestTest = accuracy_score(ypred, ytest)
        bestTestAt = i
        bestTestTrain = accuracy_score(ypred_train, ytrain)
        
print("The best test score was depth ", bestTestAt, " with a score of ", bestTest)
print("That depth value had a train score of ", bestTestTrain)

model = MLPClassifier(hidden_layer_size = (bestTestAt,), activation = 'relu') #saving the best test score
pickle.dump(model, open('FinalNeural.p', 'wb'))


TypeError: __init__() got an unexpected keyword argument 'hidden_layer_size'

In [None]:
#using pickle to retrieve the Neural Network model
model = pickle.load (open('FinalNeural.p', 'rb'))
model.fit(xtrain, ytrain)

p1 = model.predict(test_data1)
p2 = model.predict(test_data2)
p3 = model.predict(test_data3)
p4 = model.predict(test_data4)
p5 = model.predict(test_data5)

if p1 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p2 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p3 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p4 == 0: print ('Chicken')
else: 
    print('Elephant')
    
if p5 == 0: print ('Chicken')
else: 
    print('Elephant')