In [2]:
import sys
import os
import pandas
import csv
from sklearn import svm, preprocessing, neighbors, ensemble, neural_network, naive_bayes
import joblib

In [4]:
training_data = pandas.read_csv('train_test_datasets/features_train_70000.csv') #import our training data from the csv file (56,000 image)
testing_data = pandas.read_csv('train_test_datasets/features_test_70000.csv') #import our test data from the csv file (14,000 image)

x_train, y_train = training_data.drop([' Tag'], axis=1), training_data[' Tag']
x_test, y_test = testing_data.drop([' Tag'], axis=1), testing_data[' Tag']

scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
x_train, x_test = scaler.fit_transform(x_train), scaler.fit_transform(x_test) #scale the features in the interval [0:1]

  return self.partial_fit(X, y)
  return self.partial_fit(X, y)


In [6]:
def create_svm_classifier(joblib_file):
    '''
        Train SVM classifier with training set (feature vectors) from csv file,
        and load the trained model in .joblib file
    '''
    classifier = svm.SVC(kernel = 'linear', C=1, gamma='auto')
    classifier.fit(x_train, y_train) # fit svm classifier to the train data
        
    print("SVM classifier:")
    print("Accuracy on train set: ", classifier.score(x_train, y_train))
    print("Accuracy on test set: ", classifier.score(x_test, y_test))
    
    joblib.dump(classifier, joblib_file) # save classifier in the joblib file

In [7]:
def create_knn_classifier(joblib_file, n_neighbors):
    '''
        Train KNN classifier with training set (feature vectors) from csv file,
        and load the trained model in .joblib file
    '''
    classifier = neighbors.KNeighborsClassifier(n_neighbors)
    classifier.fit(x_train, y_train) # fit svm classifier to the train data
    
    print("K-Nearest Neighbors classifier: ")
    print("Accuracy on train set: ", classifier.score(x_train, y_train))
    print("Accuracy on test set: ", classifier.score(x_test, y_test))
    
    joblib.dump(classifier, joblib_file) # save classifier in the joblib file

In [8]:
def create_random_forest_classifier(joblib_file, max_depth, n_estimators, max_features):
    '''
        Train RF classifier with training set (feature vectors) from csv file,
        and load the trained model in .joblib file
    '''
    classifier = ensemble.RandomForestClassifier(max_depth=max_depth, n_estimators=n_estimators, max_features=max_features, min_samples_split=4)
    
    classifier.fit(x_train, y_train) # fit svm classifier to the train data
    
    print("Random Forests classifier: ")
    print("Accuracy on train set: ", classifier.score(x_train, y_train))
    print("Accuracy on test set: ", classifier.score(x_test, y_test))
    
    joblib.dump(classifier, joblib_file) # save classifier in the joblib file

In [9]:
def create_mlp_classifier(joblib_file, alpha):
    '''
        Train MLP classifier with training set (feature vectors) from csv file,
        and load the trained model in .joblib file
    '''    
    classifier = neural_network.MLPClassifier(alpha=alpha, max_iter=100)
    classifier.fit(x_train, y_train) # fit svm classifier to the train data
    y_prediction = classifier.predict(x_test)
    
    print("MLP NN classifier: ")
    print("Accuracy on train set: ", classifier.score(x_train, y_train))
    print("Accuracy on test set: ", classifier.score(x_test, y_test))
    
    joblib.dump(classifier, joblib_file) # save classifier in the joblib file

In [10]:
def create_nb_classifier(joblib_file):
    '''
        Train Naive Bayes classifier with training set (feature vectors) from csv file,
        and load the trained model in .joblib file
    '''    
    classifier = naive_bayes.GaussianNB()
    classifier.fit(x_train, y_train) # fit svm classifier to the train data
    y_prediction = classifier.predict(x_test)
    
    print("Naive Bayes classifier: ")
    print("Accuracy on train set: ", classifier.score(x_train, y_train))
    print("Accuracy on test set: ", classifier.score(x_test, y_test))
    
    joblib.dump(classifier, joblib_file) # save classifier in the joblib file

In [8]:
create_svm_classifier('trained_classifiers/svm-classifier.joblib')
create_knn_classifier('trained_classifiers/knn-classifier.joblib', 7)
create_random_forest_classifier('trained_classifiers/rf-classifier.joblib', 15, 200, 4)
create_mlp_classifier('trained_classifiers/mlp-classifier.joblib', .01)
create_nb_classifier('trained_classifiers/nb-classifier.joblib')

SVM classifier:
Accuracy on train set:  0.9048733013089519
Accuracy on test set:  0.9029930709336381
K-Nearest Neighbors classifier: 
Accuracy on train set:  0.9436775656708155
Accuracy on test set:  0.9294235302521608
Random Forests classifier: 
Accuracy on train set:  0.9846961552884873
Accuracy on test set:  0.9311379384241731




MLP NN classifier: 
Accuracy on train set:  0.926998696405293
Accuracy on test set:  0.922423030216444
Naive Bayes classifier: 
Accuracy on train set:  0.898480330005893
Accuracy on test set:  0.8510607900564326
