In [2]:
# Imports
from cmath import e
from time import time
import cv2
import numpy as np
from skimage.feature import graycomatrix, graycoprops
from skimage.measure import shannon_entropy as Entropy
from sklearn import preprocessing, svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier,VotingClassifier,BaggingClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split,GridSearchCV,cross_validate
from sklearn.neural_network import MLPClassifier
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from tuning import svmTuner
import utils
import os
import pandas as pd
import numpy as np

In [3]:
"""
    extractGLCM(filename, outputFileName):
    - filename: path to the image
    - outputFileName: name of the output file
    - returns: numpy array of features
"""
def extractGLCM(filename, outputFileName):
    img = cv2.imread(filename)
    
    # Extract Gray Level Channel
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # crop image to remove the bottom part of the image 

    img = cv2.resize(img,(int(img.shape[1]/2),int(img.shape[0]/2)),interpolation=cv2.INTER_AREA)
    img = cv2.GaussianBlur(img,(5,5),0)

    from LBP.commonfunctions import show_images
    # show_images([img])
    
    
    
    step = [1]  # step size
    step = np.asarray(step)
    angle = [0, np.pi/4, np.pi/2, 3*np.pi/4]  # angles (0, 45, 90, 135)

    coOccuranceMat = graycomatrix(
        img, step, angle, levels=256, normed=True)

    # calculate the GLCM properties
    contrast = graycoprops(coOccuranceMat, prop='contrast')    
    correlation = graycoprops(coOccuranceMat, prop='correlation')
    energy = graycoprops(coOccuranceMat, prop='energy')
    homogeneity = graycoprops(coOccuranceMat, prop='homogeneity')
    # ASM = graycoprops(coOccuranceMat, prop='ASM')
    
    # entropy = []
    # entropy.insert(0, Entropy(coOccuranceMat[0, 0, :, :]))
    # entropy.insert(1, Entropy(coOccuranceMat[0, 1, :, :]))
    # entropy.insert(2, Entropy(coOccuranceMat[1, 0, :, :]))
    # entropy.insert(3, Entropy(coOccuranceMat[1, 1, :, :]))
    # entropy = np.array(entropy)

    # calculate Entropy
    # entropy(i,j) = -sum(p(i,j) * log(p(i,j)))
    entropy = -np.sum(coOccuranceMat * np.log(coOccuranceMat + 1e-100), axis=(0, 1))
    

    
    # append all features to a numpy array
    features = np.array([contrast.flatten(), correlation.flatten(),
                        homogeneity.flatten(), entropy.flatten(), energy.flatten()])

    features = features.flatten()
    features = features.reshape(1, -1)
    

    with open(outputFileName+'.csv', 'a') as csvfile:
        np.savetxt(csvfile, features, fmt='%f', delimiter=',')
        csvfile.close()
    return features

In [4]:
# extractGLCM('Females/Females/F5.jpg','d')

In [5]:
def removeFileIfExists(fileName):
    if os.path.isfile(fileName):
        os.remove(fileName)

In [6]:
def writeFeaturesToFile(features, fileName):
    with open(fileName, 'a') as csvfile:
        np.savetxt(csvfile, features, fmt='%f', delimiter=',')
        csvfile.close()

In [7]:
def writeHeadersOfCSVFile(fileName):
    with open(fileName, 'a') as csvfile:
        np.savetxt(csvfile, [], delimiter=',',
                   header='Contrast1,Contrast2,Contrast3,Contrast4,homogeneity1,homogeneity2,homogeneity3,homogeneity4,energy1,energy2,energy3,energy4,correlation1,correlation2,correlation3,correlation4,entropy1,entropy2,entropy3,entropy4')
        csvfile.close()


In [8]:
def readFeaturesFromFile(fileName):
    CSVData = open(fileName)
    features = np.genfromtxt(CSVData, delimiter=",")
    return features

In [9]:
def extractFeaturesFromFolder(folder,outputFileName,gender):
    train_classes=[]
    features=[]
    for filename in os.listdir(folder):
        try:
            features.append(extractGLCM(folder+filename,outputFileName))
            train_classes.append(gender)
        except Exception as e:
            print(e)
            continue
    return np.array(features),np.array(train_classes)


In [10]:
def extractICDARFeatures():
    features=[]
    # read csv file
    df = pd.read_csv('train_answers.csv')
    # get the labels
    icdar_classes = df['male'].values
    print(icdar_classes.shape)
    icdar_classes_train = np.array([])
    i = 0
    for filename in os.listdir('images_gender/images/train'):
        try:
            features.append(extractGLCM('images_gender/images/train/'+filename,'icdar'))
            icdar_classes_train = np.append(icdar_classes_train, icdar_classes[i//2])
            i = i + 1
        except Exception as e:
            print(e)
            continue
    icdar_classes  = icdar_classes_train
    return np.array(features),np.array(icdar_classes)

In [11]:
def getBestParamsForANN(X_train,Y_train,X_test,Y_test):
    scaler = preprocessing.StandardScaler().fit(X_train)
    solver = ['adam']
    alpha = [0.0001, 0.001, 0.01, 0.1, 1]
    max_iter = [1000, 2000, 3000, 4000]
    layer_sizes = [(5,),(5,5),(15,),(10,10)]
    scores = []
    scores_train = []
    with open('ann.csv','w') as csvfile:
        np.savetxt(csvfile, [], delimiter=',',
                   header='solver,alpha,max_iter,layer_size,score_train,score')
    for i in range(len(solver)):
        for j in range(len(alpha)):
            for k in range(len(max_iter)):
                for l in range(len(layer_sizes)):
                    clf = MLPClassifier(solver=solver[i], alpha=alpha[j], max_iter=max_iter[k],
                            hidden_layer_sizes=(layer_sizes[l]),random_state=1)
                    clf.fit(scaler.transform(X_train), Y_train)
                    temp_train = clf.score(scaler.transform(X_train), Y_train)
                    scores_train.append((temp_train,i,j,k,l))
                    print("Accuracy on training set: {:.2f}".format(
                        temp_train))
                    temp = clf.score(scaler.transform(X_test), Y_test)
                    scores.append((temp,i,j,k,l))
                    print("Accuracy on test set: {:.4f}".format(
                        temp))
                    with open('ann.csv','a') as csvfile:
                        np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")
                    print("\n")  
    
    # print max score and score_training together with params
    print("\n")
    print(max(scores))
    print((max(scores_train)))
  

In [12]:
def getBestParamsForSVM(X_train,Y_train,X_test,Y_test):
    scaler = preprocessing.StandardScaler().fit(X_train)
    C = [ 0.01, 0.1, 1, 10, 100,1000]
    gamma = [0.001, 0.01, 0.1, 1]
    kernel = ['linear','rbf']
    scores = []
    scores_train = []
    
    with open('svm_results0.csv','w') as csvfile:
        np.savetxt(csvfile, [], delimiter=',',
                   header='C,gamma,score_train,score')
    for i in range(len(C)):
        for j in range(len(gamma)):
            for k in range(len(kernel)):
                clf = SVC(C=C[i], gamma=gamma[j],kernel=kernel[k],class_weight='balanced',random_state=1)
                clf.fit(scaler.transform( X_train), Y_train)
                temp_train = clf.score(scaler.transform(X_train), Y_train)
                print("Accuracy on training set: {:.2f}".format(
                    temp_train))    
                scores_train.append((temp_train,i,j))
                temp = clf.score(scaler.transform( X_test), Y_test)
                print("Accuracy on test set: {:.4f}".format(
                    temp))
                scores.append((temp,i,j))
                with open('svm_results0.csv', 'a') as csvfile:
                    np.savetxt(csvfile, np.array([[C[i],gamma[j],temp_train,temp]]), delimiter=',',fmt='%s')
        
    print(max(scores))
    print(max(scores_train))
        
    


In [13]:
def FeaturesFromScratch():
    removeFileIfExists('female.csv')
    removeFileIfExists('male.csv')
    removeFileIfExists('icdar.csv')

    writeHeadersOfCSVFile('female.csv')
    writeHeadersOfCSVFile('male.csv')
    writeHeadersOfCSVFile('icdar.csv')

    f_features,f_classes = extractFeaturesFromFolder('Females/Females/','female',0)
    f_features =  f_features.reshape(f_features.shape[0], -1)
    print(f_features.shape)
    print(f_classes.shape)
    m_features,m_classes = extractFeaturesFromFolder('Males/Males/','male',1)
    m_features =  m_features.reshape(m_features.shape[0], -1)
    print(m_features.shape)
    print(m_classes.shape)
    i_features,i_classes = extractICDARFeatures()
    i_features = i_features.reshape(i_features.shape[0], -1)
    print(i_features.shape)
    print(i_classes.shape)



In [14]:
def getBestParamsForRandomForest(X_train,Y_train,X_test,Y_test):
    n_estimators = [10,50,100,200]
    max_depth = [2,5,10,20,50]
    min_samples_split = [2,5,10,20,50]
    min_samples_leaf = [1,2,5,10,20]
    max_features = ['auto']
    bootstrap = [True,False]
    scores = []
    scores_train = []
    with open('randomForest0.csv', 'w') as csvfile:
        np.savetxt(csvfile,[],
                   header='n_estimators,max_depth,min_samples_split,min_samples_leaf,max_features,bootstrap,score_training,score',
                   delimiter=',', fmt='%s')
    for i in range(len(n_estimators)):
        for j in range(len(max_depth)):
            for k in range(len(min_samples_split)):
                for l in range(len(min_samples_leaf)):
                    for m in range(len(max_features)):
                        for n in range(len(bootstrap)):
                                clf = RandomForestClassifier(n_estimators=n_estimators[i],max_depth=max_depth[j],min_samples_split=min_samples_split[k],
                                                             min_samples_leaf=min_samples_leaf[l],max_features=max_features[m],bootstrap=bootstrap[n],
                                                             criterion='gini',random_state=1)
                                clf.fit(X_train, Y_train)
                                temp_train = clf.score(X_train, Y_train)
                                scores_train.append((temp_train,i,j,k,l,m,n))
                                print("Accuracy on training set: {:.2f}".format(
                                    temp_train))
                                temp = clf.score( X_test, Y_test)
                                scores.append((temp,i,j,k,l,m,n))
                                print("Accuracy on test set: {:.4f}".format(
                                    temp))
                                print("\n")
                                with open('randomForest0.csv', 'a') as csvFile:
                                    np.savetxt(csvFile, np.array([[n_estimators[i],max_depth[j],min_samples_split[k],min_samples_leaf[l],max_features[m],bootstrap[n],temp_train,temp]]), delimiter=',', fmt='%s')
    print(max(scores))
    print(max(scores_train))

In [15]:
def randomForest(X_train,Y_train,X_test,Y_test):
    scaler = preprocessing.StandardScaler().fit(X_train)
    clf = RandomForestClassifier(n_estimators=10,max_depth=5,min_samples_split=10,min_samples_leaf=1,bootstrap=False,max_features='log2',criterion='gini',random_state=1)
    clf.fit(scaler.transform(X_train), Y_train)
    # score = clf.score(X_test, Y_test)
    predicted_labels = clf.predict(scaler.transform(X_test))
    score = accuracy_score(Y_test, predicted_labels)
    print("Accuracy on test set: {:.4f}".format(score))
    print("Accuracy on training set: {:.4f}".format(clf.score(scaler.transform( X_train), Y_train)))
    return predicted_labels


In [16]:
def svm(X_train,Y_train,X_test,Y_test):
    # train the classifier and predict the test data
    scaler = preprocessing.StandardScaler().fit(X_train)
    print("Training the classifier...")          
    clf = SVC(C=5000.0, gamma=0.001,kernel='rbf',class_weight='balanced',random_state=1)
    clf.fit(scaler.transform( X_train), Y_train) 
    
    print("Predicting the test data...")
    # score_training = clf.score(scaler.transform( X_train), Y_train) 
    # score = clf.score(scaler.transform(X_test), Y_test)
    predicted_labels = clf.predict(scaler.transform(X_test))
    score = accuracy_score(Y_test, predicted_labels)
    print("Accuracy on test set: {:.4f}".format(score))
    print("Accuracy on training set: {:.4f}".format(clf.score(scaler.transform( X_train), Y_train)))
    return predicted_labels

In [17]:
def DecisionTree(X_train,Y_train,X_test,Y_test):
    scaler = preprocessing.StandardScaler().fit(X_train)
    clf = DecisionTreeClassifier(random_state=1,max_depth=5,min_samples_split=50,min_samples_leaf=10,max_features='auto',criterion='gini')
    clf.fit(scaler.transform(X_train), Y_train)
    # score = clf.score(X_test, Y_test)
    predicted_labels = clf.predict(scaler.transform(X_test))
    score = accuracy_score(Y_test, predicted_labels)
    print("Accuracy on test set: {:.4f}".format(score))
    print("Accuracy on training set: {:.4f}".format(clf.score(scaler.transform(X_train), Y_train)))
    return predicted_labels

In [18]:
def crossValidation(X_train,Y_train):
    # X_train,Y_train = shuffle(X_train,Y_train)
    scores = cross_validate(RandomForestClassifier(n_estimators=50,max_depth=100,min_samples_split=20,min_samples_leaf=5,bootstrap=True,max_features='log2',criterion='gini',random_state=1), X_train, Y_train, cv=5,return_train_score=True)
    print(sorted(scores.keys()))
    print(scores['test_score'])
    print(scores['train_score'])

In [19]:
def getBestParamsForDT(X_train,Y_train,X_test,Y_test):
    max_depth = [2,5,10,20,50,100]
    min_samples_split = [2,5,10,20,50]
    min_samples_leaf = [1,2,5,10,20]
    max_features = ['auto','sqrt','log2']
    scores = []
    scores_train = []
    scaler = preprocessing.StandardScaler().fit(X_train)
    with open('decisionTree.csv', 'w') as csvfile:
        np.savetxt(csvfile,[],
                   header='max_depth,min_samples_split,min_samples_leaf,max_features,score_training,score',
                   delimiter=',', fmt='%s')
    for i in range(len(max_depth)):
        for j in range(len(min_samples_split)):
            for k in range(len(min_samples_leaf)):
                for l in range(len(max_features)):
                        clf = DecisionTreeClassifier(random_state=1,max_depth=max_depth[i],min_samples_split=min_samples_split[j],
                                                     min_samples_leaf=min_samples_leaf[k],max_features=max_features[l],criterion='gini')
                        clf.fit(scaler.transform(X_train), Y_train)
                        temp_train = clf.score(scaler.transform( X_train), Y_train)
                        scores_train.append((temp_train,i,j,k,l))
                        print("Accuracy on training set: {:.2f}".format(
                            temp_train))
                        temp = clf.score(scaler.transform( X_test), Y_test)
                        scores.append((temp,i,j,k,l))
                        print("Accuracy on test set: {:.4f}".format(
                            temp))
                        print("\n")
                        with open('decisionTree.csv', 'a') as csvFile:
                            np.savetxt(csvFile, np.array([[max_depth[i],min_samples_split[j],min_samples_leaf[k],max_features[l],temp_train,temp]]), delimiter=',', fmt='%s')
    

In [20]:
# FeaturesFromScratch()

In [21]:

# FeaturesFromScratch() 
f_features = readFeaturesFromFile('female.csv')
m_features = readFeaturesFromFile('male.csv')
i_features = readFeaturesFromFile('icdar.csv')


# f_features = np.delete(f_features,np.arange(13,17),1)
# m_features = np.delete(m_features,np.arange(13,17),1)
# i_features = np.delete(i_features,np.arange(13,17),1)

# f_features = f_features[:,[0,4,8,12]]
# m_features = m_features[:,[0,4,8,12]]
# i_features = i_features[:,[0,4,8,12]]


print(m_features.shape)

train_classes = []
# read csv file
df = pd.read_csv('train_answers.csv')
# get the labels
icdar_classes = df['male'].values
print(icdar_classes.shape)
icdar_classes_train = np.array([])

for i in range(1, 132):
    try:
        train_classes.append(0)
    except Exception as e:
        print(e)
        continue

for i in range(1, 233):
    try:
        train_classes.append(1)
    except Exception as e:
        print(e)
        continue

for i in range(0, 564):
    try:
        icdar_classes_train = np.append(icdar_classes_train, icdar_classes[i//2])
        i = i + 1
    except Exception as e:
        print(e)
        continue

icdar_classes = icdar_classes_train

X_train = np.concatenate((f_features,m_features,i_features),axis=0)
Y_train = np.concatenate((train_classes,icdar_classes),axis=0)



 
X_train, X_test, Y_train, Y_test = train_test_split(
    X_train, Y_train, test_size=.2,random_state=1)

# DecisionTree(X_train,Y_train,X_test,Y_test)
# svm_ = svm(X_train,Y_train,X_test,Y_test)
# randomForest_ = randomForest(X_train,Y_train,X_test,Y_test)

getBestParamsForANN(X_train,Y_train,X_test,Y_test)
getBestParamsForSVM(X_train,Y_train,X_test,Y_test)
# svm(X_train,Y_train,X_test,Y_test)
# crossValidation(X_train,Y_train)
# getBestParamsForRandomForest(X_train,Y_train,X_test,Y_test)
# getBestParamsForDT(X_train,Y_train,X_test,Y_test)


(232, 20)
(282,)
Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.66
Accuracy on test set: 0.5699




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")
  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.70
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.72
Accuracy on test set: 0.5376


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.66
Accuracy on test set: 0.5699




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.70
Accuracy on test set: 0.5860




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.73
Accuracy on test set: 0.5376




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.66
Accuracy on test set: 0.5699




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.70
Accuracy on test set: 0.5860




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.73
Accuracy on test set: 0.5376




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.66
Accuracy on test set: 0.5699




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.70
Accuracy on test set: 0.5860




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.73
Accuracy on test set: 0.5376




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.66
Accuracy on test set: 0.5699




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")
  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.70
Accuracy on test set: 0.5753


Accuracy on training set: 0.73
Accuracy on test set: 0.5484




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.66
Accuracy on test set: 0.5699




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.71
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.73
Accuracy on test set: 0.5484




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.66
Accuracy on test set: 0.5699




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.71
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.73
Accuracy on test set: 0.5484




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.66
Accuracy on test set: 0.5699




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.71
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.73
Accuracy on test set: 0.5484




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.67
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")
  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.70
Accuracy on test set: 0.5860




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.74
Accuracy on test set: 0.5645


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.67
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.71
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.75
Accuracy on test set: 0.5645




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.67
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.71
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.75
Accuracy on test set: 0.5645




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.67
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.71
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.75
Accuracy on test set: 0.5645




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.67
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.70
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.73
Accuracy on test set: 0.5538




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.67
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.70
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.73
Accuracy on test set: 0.5538




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.67
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.70
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.73
Accuracy on test set: 0.5538




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.67
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.70
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.73
Accuracy on test set: 0.5538




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5860




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.64
Accuracy on test set: 0.6075




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.66
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5860




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.64
Accuracy on test set: 0.6075




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.66
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5860




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.64
Accuracy on test set: 0.6075




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.66
Accuracy on test set: 0.5753




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5968




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.62
Accuracy on test set: 0.5860




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.64
Accuracy on test set: 0.6075




  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


Accuracy on training set: 0.66
Accuracy on test set: 0.5753




(0.6075268817204301, 0, 4, 3, 2)
(0.7462887989203779, 0, 2, 3, 3)


  np.savetxt(csvfile, np.array([[solver[i],alpha[j],max_iter[k],layer_sizes[l],temp_train,temp]]), delimiter=',',fmt="%s")


In [22]:
# gridSearchCv for svm 
scaler = preprocessing.StandardScaler().fit(X_train)
param_grid = {'C': [0.1, 1, 10, 100, 1000,5000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 'kernel': ['rbf','linear'], 'class_weight': ['balanced']}
grid = GridSearchCV(SVC(random_state=1), param_grid, cv=5, verbose=3)
grid.fit(scaler.transform(X_train), Y_train)
print(grid.best_params_)
print(grid.best_score_)
print(grid.score(scaler.transform(X_test), Y_test))
print(grid.score(scaler.transform(X_train), Y_train))


Fitting 5 folds for each of 60 candidates, totalling 300 fits
[CV 1/5] END C=0.1, class_weight=balanced, gamma=1, kernel=rbf;, score=0.544 total time=   0.0s
[CV 2/5] END C=0.1, class_weight=balanced, gamma=1, kernel=rbf;, score=0.561 total time=   0.0s
[CV 3/5] END C=0.1, class_weight=balanced, gamma=1, kernel=rbf;, score=0.628 total time=   0.0s
[CV 4/5] END C=0.1, class_weight=balanced, gamma=1, kernel=rbf;, score=0.608 total time=   0.0s
[CV 5/5] END C=0.1, class_weight=balanced, gamma=1, kernel=rbf;, score=0.574 total time=   0.0s
[CV 1/5] END C=0.1, class_weight=balanced, gamma=1, kernel=linear;, score=0.530 total time=   0.0s
[CV 2/5] END C=0.1, class_weight=balanced, gamma=1, kernel=linear;, score=0.534 total time=   0.0s
[CV 3/5] END C=0.1, class_weight=balanced, gamma=1, kernel=linear;, score=0.547 total time=   0.0s
[CV 4/5] END C=0.1, class_weight=balanced, gamma=1, kernel=linear;, score=0.628 total time=   0.0s
[CV 5/5] END C=0.1, class_weight=balanced, gamma=1, kernel=lin