In [4]:
import csv
import numpy as np
import math
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
import tensorflow as tf
from tqdm import tqdm_notebook
import pandas as pd

In [5]:
def import_data(file_path_input_diffn_pairs, file_path_input_same_pairs,human_observed_features):
    data_input_diffn_pairs = []
    data_input_same_pairs = []
    human_observed_features_matrix = []
    
    with open(file_path_input_same_pairs, 'rU') as f:
        reader = csv.reader(f)
        for row in reader:
            dataRow = []
            for column in row:
                dataRow.append(column)
            data_input_same_pairs.append(dataRow)
    
    with open(file_path_input_diffn_pairs, 'rU') as f:
        reader = csv.reader(f)
        for row in reader:
            dataRow = []
            for column in row:
                dataRow.append(column)
            data_input_diffn_pairs.append(dataRow) 

    with open(human_observed_features, 'rU') as f:
        reader = csv.reader(f)
        for row in reader:
            dataRow = []
            for column in row:
                dataRow.append(column)
            human_observed_features_matrix.append(dataRow) 
    
    input_same_pairs = np.array(data_input_same_pairs)
    input_diffn_pairs = np.array(data_input_diffn_pairs)
    human_observed_features_list = np.array(human_observed_features_matrix)
    
    return input_same_pairs, input_diffn_pairs, human_observed_features_list

# Partition the target vector as 80% of total for training
def GenerateTrainingTarget(rawTraining,TrainingPercent):
    TrainingLen = int(math.ceil(len(rawTraining)*(TrainingPercent*0.01)))
    t           = rawTraining[:TrainingLen]
    return t

def GenerateTestingTarget(rawTraining,TrainingCount, TestingPercent = 10):
    T_len = int(math.ceil(len(rawTraining)*0.01*TestingPercent))
    V_End = TrainingCount + T_len
    dataMatrix = rawTraining[TrainingCount+1:V_End]
    return dataMatrix

# Partition the data(input values) vector as 80% of total for training
def GenerateTrainingDataMatrix(rawData, TrainingPercent):
    T_len = int(math.ceil(len(rawData)*0.01*TrainingPercent))
    d2 = rawData[:T_len]
    return d2

def GenerateValidationDataMatrix(rawData, TrainingCount, ValPercent = 10):
    T_len = int(math.ceil(len(rawData)*0.01*ValPercent))
    V_End = TrainingCount + T_len
    dataMatrix = rawData[TrainingCount+1:V_End]
    return dataMatrix

def GenerateBigSigma(Data, MuMatrix,IsSynthetic):
    DataT       = np.transpose(Data)
    BigSigma    = np.zeros((len(DataT),len(DataT)))
    varVect     = []
    for i in range(0,len(np.transpose(Data[0]))):
        vct = []
        for j in range(0,1266):
            vct.append(DataT[i][j])    
        varVect.append(np.var(vct))
    
    for j in range(len(DataT)):
        BigSigma[j][j] = varVect[j]
    if IsSynthetic == True:
        BigSigma = np.dot(3,BigSigma)
    else:
        BigSigma = np.dot(200,BigSigma)
    return BigSigma

# Intermediate calculations of Gaussian Radial basis function
def GetScalar(DataRow,MuRow, BigSigInv):  
    R = np.subtract(DataRow,MuRow)
    T = np.dot(BigSigInv,np.transpose(R))  
    L = np.dot(R,T)
    return L

# Calculation of Gaussian Radial basis function using it's vector form of formula
def GetRadialBasisOut(DataRow,MuRow, BigSigInv):    
    phi_x = math.exp(-0.5*GetScalar(DataRow,MuRow,BigSigInv))
    return phi_x

def GetPhiMatrix(Data, MuMatrix, BigSigma, TrainingLen):
    PHI = np.zeros((int(TrainingLen),len(MuMatrix))) 
    BigSigInv = np.linalg.inv(BigSigma)
    for  C in range(0,len(MuMatrix)):
        for R in range(0,int(TrainingLen)):
            PHI[R][C] = GetRadialBasisOut(Data[R], MuMatrix[C], BigSigInv)
    return PHI

# Calculation of weight vector using the derived form of original linear regression equation.
def GetWeightsClosedForm(PHI, T, Lambda):
    Lambda_I = np.identity(len(PHI[0]))
    for i in range(0,len(PHI[0])):
        Lambda_I[i][i] = Lambda
    PHI_T       = np.transpose(PHI)
    PHI_SQR     = np.dot(PHI_T,PHI)
    PHI_SQR_LI  = np.add(Lambda_I,PHI_SQR)
    PHI_SQR_INV = np.linalg.inv(PHI_SQR_LI)
    INTER       = np.dot(PHI_SQR_INV, PHI_T)
    W           = np.dot(INTER, T)
    return W

In [6]:
def GetValTest(VAL_PHI,W):
    Y = np.dot(W,np.transpose(VAL_PHI))
    return Y

def GetErms(VAL_TEST_OUT,ValDataAct):
    sum = 0.0
    t=0
    accuracy = 0.0
    counter = 0
    val = 0.0
    for i in range (0,len(VAL_TEST_OUT)):
        sum = sum + math.pow((ValDataAct[i] - VAL_TEST_OUT[i]),2)
        if(np.around(VAL_TEST_OUT[i], 0) == np.around(ValDataAct[i],0)):
            counter+=1
    accuracy = ((counter/len(VAL_TEST_OUT))*100)
    return (str(accuracy) + ',' +  str(math.sqrt(sum/len(VAL_TEST_OUT))))

def GradientDescent(W, TRAINING_PHI, VAL_PHI, TEST_PHI, output_data_training, output_data_validation, output_data_testing):
    print("Entered GD")
    W_Now        = W
    La           = 2
    learningRate = 0.01
    L_Erms_Val   = []
    L_Erms_TR    = []
    L_Erms_Test  = []
    L_Accuracy_TR = 0.0
    L_Accuracy_Val = 0.0
    L_Accuracy_Test = 0.0

    for i in range(0,1000):
        Delta_E_D     = -np.dot((output_data_training[i]- np.dot(np.transpose(W_Now),TRAINING_PHI[i])),TRAINING_PHI[i])
        La_Delta_E_W  = np.dot(La,W_Now)
        Delta_E       = np.add(Delta_E_D,La_Delta_E_W)    
        Delta_W       = -np.dot(learningRate,Delta_E)
        W_T_Next      = W_Now + Delta_W
        W_Now         = W_T_Next

        #-----------------TrainingData Accuracy---------------------#
        TR_TEST_OUT   = GetValTest(TRAINING_PHI,W_T_Next) 
        Erms_TR       = GetErms(TR_TEST_OUT,output_data_training)
        L_Erms_TR.append(float(Erms_TR.split(',')[1]))
        L_Accuracy_TR = float(Erms_TR.split(',')[0])

        #-----------------ValidationData Accuracy---------------------#
        VAL_TEST_OUT  = GetValTest(VAL_PHI,W_T_Next) 
        Erms_Val      = GetErms(VAL_TEST_OUT,output_data_validation)
        L_Erms_Val.append(float(Erms_Val.split(',')[1]))
        L_Accuracy_Val = float(Erms_Val.split(',')[0])

        #-----------------TestingData Accuracy---------------------#
        TEST_OUT      = GetValTest(TEST_PHI,W_T_Next) 
        Erms_Test = GetErms(TEST_OUT,output_data_testing)
        L_Erms_Test.append(float(Erms_Test.split(',')[1]))
        L_Accuracy_Test = float(Erms_Test.split(',')[0])
    
    print ("E_rms Training   = " + str(np.around(min(L_Erms_TR),5)))
    print ("E_rms Validation = " + str(np.around(min(L_Erms_Val),5)))
    print ("E_rms Testing    = " + str(np.around(min(L_Erms_Test),5)))
    print("Training accuracy = " + str(L_Accuracy_TR))    
    print("Validation accuracy = " + str(L_Accuracy_Val))    
    print("Testing accuracy = " + str(L_Accuracy_Test))

In [7]:
def LinearRegressionMethod(input_data_training,input_data_validation,input_data_testing, output_data_training, output_data_validation, output_data_testing):
    C_Lambda = 0.03

    kmeans = KMeans(n_clusters=10, random_state=0).fit(input_data_training)
    Mu = kmeans.cluster_centers_
    
    BigSigma     = GenerateBigSigma(input_data_training, Mu, True)
    TRAINING_PHI = GetPhiMatrix(input_data_training, Mu, BigSigma, 1266)
    VAL_PHI      = GetPhiMatrix(input_data_validation, Mu, BigSigma, 158)
    TEST_PHI     = GetPhiMatrix(input_data_testing, Mu, BigSigma, 156) 
    W            = GetWeightsClosedForm(TRAINING_PHI,output_data_training,0.03)
    GradientDescent(W, TRAINING_PHI, VAL_PHI, TEST_PHI, output_data_training, output_data_validation, output_data_testing)    

In [8]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def GetLogisticTarget(W,input_data):
    
    A_temp = np.dot(input_data, W)
    A = sigmoid(A_temp)
    return A
    
def LogisticRegressionOwnImpl(input_data_training, input_data_validation, input_data_testing, output_data_training, output_data_validation, output_data_testing):
    
    NUM_OF_EPOCHS = 1000
    L_Erms_Val   = []
    L_Erms_TR    = []
    L_Erms_Test  = []
    LR = 0.0001
    W = np.random.rand(len(np.transpose(input_data_training)), 1) #(18,)

    for epoch in tqdm_notebook(range(NUM_OF_EPOCHS)):
        
        A = GetLogisticTarget(W, input_data_training)

        Z = np.subtract(A,output_data_training)
        DELTA_W = np.dot(np.transpose(input_data_training), Z)
        W = W - np.dot(LR, DELTA_W)


    #-----------------TrainingData Accuracy---------------------#
    TR_TEST_OUT   = np.dot(input_data_training,W) 
    Erms_TR       = GetErms(TR_TEST_OUT,output_data_training)
    L_Erms_TR.append(float(Erms_TR.split(',')[1]))
    L_Accuracy_TR = float(Erms_TR.split(',')[0])

    #-----------------ValidationData Accuracy---------------------#
    VAL_TEST_OUT  = np.dot(input_data_validation,W) 
    Erms_Val      = GetErms(VAL_TEST_OUT,output_data_validation)
    L_Erms_Val.append(float(Erms_Val.split(',')[1]))
    L_Accuracy_Val = float(Erms_Val.split(',')[0])

    #-----------------TestingData Accuracy---------------------#
    TEST_OUT      = np.dot(input_data_testing,W) 
    Erms_Test = GetErms(TEST_OUT,output_data_testing)
    L_Erms_Test.append(float(Erms_Test.split(',')[1]))
    L_Accuracy_Test = float(Erms_Test.split(',')[0])
    
    print ("E_rms Training   = " + str(np.around((L_Erms_TR),5)))
    print ("E_rms Validation = " + str(np.around((L_Erms_Val),5)))
    print ("E_rms Testing    = " + str(np.around((L_Erms_Test),5)))
    print("Training accuracy = " + str(L_Accuracy_TR))    
    print("Validation accuracy = " + str(L_Accuracy_Val))    
    print("Testing accuracy = " + str(L_Accuracy_Test))

In [9]:
def LogisticMethod(input_data_training, input_data_validation, input_data_testing, output_data_training, output_data_validation, output_data_testing):
    
    logisticRegression = LogisticRegression()
    logisticRegression.fit(input_data_training,output_data_training)

    L_Erms_TR_logistic = []
    L_Erms_VAL_logistic    = []
    L_Erms_Test_logistic  = []
    L_Accuracy_TR = 0.0
    L_Accuracy_Val = 0.0
    L_Accuracy_Test = 0.0

    logistic_output_train_data = logisticRegression.predict(input_data_training)
    Erms_train_logistic = GetErms(logistic_output_train_data,output_data_training)
    L_Erms_TR_logistic.append(float(Erms_train_logistic.split(',')[1]))
    L_Accuracy_Train = float(Erms_train_logistic.split(',')[0])

    logistic_output_valid_data = logisticRegression.predict(input_data_validation)
    # print(logistic_output_valid_data.shape)
    Erms_valid_logistic = GetErms(logistic_output_valid_data,output_data_validation)
    L_Erms_VAL_logistic.append(float(Erms_valid_logistic.split(',')[1]))
    L_Accuracy_Valid = float(Erms_valid_logistic.split(',')[0])

    logistic_output_test_data = logisticRegression.predict(input_data_testing)
    Erms_Test_logistic = GetErms(logistic_output_test_data,output_data_testing)
    L_Erms_Test_logistic.append(float(Erms_Test_logistic.split(',')[1]))
    L_Accuracy_Test = float(Erms_Test_logistic.split(',')[0])

    print ("E_rms Training   = " + str(np.around(min(L_Erms_TR_logistic),5)))
    print ("E_rms Validation = " + str(np.around(min(L_Erms_VAL_logistic),5)))
    print ("E_rms Testing    = " + str(np.around(min(L_Erms_Test_logistic),5)))

    print("Training Accuracy = " + str(L_Accuracy_Train))
    print("Validation Accuracy = " + str(L_Accuracy_Valid))
    print("Testing Accuracy  = " + str(L_Accuracy_Test))

In [10]:
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape,stddev=0.01))

def GetAccuracyNeuralNetworks(processedTestingLabel, predictedTestLabel):
        wrong   = 0
        right   = 0

        predictedTestLabelList = []

        for i,j in zip(processedTestingLabel,predictedTestLabel):

            if np.argmax(i) == j:
                right = right + 1
            else:
                wrong = wrong + 1

        print("Errors: " + str(wrong), " Correct :" + str(right))
        print("Testing Accuracy: " + str(right/(right+wrong)*100))
        
def NeuralNetworkMethod(features, GSC_input_appended_features_shuffled_tr, 
                        GSC_input_appended_features_shuffled_test,
                        GSC_input_appended_target_shuffled_tr,
                        GSC_input_appended_target_shuffled_test):

    NUM_HIDDEN_NEURONS_LAYER_1 = 200
    LEARNING_RATE = 0.05
    NUM_OF_EPOCHS = 1500
    BATCH_SIZE = 128

    x = tf.placeholder(tf.float32, [None, features])
    y = tf.placeholder(tf.float32, [None, 1])
    
    # Initializing the input to hidden layer weights
    input_hidden_weights  = init_weights([features, NUM_HIDDEN_NEURONS_LAYER_1])
    # Initializing the hidden to output layer weights
    hidden_output_weights = init_weights([NUM_HIDDEN_NEURONS_LAYER_1, 1])

    # Computing values at the hidden layer
    hidden_layer = tf.nn.relu(tf.matmul(x, input_hidden_weights))
    # Computing values at the output layer
    output_layer = tf.matmul(hidden_layer, hidden_output_weights)
    
    error_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output_layer, labels=y))

    prediction = tf.argmax(output_layer, 1)
    
    training = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(error_function)
    
    training_accuracy = []

    with tf.Session() as sess:

        tf.global_variables_initializer().run()

        for epoch in tqdm_notebook(range(NUM_OF_EPOCHS)):
            
            #Shuffle the Training Dataset at each epoch
            p = np.random.permutation(range(len(GSC_input_appended_features_shuffled_tr)))
            processedTrainingData  = GSC_input_appended_features_shuffled_tr[p]
            processedTrainingLabel = GSC_input_appended_target_shuffled_tr[p]

            # Start batch training
            for start in range(0, len(GSC_input_appended_features_shuffled_tr), BATCH_SIZE):
                end = start + BATCH_SIZE
                sess.run(training, feed_dict={x: processedTrainingData[start:end], 
                                              y: processedTrainingLabel[start:end]})
            # Training accuracy for an epoch
            training_accuracy.append(np.mean(np.argmax(processedTrainingLabel, axis=1) ==
                                 sess.run(prediction, feed_dict={x: processedTrainingData,
                                                                 y: processedTrainingLabel})))
        # Testing
        predictedTestLabel = sess.run(prediction, feed_dict={x: GSC_input_appended_features_shuffled_test})
        GetAccuracyNeuralNetworks(predictedTestLabel, GSC_input_appended_target_shuffled_tr)

        return training_accuracy, predictedTestLabel

def NeuralNetworkMethodGSC(features, GSC_input_appended_features_shuffled_tr, 
                        GSC_input_appended_features_shuffled_test,
                        GSC_input_appended_target_shuffled_tr,
                        GSC_input_appended_target_shuffled_test):

    NUM_HIDDEN_NEURONS_LAYER_1 = 200
    LEARNING_RATE = 0.05
    NUM_OF_EPOCHS = 50
    BATCH_SIZE = 32

    x = tf.placeholder(tf.float32, [None, features])
    y = tf.placeholder(tf.float32, [None, 1])
    
    # Initializing the input to hidden layer weights
    input_hidden_weights  = init_weights([features, NUM_HIDDEN_NEURONS_LAYER_1])
    # Initializing the hidden to output layer weights
    hidden_output_weights = init_weights([NUM_HIDDEN_NEURONS_LAYER_1, 1])

    # Computing values at the hidden layer
    hidden_layer = tf.nn.relu(tf.matmul(x, input_hidden_weights))
    # Computing values at the output layer
    output_layer = tf.matmul(hidden_layer, hidden_output_weights)
    
    error_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output_layer, labels=y))

    prediction = tf.argmax(output_layer, 1)
    
    training = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(error_function)
    
    training_accuracy = []

    with tf.Session() as sess:

        tf.global_variables_initializer().run()

        for epoch in tqdm_notebook(range(NUM_OF_EPOCHS)):
            
            #Shuffle the Training Dataset at each epoch
            p = np.random.permutation(range(len(GSC_input_appended_features_shuffled_tr)))
            processedTrainingData  = GSC_input_appended_features_shuffled_tr[p]
            processedTrainingLabel = GSC_input_appended_target_shuffled_tr[p]

            # Start batch training
            for start in range(0, len(GSC_input_appended_features_shuffled_tr), BATCH_SIZE):
                end = start + BATCH_SIZE
                sess.run(training, feed_dict={x: processedTrainingData[start:end], 
                                              y: processedTrainingLabel[start:end]})
            # Training accuracy for an epoch
            training_accuracy.append(np.mean(np.argmax(processedTrainingLabel, axis=1) ==
                                 sess.run(prediction, feed_dict={x: processedTrainingData,
                                                                 y: processedTrainingLabel})))
        # Testing
        predictedTestLabel = sess.run(prediction, feed_dict={x: GSC_input_appended_features_shuffled_test})
        GetAccuracyNeuralNetworks(predictedTestLabel, GSC_input_appended_target_shuffled_tr)

        return training_accuracy, predictedTestLabel

In [20]:
def shuffleDataSet(input_appended_features_same_matrix, input_appended_features_diffn_matrix, input_appended_features_target_same, input_appended_features_target_diffn):
    input_size = len(input_appended_features_same_matrix)

    input_appended_features_same_matrix_withTarget = np.concatenate((np.array(input_appended_features_same_matrix), input_appended_features_target_same.reshape(input_size,1)), axis=1)
    input_appended_features_diffn_matrix_withTarget = np.concatenate((np.array(input_appended_features_same_matrix), input_appended_features_target_diffn.reshape(input_size,1)), axis=1)
    
    input_data_training_appended_withTarget = np.concatenate([input_appended_features_same_matrix_withTarget, input_appended_features_diffn_matrix_withTarget])
    input_data_training_appended_withTarget_shuffled=np.random.shuffle(input_data_training_appended_withTarget)
    
    feature_size = len(np.transpose(input_data_training_appended_withTarget))-1
    input_appended_features_target = input_data_training_appended_withTarget[:,[feature_size]] 
    input_appended_features = input_data_training_appended_withTarget[:,:-1]
    
    return input_appended_features, input_appended_features_target

def partitionDataSet(input_appended_features_target_diffn, TrainingPercent):
    
    training_target_diffn = GenerateTrainingTarget(input_appended_features_target_diffn, TrainingPercent)
    validation_target_diffn = GenerateTestingTarget(input_appended_features_target_diffn,len(training_target_diffn))
    testing_target_diffn = GenerateTestingTarget(input_appended_features_target_diffn, (len(training_target_diffn)+len(validation_target_diffn)))
    return training_target_diffn, validation_target_diffn, testing_target_diffn

# Pre-processing Human Observed Features Data Set

In [112]:
file_path_input_same_pairs = r'same_pairs.csv'
file_path_input_diffn_pairs = r'diffn_pairs.csv'
file_path_human_observed_features = r'HumanObserved-Features-Data.csv'

# Raw data 
input_same_pairs, input_diffn_pairs, human_observed_features_extract = import_data(file_path_input_diffn_pairs, file_path_input_same_pairs, file_path_human_observed_features)

# Deleting headings from raw data
input_same_pairs = np.delete(input_same_pairs,0, 0)
input_diffn_pairs = np.delete(input_diffn_pairs, 0, 0)
human_observed_features_extract = np.delete(human_observed_features_extract,0,1)

# Creating dictionary for img_id and corresponding features
d = {}

for i in range(1, 1027):
    list_features = []
    for j in human_observed_features_extract[i]:
        list_features.append(j)
    d[list_features[0]]  = list(map(float, list_features[1:]))

  
  


In [113]:
# Target for same pair

input_appended_features_target_same = np.full((791,), 1, dtype=float)
training_target_same = np.full((633,), 1, dtype=float)
validation_target_same = GenerateTestingTarget(input_appended_features_target_same,len(training_target_same))
testing_target_same = GenerateTestingTarget(input_appended_features_target_same, (len(training_target_same)+len(validation_target_same)))

# Target for diffn pair
input_appended_features_target_diffn = np.full((791,), 0, dtype=float)

training_target_diffn = GenerateTrainingTarget(input_appended_features_target_diffn, 80)
validation_target_diffn = GenerateTestingTarget(input_appended_features_target_diffn,len(training_target_diffn))
testing_target_diffn = GenerateTestingTarget(input_appended_features_target_diffn, (len(training_target_diffn)+len(validation_target_diffn)))

In [114]:
# Subtracting the two img_id features to create input for same pairs

input_same_pairs = input_same_pairs[:,[0,1]]

input_subtracted_features_same_matrix = []
for i in range(0,791):
    img_id1_features = []
    input_subtracted_features = []
    for j in input_same_pairs[i]:
        img_id1_features.append(d[j])
    input_subtracted_features = [abs(a_i - b_i) for a_i, b_i in zip(img_id1_features[0], img_id1_features[1])]
    input_subtracted_features_same_matrix.append(input_subtracted_features)

input_diffn_pairs = input_diffn_pairs[:,[0,1]]
input_subtracted_features_diffn_matrix = []
for i in range(0,791):
    img_id1_features = []
    input_subtracted_features = []
    for j in input_diffn_pairs[i]:
        img_id1_features.append(d[j])
    input_subtracted_features = [abs(a_i - b_i) for a_i, b_i in zip(img_id1_features[0], img_id1_features[1])]
    input_subtracted_features_diffn_matrix.append(input_subtracted_features)

In [115]:
# Partition into Training, validation and testing

# Input for same pair

training_same_feature_matrix_subtracted = np.array(GenerateTrainingDataMatrix(input_subtracted_features_same_matrix, 80))
validation_same_feature_matrix_subtracted = np.array(GenerateValidationDataMatrix(input_subtracted_features_same_matrix, 633))
testing_same_feature_matrix_subtracted = np.array(GenerateValidationDataMatrix(input_subtracted_features_same_matrix,(633+79)))

# Input for diffn pair

training_diffn_feature_matrix_subtracted = np.array(GenerateTrainingDataMatrix(input_subtracted_features_diffn_matrix, 80))
validation_diffn_feature_matrix_subtracted = np.array(GenerateValidationDataMatrix(input_subtracted_features_diffn_matrix, 633))
testing_diffn_feature_matrix_subtracted = np.array(GenerateValidationDataMatrix(input_subtracted_features_diffn_matrix,(633+79)))

In [116]:
# Final input using subtraction (Combine Same and different pairs input)

input_data_training_subtracted = np.concatenate([training_same_feature_matrix_subtracted, training_diffn_feature_matrix_subtracted])
input_data_testing_subtracted = np.concatenate([testing_same_feature_matrix_subtracted, testing_diffn_feature_matrix_subtracted])
input_data_validation_subtracted = np.concatenate([validation_same_feature_matrix_subtracted,validation_diffn_feature_matrix_subtracted])

In [117]:
# Appending the two img_id features to create input for same pairs

input_same_pairs = input_same_pairs[:,[0,1]]

input_appended_features_same_matrix = []
for i in range(0,791):
    input_appended_features = []
    for j in input_same_pairs[i]:
        for item in d[j]:
            temp = []
            temp = float(item)
            input_appended_features.append(temp)
    input_appended_features_same_matrix.append(input_appended_features)

# Appending the two img_id features to create input for diffn pairs

input_diffn_pairs = input_diffn_pairs[:,[0,1]]

input_appended_features_diffn_matrix = []
for i in range(0,791):
    input_appended_features = []
    for j in input_diffn_pairs[i]:
        for item in d[j]:
            temp = []
            temp = float(item)
            input_appended_features.append(temp)
    input_appended_features_diffn_matrix.append(input_appended_features)

In [118]:
# Partition into Training, validation and testing

# Input for same pair

training_same_feature_matrix = np.array(GenerateTrainingDataMatrix(input_appended_features_same_matrix, 80))
validation_same_feature_matrix = np.array(GenerateValidationDataMatrix(input_appended_features_same_matrix, 633))
testing_same_feature_matrix = np.array(GenerateValidationDataMatrix(input_appended_features_same_matrix,(633+79)))

# Input for diffn pair

training_diffn_feature_matrix = np.array(GenerateTrainingDataMatrix(input_appended_features_diffn_matrix, 80))
validation_diffn_feature_matrix = np.array(GenerateValidationDataMatrix(input_appended_features_diffn_matrix, 633))
testing_diffn_feature_matrix = np.array(GenerateValidationDataMatrix(input_appended_features_diffn_matrix,(633+79)))

In [119]:
# Final input using concatenation (Combine Same and different pairs input)

input_data_training = np.concatenate([training_same_feature_matrix, training_diffn_feature_matrix])
input_data_testing = np.concatenate([testing_same_feature_matrix, testing_diffn_feature_matrix])
input_data_validation = np.concatenate([validation_same_feature_matrix,validation_diffn_feature_matrix])

output_data_training = np.concatenate([training_target_same,training_target_diffn])
output_data_testing = np.concatenate([testing_target_same,testing_target_diffn])
output_data_validation = np.concatenate([validation_target_same,validation_target_diffn])

In [120]:
# Shuffling and partitioning data for Feature concatenated HOD

input_appended_features_shuffled, input_appended_features_target_shuffled = shuffleDataSet(input_appended_features_same_matrix, input_appended_features_diffn_matrix, input_appended_features_target_same, input_appended_features_target_diffn)

input_appended_features_shuffled_tr, input_appended_features_shuffled_val, input_appended_features_shuffled_test = partitionDataSet(input_appended_features_shuffled, 80)

input_appended_target_shuffled_tr, input_appended_target_shuffled_val, input_appended_target_shuffled_test = partitionDataSet(input_appended_features_target_shuffled,80)

In [121]:
# Shuffling and partitioning data for Feature subtracted HOD

input_subtracted_features_shuffled, input_subtracted_features_target_shuffled = shuffleDataSet(input_subtracted_features_same_matrix, input_subtracted_features_diffn_matrix, input_appended_features_target_same, input_appended_features_target_diffn)

input_subtracted_features_shuffled_tr, input_subtracted_features_shuffled_val, input_subtracted_features_shuffled_test = partitionDataSet(input_subtracted_features_shuffled,80)

input_subtracted_target_shuffled_tr, input_subtracted_target_shuffled_val, input_subtracted_target_shuffled_test = partitionDataSet(input_subtracted_features_target_shuffled,80)

# Linear Regression for Human Observed Features Data Set

In [122]:
print("--------------------------------------------------------------------")
print("------------ Linear Regression for Input Concatenated features----------")
LinearRegressionMethod(input_data_training,input_data_validation,input_data_testing,output_data_training, output_data_validation, output_data_testing)

--------------------------------------------------------------------
------------ Linear Regression for Input Concatenated features----------
Entered GD
E_rms Training   = 0.6099
E_rms Validation = 0.70453
E_rms Testing    = 0.70382
Training accuracy = 50.0
Validation accuracy = 50.0
Testing accuracy = 50.0


In [123]:
print("--------------------------------------------------------------------")
print("------------ Linear Regression for Input subtracted features--------")
LinearRegressionMethod(input_data_training_subtracted, input_data_validation_subtracted,input_data_testing_subtracted,output_data_training, output_data_validation, output_data_testing)

--------------------------------------------------------------------
------------ Linear Regression for Input subtracted features--------
Entered GD
E_rms Training   = 0.52303
E_rms Validation = 0.56265
E_rms Testing    = 0.60847
Training accuracy = 50.0
Validation accuracy = 50.0
Testing accuracy = 50.0


In [124]:
print("--------------------------------------------------------------------")
print("------------ Linear Regression for Input Concatenated features SHUFFLED DATA SET----------")

input_appended_target_shuffled_tr = input_appended_target_shuffled_tr.reshape(len(input_appended_target_shuffled_tr),)
LinearRegressionMethod(input_appended_features_shuffled_tr,input_appended_features_shuffled_val,input_appended_features_shuffled_test,input_appended_target_shuffled_tr, input_appended_target_shuffled_val, input_appended_target_shuffled_test)

--------------------------------------------------------------------
------------ Linear Regression for Input Concatenated features SHUFFLED DATA SET----------
Entered GD
E_rms Training   = 0.59501
E_rms Validation = 0.60971
E_rms Testing    = 0.5734
Training accuracy = 49.13112164296998
Validation accuracy = 52.53164556962025
Testing accuracy = 54.48717948717948


In [125]:
print("--------------------------------------------------------------------")
print("------------ Linear Regression for Input subtracted features SHUFFLED DATA SET--------")

input_subtracted_target_shuffled_tr = input_subtracted_target_shuffled_tr.reshape(len(input_subtracted_target_shuffled_tr),)
LinearRegressionMethod(input_subtracted_features_shuffled_tr, input_subtracted_features_shuffled_val, input_subtracted_features_shuffled_test,input_subtracted_target_shuffled_tr, input_subtracted_target_shuffled_val, input_subtracted_target_shuffled_test)

--------------------------------------------------------------------
------------ Linear Regression for Input subtracted features SHUFFLED DATA SET--------
Entered GD
E_rms Training   = 0.5346
E_rms Validation = 0.53069
E_rms Testing    = 0.53752
Training accuracy = 50.71090047393365
Validation accuracy = 47.46835443037975
Testing accuracy = 47.43589743589743


# Logistic Regression for Human Observed Features Data Set

In [126]:
print("-------------------Logistic Regression Implemented for concatenated Features for HOD SHUFFLED DATASET---------------")
input_appended_target_shuffled_tr = input_appended_target_shuffled_tr.reshape(len(input_appended_target_shuffled_tr),1)
LogisticRegressionOwnImpl(input_appended_features_shuffled_tr, input_appended_features_shuffled_val, 
                          input_appended_features_shuffled_test,input_appended_target_shuffled_tr, 
                          input_appended_target_shuffled_val, input_appended_target_shuffled_test)

-------------------Logistic Regression Implemented for concatenated Features for HOD SHUFFLED DATASET---------------


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

E_rms Training   = [0.69272]
E_rms Validation = [0.6802]
E_rms Testing    = [0.67246]
Training accuracy = 49.13112164296998
Validation accuracy = 52.53164556962025
Testing accuracy = 54.14012738853503


In [127]:
print("-------------------Logistic Regression Implemented for subtracted Features for HOD SHUFFLED DATASET---------------")
input_subtracted_target_shuffled_tr = input_subtracted_target_shuffled_tr.reshape(len(input_subtracted_target_shuffled_tr),1)
LogisticRegressionOwnImpl(input_subtracted_features_shuffled_tr, input_subtracted_features_shuffled_val, 
                     input_subtracted_features_shuffled_test,input_subtracted_target_shuffled_tr, 
                     input_subtracted_target_shuffled_val, input_subtracted_target_shuffled_test)

-------------------Logistic Regression Implemented for subtracted Features for HOD SHUFFLED DATASET---------------


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

E_rms Training   = [0.72744]
E_rms Validation = [0.76316]
E_rms Testing    = [0.75839]
Training accuracy = 50.71090047393365
Validation accuracy = 47.46835443037975
Testing accuracy = 47.13375796178344


In [128]:
print("-------------------Logistic Regression Implemented for concatenated Features for HOD using SKLEARN---------------")
LogisticMethod(input_data_training, input_data_validation, input_data_testing,output_data_training, output_data_validation, output_data_testing)

# LogisticRegressionGD(input_data_training, input_data_validation, input_data_testing,output_data_training, output_data_validation, output_data_testing)

-------------------Logistic Regression Implemented for concatenated Features for HOD using SKLEARN---------------




E_rms Training   = 0.27823
E_rms Validation = 0.13779
E_rms Testing    = 0.08006
Training Accuracy = 92.25908372827804
Validation Accuracy = 98.10126582278481
Testing Accuracy  = 99.35897435897436


In [129]:
print("--------------------------------------------------------------------")
print("------------ Logistic Regression for Input subtracted features for HOD using SKLEARN--------")
LogisticMethod(input_data_training_subtracted, input_data_validation_subtracted, input_data_testing_subtracted,output_data_training, output_data_validation, output_data_testing)

--------------------------------------------------------------------
------------ Logistic Regression for Input subtracted features for HOD using SKLEARN--------




E_rms Training   = 0.45665
E_rms Validation = 0.29767
E_rms Testing    = 0.37553
Training Accuracy = 79.14691943127961
Validation Accuracy = 91.13924050632912
Testing Accuracy  = 85.8974358974359


# Neural Networks for Human Observed Features Data set

In [140]:
print("------------Neural Networks for HOF input concatenated features SHUFFLED DATA SET------------------------")

training_accuracy, predictedTestLabel = NeuralNetworkMethod(18, input_appended_features_shuffled_tr,
                        input_appended_features_shuffled_test,input_appended_target_shuffled_tr,
                        input_appended_target_shuffled_test)

------------Neural Networks for HOF input concatenated features SHUFFLED DATA SET------------------------


HBox(children=(IntProgress(value=0, max=1500), HTML(value='')))

Errors: 72  Correct :85
Testing Accuracy: 54.14012738853503


In [141]:
print("------------Neural Networks for HOF input subtracted features SHUFFLED DATA SET------------------------")

training_accuracy, predictedTestLabel = NeuralNetworkMethod(9, input_subtracted_features_shuffled_tr, 
                                                            input_subtracted_features_shuffled_test,
                                                            input_subtracted_target_shuffled_tr, 
                                                            input_subtracted_target_shuffled_test)

------------Neural Networks for HOF input subtracted features SHUFFLED DATA SET------------------------


HBox(children=(IntProgress(value=0, max=1500), HTML(value='')))

Errors: 78  Correct :79
Testing Accuracy: 50.318471337579616


# GSC Data Set Pre-processing

In [11]:
GSC_file_path_input_same_pairs = r'GSC_same_pairs.csv'
GSC_file_path_input_diffn_pairs = r'GSC_diffn_pairs.csv'
GSC_file_path_GSC_features = r'GSC-Features.csv'

# Raw data 
GSC_input_same_pairs, GSC_input_diffn_pairs, GSC_features_extract = import_data(GSC_file_path_input_diffn_pairs, GSC_file_path_input_same_pairs, GSC_file_path_GSC_features)

# Deleting headings from raw data
GSC_input_same_pairs = np.delete(GSC_input_same_pairs,0, 0)
GSC_input_diffn_pairs = np.delete(GSC_input_diffn_pairs, 0, 0)

  
  


In [12]:
# Creating dictionary for img_id and corresponding features
GSC_d = {}

for i in range(1, 14073):
    list_features = []
    for j in GSC_features_extract[i]:
        list_features.append(j)
    GSC_d[list_features[0]]  = list(map(float, list_features[1:]))

In [13]:
# Appending the two img_id features to create input for same pairs

GSC_input_same_pairs = GSC_input_same_pairs[:,[0,1]]

GSC_input_appended_features_same_matrix = []
for i in range(0,71531):
    GSC_input_appended_features = []
    for j in GSC_input_same_pairs[i]:
        for item in GSC_d[j]:
            temp = []
            temp = float(item)
            GSC_input_appended_features.append(temp)
    GSC_input_appended_features_same_matrix.append(GSC_input_appended_features)

# Appending the two img_id features to create input for diffn pairs

GSC_input_diffn_pairs = GSC_input_diffn_pairs[:,[0,1]]

GSC_input_appended_features_diffn_matrix = []
for i in range(0,71531):
    GSC_input_appended_features = []
    for j in GSC_input_diffn_pairs[i]:
        for item in GSC_d[j]:
            temp = []
            temp = float(item)
            GSC_input_appended_features.append(temp)
    GSC_input_appended_features_diffn_matrix.append(GSC_input_appended_features)

In [14]:
# Target for same pair

GSC_input_appended_features_target_same = np.full((71531,), 1, dtype=float)
GSC_training_target_same = GenerateTrainingTarget(GSC_input_appended_features_target_same, 30)
GSC_validation_target_same = GenerateTestingTarget(GSC_input_appended_features_target_same,len(GSC_training_target_same))
GSC_testing_target_same = GenerateTestingTarget(GSC_input_appended_features_target_same, (len(GSC_training_target_same)+len(GSC_validation_target_same)))

# Target for diffn pair
GSC_input_appended_features_target_diffn = np.full((71531,), 0, dtype=float)

GSC_training_target_diffn = np.array(GenerateTrainingTarget(GSC_input_appended_features_target_diffn, 30))
GSC_validation_target_diffn = np.array(GenerateTestingTarget(GSC_input_appended_features_target_diffn,len(GSC_training_target_diffn)))
GSC_testing_target_diffn = np.array(GenerateTestingTarget(GSC_input_appended_features_target_diffn, (len(GSC_training_target_diffn)+len(GSC_validation_target_diffn))))

In [15]:
# Partition into Training, validation and testing

# Input for same pair

GSC_training_same_feature_matrix = np.array(GenerateTrainingDataMatrix(GSC_input_appended_features_same_matrix, 30))
GSC_validation_same_feature_matrix = np.array(GenerateValidationDataMatrix(GSC_input_appended_features_same_matrix, len(GSC_training_same_feature_matrix)))
GSC_testing_same_feature_matrix = np.array(GenerateValidationDataMatrix(GSC_input_appended_features_same_matrix,(len(GSC_training_same_feature_matrix)+len(GSC_validation_same_feature_matrix))))

# Input for diffn pair

GSC_training_diffn_feature_matrix = np.array(GenerateTrainingDataMatrix(GSC_input_appended_features_diffn_matrix, 30))
GSC_validation_diffn_feature_matrix = np.array(GenerateValidationDataMatrix(GSC_input_appended_features_diffn_matrix, len(GSC_training_diffn_feature_matrix)))
GSC_testing_diffn_feature_matrix = np.array(GenerateValidationDataMatrix(GSC_input_appended_features_diffn_matrix,(len(GSC_training_diffn_feature_matrix)+len(GSC_validation_same_feature_matrix))))

In [16]:
# Final input and target using concatenation (Combine Same and different pairs input)

GSC_input_data_training = np.concatenate([GSC_training_same_feature_matrix, GSC_training_diffn_feature_matrix])
GSC_input_data_testing = np.concatenate([GSC_testing_same_feature_matrix, GSC_testing_diffn_feature_matrix])
GSC_input_data_validation = np.concatenate([GSC_validation_same_feature_matrix,GSC_validation_diffn_feature_matrix])

GSC_output_data_training = np.concatenate([GSC_training_target_same,GSC_training_target_diffn])
GSC_output_data_testing = np.concatenate([GSC_testing_target_same,GSC_testing_target_diffn])
GSC_output_data_validation = np.concatenate([GSC_validation_target_same,GSC_validation_target_diffn])

In [17]:
# Subtracting the two img_id features to create input for same pairs

GSC_input_subtracted_features_same_matrix = []
for i in range(0,71531):
    img_id1_features = []
    input_subtracted_features = []
    for j in GSC_input_same_pairs[i]:
        img_id1_features.append(GSC_d[j])
    input_subtracted_features = [abs(a_i - b_i) for a_i, b_i in zip(img_id1_features[0], img_id1_features[1])]
    GSC_input_subtracted_features_same_matrix.append(input_subtracted_features)

GSC_input_subtracted_features_diffn_matrix = []
for i in range(0,71531):
    img_id1_features = []
    input_subtracted_features = []
    for j in GSC_input_diffn_pairs[i]:
        img_id1_features.append(GSC_d[j])
    input_subtracted_features = [abs(a_i - b_i) for a_i, b_i in zip(img_id1_features[0], img_id1_features[1])]
    GSC_input_subtracted_features_diffn_matrix.append(input_subtracted_features)

In [18]:
# Partition into Training, validation and testing

# Input for same pair

GSC_training_same_feature_matrix_subtracted = np.array(GenerateTrainingDataMatrix(GSC_input_subtracted_features_same_matrix, 30))
GSC_validation_same_feature_matrix_subtracted = np.array(GenerateValidationDataMatrix(GSC_input_subtracted_features_same_matrix, len(GSC_training_same_feature_matrix)))
GSC_testing_same_feature_matrix_subtracted = np.array(GenerateValidationDataMatrix(GSC_input_subtracted_features_same_matrix,(len(GSC_training_same_feature_matrix)+len(GSC_validation_same_feature_matrix))))

# Input for diffn pair

GSC_training_diffn_feature_matrix_subtracted = np.array(GenerateTrainingDataMatrix(GSC_input_subtracted_features_diffn_matrix, 30))
GSC_validation_diffn_feature_matrix_subtracted = np.array(GenerateValidationDataMatrix(GSC_input_subtracted_features_diffn_matrix, len(GSC_training_diffn_feature_matrix)))
GSC_testing_diffn_feature_matrix_subtracted = np.array(GenerateValidationDataMatrix(GSC_input_subtracted_features_diffn_matrix,(len(GSC_training_diffn_feature_matrix)+len(GSC_validation_diffn_feature_matrix))))

# Final input using subtraction (Combine Same and different pairs input)

GSC_input_data_training_subtracted = np.concatenate([GSC_training_same_feature_matrix_subtracted, GSC_training_diffn_feature_matrix_subtracted])
GSC_input_data_testing_subtracted = np.concatenate([GSC_testing_same_feature_matrix_subtracted, GSC_testing_diffn_feature_matrix_subtracted])
GSC_input_data_validation_subtracted = np.concatenate([GSC_validation_same_feature_matrix_subtracted,GSC_validation_diffn_feature_matrix_subtracted])

In [21]:
# Shuffling and partitioning data for Feature subtracted GSC

GSC_input_subtracted_features_shuffled, GSC_input_subtracted_features_target_shuffled = shuffleDataSet(GSC_input_subtracted_features_same_matrix, GSC_input_subtracted_features_diffn_matrix, GSC_input_appended_features_target_same, GSC_input_appended_features_target_diffn)

GSC_input_subtracted_features_shuffled_tr, GSC_input_subtracted_features_shuffled_val, GSC_input_subtracted_features_shuffled_test = partitionDataSet(GSC_input_subtracted_features_shuffled,30)

GSC_input_subtracted_target_shuffled_tr, GSC_input_subtracted_target_shuffled_val, GSC_input_subtracted_target_shuffled_test = partitionDataSet(GSC_input_subtracted_features_target_shuffled,30)

In [22]:
# Shuffling and partitioning data for Feature concatenate GSC

GSC_input_appended_features_shuffled, GSC_input_appended_features_target_shuffled = shuffleDataSet(GSC_input_appended_features_same_matrix, GSC_input_appended_features_diffn_matrix, GSC_input_appended_features_target_same, GSC_input_appended_features_target_diffn)

GSC_input_appended_features_shuffled_tr, GSC_input_appended_features_shuffled_val, GSC_input_appended_features_shuffled_test = partitionDataSet(GSC_input_appended_features_shuffled,30)

GSC_input_appended_target_shuffled_tr, GSC_input_appended_target_shuffled_val, GSC_input_appended_target_shuffled_test = partitionDataSet(GSC_input_appended_features_target_shuffled,30)

# Linear Regression for GSC Data Set

In [152]:
def LinearRegressionMethodforGSC(features, GSC_input_data_training, GSC_input_data_validation, GSC_input_data_testing,GSC_output_data_training, GSC_output_data_validation, GSC_output_data_testing):
    # Linear Regression for Input Appended GSC dataset

    kmeans = KMeans(n_clusters=10, random_state=0).fit(GSC_input_data_training)
    GSC_Mu = kmeans.cluster_centers_
    GSC_BigSigma     = GenerateBigSigma(GSC_input_data_training, GSC_Mu, True)
    GSC_BigSigma = GSC_BigSigma + np.identity(features)
    GSC_TRAINING_PHI = GetPhiMatrix(GSC_input_data_training, GSC_Mu, GSC_BigSigma, 42919)
    GSC_VAL_PHI      = GetPhiMatrix(GSC_input_data_validation, GSC_Mu, GSC_BigSigma, 14306)
    GSC_TEST_PHI     = GetPhiMatrix(GSC_input_data_testing, GSC_Mu, GSC_BigSigma, 14306) 
    GSC_W            = GetWeightsClosedForm(GSC_TRAINING_PHI,GSC_output_data_training,0.03)
    GradientDescent(GSC_W, GSC_TRAINING_PHI, GSC_VAL_PHI, GSC_TEST_PHI, GSC_output_data_training, GSC_output_data_validation, GSC_output_data_testing)    

In [153]:
print("--------------------------------------------------------------------")
print("------------ Linear Regression for Input appended features GSC SHUFFLED DATA SET--------")

GSC_input_appended_target_shuffled_tr = GSC_input_appended_target_shuffled_tr.reshape(len(GSC_input_appended_target_shuffled_tr),)
LinearRegressionMethodforGSC(1024, GSC_input_appended_features_shuffled_tr, GSC_input_appended_features_shuffled_val, GSC_input_appended_features_shuffled_test,GSC_input_appended_target_shuffled_tr, GSC_input_appended_target_shuffled_val, GSC_input_appended_target_shuffled_test)

--------------------------------------------------------------------
------------ Linear Regression for Input appended features GSC SHUFFLED DATA SET--------
Entered GD
E_rms Training   = 0.70799
E_rms Validation = 0.70765
E_rms Testing    = 0.70706
Training accuracy = 49.8753465830984
Validation accuracy = 49.92310918495736
Testing accuracy = 50.00699007409478


In [75]:
print("--------------------------------------------------------------------")
print("------------ Linear Regression for Input subtracted features SHUFFLED DATA SET--------")

GSC_input_subtracted_target_shuffled_tr = GSC_input_subtracted_target_shuffled_tr.reshape(len(GSC_input_subtracted_target_shuffled_tr),)
LinearRegressionMethodforGSC(512, GSC_input_subtracted_features_shuffled_tr, GSC_input_subtracted_features_shuffled_val, GSC_input_subtracted_features_shuffled_test,GSC_input_subtracted_target_shuffled_tr, GSC_input_subtracted_target_shuffled_val, GSC_input_subtracted_target_shuffled_test)

--------------------------------------------------------------------
------------ Linear Regression for Input subtracted features SHUFFLED DATA SET--------
(10, 512)
(512, 512)
(42919, 10)
(14306, 10)
(14306, 10)
(10,)
Entered GD
E_rms Training   = 0.7085
E_rms Validation = 0.70528
E_rms Testing    = 0.70676
Training accuracy = 49.803117500407744
Validation accuracy = 50.25863274150706
Testing accuracy = 50.04893051866349


# Logistic Regression for GSC Data set

In [76]:
print("--------------------------------------------------------------------")
print("------------ Logistic Regression for Input Appended features--------")
LogisticMethod(GSC_input_data_training, GSC_input_data_validation, GSC_input_data_testing, GSC_output_data_training, GSC_output_data_validation, GSC_output_data_testing)

--------------------------------------------------------------------
------------ Logistic Regression for Input Appended features--------




E_rms Training   = 0.09056
E_rms Validation = 0.7113
E_rms Testing    = 0.71301
Training Accuracy = 99.17986952469711
Validation Accuracy = 49.40584370194324
Testing Accuracy  = 49.16119110862575


In [77]:
print("--------------------------------------------------------------------")
print("------------ Logistic Regression for Input Subtracted features--------")
LogisticMethod(GSC_input_data_training_subtracted, GSC_input_data_validation_subtracted, GSC_input_data_testing_subtracted, GSC_output_data_training, GSC_output_data_validation, GSC_output_data_testing)

--------------------------------------------------------------------
------------ Logistic Regression for Input Subtracted features--------




E_rms Training   = 0.31751
E_rms Validation = 0.5111
E_rms Testing    = 0.51157
Training Accuracy = 89.91845293569432
Validation Accuracy = 73.87809310778695
Testing Accuracy  = 73.82916258912344


In [38]:
def LogisticRegressionOwnImplGSC(input_data_training, input_data_validation, input_data_testing, output_data_training, output_data_validation, output_data_testing):
    
    NUM_OF_EPOCHS = 500
    L_Erms_Val   = []
    L_Erms_TR    = []
    L_Erms_Test  = []
    LR = 0.000001
    W = np.random.rand(len(np.transpose(input_data_training)), 1) 

    for epoch in tqdm_notebook(range(NUM_OF_EPOCHS)):
        
        A = GetLogisticTarget(W, input_data_training)

        Z = np.subtract(A,output_data_training)
        DELTA_W = np.dot(np.transpose(input_data_training), Z)
        W = W - np.dot(LR, DELTA_W)


    #-----------------TrainingData Accuracy---------------------#
    TR_TEST_OUT   = np.dot(input_data_training,W) 
    Erms_TR       = GetErms(TR_TEST_OUT,output_data_training)
    L_Erms_TR.append(float(Erms_TR.split(',')[1]))
    L_Accuracy_TR = float(Erms_TR.split(',')[0])

    #-----------------ValidationData Accuracy---------------------#
    VAL_TEST_OUT  = np.dot(input_data_validation,W) 
    Erms_Val      = GetErms(VAL_TEST_OUT,output_data_validation)
    L_Erms_Val.append(float(Erms_Val.split(',')[1]))
    L_Accuracy_Val = float(Erms_Val.split(',')[0])

    #-----------------TestingData Accuracy---------------------#
    TEST_OUT      = np.dot(input_data_testing,W) 
    Erms_Test = GetErms(TEST_OUT,output_data_testing)
    L_Erms_Test.append(float(Erms_Test.split(',')[1]))
    L_Accuracy_Test = float(Erms_Test.split(',')[0])
    
    print ("E_rms Training   = " + str(np.around((L_Erms_TR),5)))
    print ("E_rms Validation = " + str(np.around((L_Erms_Val),5)))
    print ("E_rms Testing    = " + str(np.around((L_Erms_Test),5)))
    print("Training accuracy = " + str(L_Accuracy_TR))    
    print("Validation accuracy = " + str(L_Accuracy_Val))    
    print("Testing accuracy = " + str(L_Accuracy_Test))

In [36]:
print("-------------------Logistic Regression Implemented for concatenated Features for GSC SHUFFLED DATASET---------------")
GSC_input_appended_target_shuffled_tr = GSC_input_appended_target_shuffled_tr.reshape(len(GSC_input_appended_target_shuffled_tr),1)
LogisticRegressionOwnImplGSC(GSC_input_appended_features_shuffled_tr, GSC_input_appended_features_shuffled_val, 
                          GSC_input_appended_features_shuffled_test,GSC_input_appended_target_shuffled_tr, 
                          GSC_input_appended_target_shuffled_val, GSC_input_appended_target_shuffled_test)

-------------------Logistic Regression Implemented for concatenated Features for GSC SHUFFLED DATASET---------------


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


E_rms Training   = [2.04723]
E_rms Validation = [2.08091]
E_rms Testing    = [2.06709]
Training accuracy = 19.695239870453644
Validation accuracy = 19.802879910527054
Testing accuracy = 19.537257094925206


In [39]:
print("-------------------Logistic Regression Implemented for subtracted Features for GSC SHUFFLED DATASET---------------")
GSC_input_subtracted_target_shuffled_tr = GSC_input_subtracted_target_shuffled_tr.reshape(len(GSC_input_subtracted_target_shuffled_tr),1)
LogisticRegressionOwnImplGSC(GSC_input_subtracted_features_shuffled_tr, GSC_input_subtracted_features_shuffled_val, 
                     GSC_input_subtracted_features_shuffled_test,GSC_input_subtracted_target_shuffled_tr, 
                     GSC_input_subtracted_target_shuffled_val, GSC_input_subtracted_target_shuffled_test)

-------------------Logistic Regression Implemented for subtracted Features for GSC SHUFFLED DATASET---------------


HBox(children=(IntProgress(value=0, max=500), HTML(value='')))


E_rms Training   = [2.00995]
E_rms Validation = [2.03787]
E_rms Testing    = [2.02233]
Training accuracy = 19.94221673384748
Validation accuracy = 19.718999021389628
Testing accuracy = 19.739969243673983


# Neural Networks for GSC Data set

In [138]:
print("------------Neural Networks for GSC input concatenated features SHUFFLED DATA SET------------------------")

training_accuracy, predictedTestLabel = NeuralNetworkMethodGSC(1024, GSC_input_appended_features_shuffled_tr,
                        GSC_input_appended_features_shuffled_test,GSC_input_appended_target_shuffled_tr,
                        GSC_input_appended_target_shuffled_test)

------------Neural Networks for GSC input concatenated features SHUFFLED DATA SET------------------------


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

Errors: 7145  Correct :7161
Testing Accuracy: 50.055920592758284


In [137]:
print("------------Neural Networks for GSC input subtracted features SHUFFLED DATA SET------------------------")

training_accuracy, predictedTestLabel = NeuralNetworkMethodGSC(512, GSC_input_subtracted_features_shuffled_tr, 
                                                            GSC_input_subtracted_features_shuffled_test,
                                                            GSC_input_subtracted_target_shuffled_tr, 
                                                            GSC_input_subtracted_target_shuffled_test)

------------Neural Networks for GSC input subtracted features SHUFFLED DATA SET------------------------


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

Errors: 7171  Correct :7135
Testing Accuracy: 49.87417866629386
