In [105]:
from sklearn.cluster import KMeans
import numpy as np
import pandas as pd
import math
import os

# ---------------------------- Run configuration ---------------------------- #
# NOTE(review): maxAcc, maxIter, C_Lambda, M, PHI and IsSynthetic are not read by
# any function visible in this notebook; they look like leftovers from another
# exercise -- confirm before relying on (or deleting) them.
maxAcc = 0.0
maxIter = 0
# NOTE(review): presumably a regularisation coefficient, but generate_ERMS does
# not read it -- confirm.
C_Lambda = 0.03
# Percentages of the samples used for each split; train_model reads these three
# globals when it slices the raw data.
TrainingPercent = 80
ValidationPercent = 10
TestPercent = 10
M = 10
PHI = []
IsSynthetic = False

# Create Pair Data by Concatenating Features

In [93]:
def create_pair_data_concat(df, df_pair):
    """Attach the feature columns of both images to every pair row.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-image table with an ``img_id`` column; every other column is
        carried over as a feature.
    df_pair : pandas.DataFrame
        Pair table with ``img_id_A`` and ``img_id_B`` columns. Any further
        columns (e.g. ``target``) are passed through unchanged.

    Returns
    -------
    pandas.DataFrame
        ``df_pair`` inner-merged with the features of image A and then with
        the features of image B. Column names that occur on both sides get
        pandas' default ``_x`` (image A) / ``_y`` (image B) suffixes. Pairs
        whose ids are missing from ``df`` are dropped by the inner merges.

    Notes
    -----
    ``df`` is never modified. Renamed copies are merged instead of renaming
    the caller's frame in place, so a failing merge can no longer leave
    ``df`` with a renamed id column.
    """
    features_a = df.rename(columns={'img_id': 'img_id_A'})
    features_b = df.rename(columns={'img_id': 'img_id_B'})

    df_new = pd.merge(df_pair, features_a, on="img_id_A")
    df_new = pd.merge(df_new, features_b, on="img_id_B")
    return df_new

In [94]:
def create_pair_data_subtract(df, df_final):
    """Build |features(A) - features(B)| for every image pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-image table with an ``img_id`` column; every other column is
        treated as a numeric feature.
    df_final : pandas.DataFrame
        Pair table with ``img_id_A``, ``img_id_B`` and ``target`` columns.
        Assumed not to share any column name with the feature columns of
        ``df``.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        The absolute feature differences (one column per feature, named as in
        ``df``) and the matching ``target`` values. Both are taken from the
        same merged frame, so row *i* of the features always belongs to row
        *i* of the targets.

    Notes
    -----
    Both sides are merged onto one frame instead of merging A and B
    separately and subtracting by position. With separate merges, a pair
    dropped (or reordered) by only one of the inner merges silently
    misaligned the two sides and the targets. ``df`` is not modified.
    """
    feature_cols = [col for col in df.columns if col != 'img_id']
    features_a = df.rename(columns={'img_id': 'img_id_A'})
    features_b = df.rename(columns={'img_id': 'img_id_B'})

    # One chained merge keeps A-features, B-features and target on the same row.
    merged = pd.merge(df_final, features_a, on="img_id_A")
    merged = pd.merge(merged, features_b, on="img_id_B", suffixes=('_A', '_B'))

    side_a = merged[['{}_A'.format(col) for col in feature_cols]].copy()
    side_a.columns = feature_cols
    side_b = merged[['{}_B'.format(col) for col in feature_cols]].copy()
    side_b.columns = feature_cols

    df_diff = np.absolute(side_a.sub(side_b))
    df_target = merged['target']
    return df_diff, df_target

In [95]:
def GenerateTrainingTarget(rawTraining,TrainingPercent = 80):
    """Return the leading ``TrainingPercent`` percent of the target vector.

    The cut-off index is ``ceil(len(rawTraining) * TrainingPercent / 100)``,
    so a fractional share is rounded up. The result is a plain slice of
    ``rawTraining`` starting at its first element.
    """
    share = TrainingPercent*0.01
    cutoff = int(math.ceil(len(rawTraining)*share))
    return rawTraining[:cutoff]

def GenerateTrainingDataMatrix(rawData, TrainingPercent = 80):
    """Return the leading ``TrainingPercent`` percent of the columns of ``rawData``.

    ``rawData`` is indexed as ``rawData[:, columns]`` and its column count is
    taken from ``len(rawData[0])``. The number of columns kept is rounded up
    with ``ceil``.
    """
    n_columns = len(rawData[0])
    cutoff = int(math.ceil(n_columns*0.01*TrainingPercent))
    return rawData[:, :cutoff]

def GenerateValData(rawData, ValPercent, TrainingCount): 
    """Return the held-out columns of ``rawData`` that follow the first ``TrainingCount`` columns.

    Parameters
    ----------
    rawData : 2-D array indexed as ``rawData[:, columns]``
        Its column count is read from ``len(rawData[0])``.
    ValPercent : number
        Size of the held-out block as a percentage of all columns (rounded up).
    TrainingCount : int
        Number of leading columns already consumed by earlier splits.

    Returns
    -------
    The columns ``TrainingCount`` .. ``TrainingCount + valSize - 1``. A block
    that would run past the end of the data is truncated by the slice.
    """
    valSize = int(math.ceil(len(rawData[0])*ValPercent*0.01))
    V_End = TrainingCount + valSize
    # Slices are half-open: earlier splits end just before TrainingCount, so the
    # held-out block starts exactly there. Starting at TrainingCount+1 dropped
    # one sample and returned valSize-1 columns.
    dataMatrix = rawData[:,TrainingCount:V_End]
    return dataMatrix

def GenerateValTargetVector(rawData, ValPercent, TrainingCount): 
    """Return the held-out targets that follow the first ``TrainingCount`` targets.

    Mirrors ``GenerateValData`` for the 1-D target vector: the block size is
    ``ceil(len(rawData) * ValPercent / 100)`` and the slice starts at index
    ``TrainingCount`` so features and targets stay aligned.
    """
    valSize = int(math.ceil(len(rawData)*ValPercent*0.01))
    V_End = TrainingCount + valSize
    # Start at TrainingCount (not TrainingCount+1); see GenerateValData.
    t =rawData[TrainingCount:V_End]
    return t

def GetErms(VAL_TEST_OUT,ValDataAct):
    """Score predictions against the actual labels.

    Walks both sequences by position and accumulates the squared error, and
    counts a hit whenever the prediction rounded to the nearest integer
    equals the actual value.

    Returns
    -------
    str
        ``"<accuracy>,<erms>"`` where accuracy is the hit percentage and erms
        is ``sqrt(sum of squared errors / number of predictions)``.

    Raises
    ------
    ZeroDivisionError
        If ``VAL_TEST_OUT`` is empty.
    """
    n_samples = len(VAL_TEST_OUT)
    squared_error = 0.0
    hits = 0
    for idx, predicted in enumerate(VAL_TEST_OUT):
        actual = ValDataAct[idx]
        squared_error += math.pow((actual - predicted), 2)
        if int(np.around(predicted, 0)) == actual:
            hits += 1
    accuracy = float(hits*100)/float(n_samples)
    erms = math.sqrt(squared_error/n_samples)
    return str(accuracy) + ',' + str(erms)

In [96]:
def generate_raw_data(df, df_diff, df_same, feature_operation, dataType, random_state=None):
    """Build a balanced, shuffled pair data set and its feature matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-image feature table (passed on to the pair-building helper).
    df_diff, df_same : pandas.DataFrame
        Pair tables. ``df_diff`` is down-sampled to the number of rows in
        ``df_same`` so both kinds of pair are equally represented.
    feature_operation : {"Concat", "Subtract"}
        How the two images' features are combined.
    dataType : str
        Not used by this function; kept for call compatibility.
    random_state : int or numpy RandomState, optional
        Forwarded to both ``DataFrame.sample`` calls. The default ``None``
        keeps the previous non-deterministic behaviour; pass a seed for
        reproducible runs.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        Feature frame with all single-valued columns removed, and the targets.

    Raises
    ------
    ValueError
        If ``feature_operation`` is not one of the supported names. This used
        to surface as an ``UnboundLocalError`` further down.
    """
    if feature_operation not in ("Concat", "Subtract"):
        raise ValueError(
            "feature_operation must be 'Concat' or 'Subtract', got {!r}".format(feature_operation))

    # Balance the two kinds of pair, then shuffle the combined table.
    df_diff = df_diff.sample(n=df_same.shape[0], random_state=random_state)
    df_pairs = (pd.concat([df_same, df_diff])
                .sample(frac=1, random_state=random_state)
                .reset_index(drop=True))

    if feature_operation == "Concat":
        df_final = create_pair_data_concat(df, df_pairs)
        df_target = df_final['target']
        df_final = df_final.drop(['img_id_A','img_id_B','target'], axis=1)
    else:
        df_final, df_target = create_pair_data_subtract(df, df_pairs)

    # Columns holding a single distinct value carry no information: drop them.
    uniques = df_final.apply(lambda x: x.nunique())
    df_final = df_final.drop(uniques[uniques==1].index, axis=1)
    return df_final, df_target

In [97]:
def sigmoid_func(W, X):
    """Return the logistic function of ``X . W^T``, i.e. ``1 / (1 + exp(-X.W))``."""
    logits = np.dot(X, np.transpose(W))
    return 1.0 / (1 + np.exp(-logits))

In [106]:
# ------- Batch gradient descent for L2-regularised logistic regression ------- #
def generate_ERMS(TrainingTarget, TrainingData, ValData, TestData, ValDataAct, TestDataAct, dataType,
                  learningRate=0.01, La=2, verbose=True):
    """Train logistic-regression weights by gradient descent and print accuracies.

    Every iteration uses the *whole* training set (full-batch update, not a
    per-sample stochastic step). After each update the training, validation
    and test accuracies are computed with ``GetErms``.

    Parameters
    ----------
    TrainingTarget : numpy.ndarray
        Training labels; ``TrainingTarget.shape[0]`` is the sample count.
    TrainingData, ValData, TestData : numpy.ndarray
        Feature matrices indexed so that ``TrainingData.shape[0]`` is the
        number of weights; they are transposed before being passed to
        ``sigmoid_func``.
    ValDataAct, TestDataAct : sequence
        Actual labels for the validation and test sets.
    dataType : str
        ``"GSC"`` runs 200 iterations; any other value runs one iteration per
        training sample.
    learningRate : float, optional
        Step size of the weight update (default 0.01, the previous constant).
    La : float, optional
        Weight of the L2 penalty term ``La * W`` added to the gradient
        (default 2, the previous constant).
    verbose : bool, optional
        Print the iteration counter on every step (default ``True``, the
        previous behaviour). Pass ``False`` to keep the output short.

    Returns
    -------
    None
        Results are printed. The printed figures are the *maximum* accuracy
        seen over all iterations for each split, not the final accuracy.
        NOTE(review): taking the maximum test accuracy over iterations is an
        optimistic estimate -- confirm this is intended.
    """
    if dataType == "GSC":
        range_loop = 200
    else:
        range_loop = TrainingTarget.shape[0]

    n_samples = TrainingTarget.shape[0]
    W_Now = np.ones(TrainingData.shape[0])
    # Despite the "Erms" in GetErms, only the accuracy part of its result is kept.
    acc_train = []
    acc_val = []
    acc_test = []

    for i in range(range_loop):
        if verbose:
            print(i)

        # Gradient of the data term averaged over all samples, plus the L2 penalty.
        predictions = sigmoid_func(W_Now, np.transpose(TrainingData))
        residual = np.subtract(predictions, TrainingTarget)
        Delta_E_D = np.dot(TrainingData, residual)/n_samples
        Delta_E = np.add(Delta_E_D, np.dot(La, W_Now))
        W_Now = W_Now - learningRate * Delta_E

        #-----------------TrainingData Accuracy---------------------#
        TR_TEST_OUT = sigmoid_func(W_Now, np.transpose(TrainingData))
        acc_train.append(float(GetErms(TR_TEST_OUT, TrainingTarget).split(',')[0]))

        #-----------------ValidationData Accuracy---------------------#
        VAL_TEST_OUT = sigmoid_func(W_Now, np.transpose(ValData))
        acc_val.append(float(GetErms(VAL_TEST_OUT, ValDataAct).split(',')[0]))

        #-----------------TestingData Accuracy---------------------#
        TEST_OUT = sigmoid_func(W_Now, np.transpose(TestData))
        acc_test.append(float(GetErms(TEST_OUT, TestDataAct).split(',')[0]))

    print ('----------Gradient Descent Solution--------------------')
    print ("Accuracy Training   = " + str(np.around(max(acc_train),5)))
    print ("Accuracy Validation = " + str(np.around(max(acc_val),5)))
    print ("Accuracy Testing    = " + str(np.around(max(acc_test),5)))
    

In [107]:
def train_model(RawData, RawTarget, dataType):
    """Split the data into training / validation / test parts and run training.

    The split sizes come from the module-level ``TrainingPercent``,
    ``ValidationPercent`` and ``TestPercent``. The validation helpers are
    given the number of training targets as their offset; the test split is
    produced by the same helpers with the combined training + validation
    target count as its offset. The six resulting pieces are handed to
    ``generate_ERMS`` together with ``dataType``. Nothing is returned.
    """
    train_targets = np.array(GenerateTrainingTarget(RawTarget, TrainingPercent))
    train_features = GenerateTrainingDataMatrix(RawData, TrainingPercent)
    n_train = len(train_targets)

    val_targets = np.array(GenerateValTargetVector(RawTarget, ValidationPercent, n_train))
    val_features = GenerateValData(RawData, ValidationPercent, n_train)
    n_consumed = n_train + len(val_targets)

    test_targets = np.array(GenerateValTargetVector(RawTarget, TestPercent, n_consumed))
    test_features = GenerateValData(RawData, TestPercent, n_consumed)

    generate_ERMS(train_targets, train_features, val_features, test_features,
                  val_targets, test_targets, dataType)

In [108]:
# Human-Observed-Features (HOF) inputs: per-image features plus the pair lists.
# In this cell they are only used by the commented-out HOF runs below.
df_hof = pd.read_csv("HOF/HumanObserved-Features-Data.csv",index_col=0)
df_diff_hof = pd.read_csv("HOF/diffn_pairs.csv")
df_same_hof = pd.read_csv("HOF/same_pairs.csv")

# Disabled HOF runs, kept for reference. They are written with `.values`
# because `DataFrame.as_matrix()` no longer exists in current pandas.
# print("-----------------------HOF : Concat--------------------")
# df_final, df_target = generate_raw_data(df_hof, df_diff_hof, df_same_hof, "Concat", "HOF")
# RawData = np.transpose(df_final.values)
# RawTarget = df_target.values
# train_model(RawData, RawTarget, "HOF")

# print("-----------------------HOF : Subtract--------------------")
# df_final, df_target = generate_raw_data(df_hof, df_diff_hof, df_same_hof, "Subtract", "HOF")
# RawData = np.transpose(df_final.values)
# RawTarget = df_target.values
# train_model(RawData, RawTarget,"HOF")

# GSC inputs: per-image features plus the pair lists.
df_gsc = pd.read_csv("GSC/GSC-Features.csv")
df_diff_gsc = pd.read_csv("GSC/diffn_pairs.csv")
df_same_gsc = pd.read_csv("GSC/same_pairs.csv")

print("-----------------------GSC : Concat--------------------")
df_final, df_target = generate_raw_data(df_gsc, df_diff_gsc, df_same_gsc, "Concat", "GSC")
# Transposed so that each row is one feature and each column is one pair,
# which is the layout the split helpers slice with rawData[:, columns].
RawData = np.transpose(df_final.values)
RawTarget = df_target.values

print(RawData.shape)
train_model(RawData, RawTarget, "GSC")



-----------------------GSC : Concat--------------------
(1017, 143062)
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74


KeyboardInterrupt: 

In [None]:
# GSC run on absolute feature differences. The banner announces "Subtract", so
# the feature operation must be "Subtract" as well (it was "Concat", which just
# repeated the previous cell). `.values` replaces `as_matrix()`, which has been
# removed from pandas, and matches the GSC concat cell.
print("-----------------------GSC : Subtract--------------------")
df_final, df_target = generate_raw_data(df_gsc, df_diff_gsc, df_same_gsc, "Subtract", "GSC")
RawData = np.transpose(df_final.values)
RawTarget = df_target.values
train_model(RawData, RawTarget, "GSC")