In [1]:
import numpy as np
from scipy.stats import pearsonr
import heapq
from heapq import heappush, heappop, heappushpop
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
import itertools
import pickle

In [2]:
all_data = np.load("all_data.npy") #holds all the data from channels
category_info = np.load("words_in_categories.npy") #category_info[cat][ptr] returns the number of the word(0...62) of the ptr'th word in the category cat
lengths = np.load("category_lengths.npy") #lengths[cat] is the number of words in category cat

In [3]:
total_words = 63 

tStart = 0 #start time
tEnd = 650 #end time
tWidth = 100 #width of time slice
tIncr = 50 #increment in start time
tEx = 10 #number of examples to downsample to
tNtoAvg = int(tWidth/tEx) #number of timestep values to average to form one example

training_amt = 8 #8 examples for training, 2 for testing
testing_amt = 10 - training_amt

np.random.seed(63)

In [4]:
TrainingData = np.zeros((total_words,5,training_amt,256,650))#gives the pertinent data from all_data for the two categories
TestingData = np.zeros( (total_words,5,testing_amt,256,650)) #^
wordptr = -1 #the index of the current word, iterates from 0...total_words

for i in range(63):
    wordptr+=1

    excl = [-1]*10 #excl[j] = the j'th presentation number which should be saved for testing (e.g. excl[0] = 0 means the first presentation of the wordptr'th word should be saved for testing). Ignore -1's.
    
    for pres in range(testing_amt):
        while(1): #this loop repeatedly generates a random presentation until one which hasn't been reserved for testing has been found, and then breaks it
            nxtrand = np.random.randint(0,10)
            if(excl[nxtrand]==-1):
                excl[nxtrand]=nxtrand
                break
    for bandnum in range(5):
        ptr2 = 0 #points to which presentation(0...9) of wordptr'th word we are currently copying to TrainingData
        for pres in range(10):
            if(excl[pres]!=-1): #if reserved for testing, don't include in training data
                continue
           
            TrainingData[wordptr][bandnum][ptr2]=all_data[bandnum][i][pres] #sets the channel x time matrix for TrainingData[bandnum][wordptr][ptr2]
            ptr2+=1 #move to next presentation

    for bandnum in range(5): #this loop is same as above, except now we only want the testing presentations
        ptr2=0
        for pres in range(10):
            if(excl[pres]==-1):
                continue
            TestingData[wordptr][bandnum][ptr2] = all_data[bandnum][i][excl[pres]]
            ptr2+=1

In [5]:
toSelect = 5 #number of top features to select

train_feature_vectors = np.zeros((total_words, training_amt,toSelect * tEx))
test_feature_vectors = np.zeros((total_words, testing_amt, toSelect * tEx))
timeSequences = np.zeros((total_words,5,12,training_amt,256,tEx))

In [6]:
time_pointer = 0
for t in range(tStart, tEnd-tWidth+1, tIncr):
    tEx_pointer = 0
    for tEStart in range(t,t+tWidth-tEx+1,tNtoAvg):
        timeSequences[:,:,time_pointer,:,:,tEx_pointer] = np.average(TrainingData[:,:,:,:,tEStart:tEStart+tNtoAvg], axis = 4)
        tEx_pointer+=1
    time_pointer+=1

btcwpv_matrix = np.transpose(timeSequences, (1, 2, 4, 0, 3, 5)) #band,time,channel,word,pres,value matrix in that order
print(btcwpv_matrix.shape)

(5, 12, 256, 63, 8, 10)


In [12]:
import numpy as np
from sklearn.metrics import mean_squared_error
from math import sqrt

index_to_cat = ["Tools","Animals","Buildings","Body Parts","Furniture","Vehicles","Kitchen Utensils", 
"Building Parts", "Clothing", "Insects", "Vegetables","Man-made objects"]

tools = 0
animals = 1

def word_averaged(word, btcwpv_matrix, _b, _t, _c):
    word_averaged = np.zeros((10)) 
    for pres_counter in range(8):
        word_averaged += btcwpv_matrix[_b, _t, _c, word, pres_counter]
    return word_averaged

            
for tool in category_info[tools]:
    if tool!=-1:
        
        BEST = [0]*4
        
        for _b in range(5):
            for _t in range(12):
                for _c in range(256):
                    
                    averaged_tool = word_averaged(tool, btcwpv_matrix, _b, _t, _c)

                    rmse = 0
                    for pres in range(8):
                        #calculate rmse
                        #rmse += sqrt(mean_squared_error(averaged_tool, btcwpv_matrix[_b,_t,_c,tool,pres]))
                        rmse += np.linalg.norm(btcwpv_matrix[_b, _t, _c, tool, pres]-averaged_tool)
                    rmse/=8
                    dist = 0
                    c1 = 0
                    for animal in category_info[animals]:
                        if animal!=-1:
                            tmpdis = 0
                            averaged_animal = word_averaged(animal, btcwpv_matrix, _b, _t, _c)
                            tmpdis = np.linalg.norm(averaged_tool-averaged_animal) #* np.linalg.norm(averaged_tool-averaged_animal)
                            
                            averaged_animal = word_averaged(animal,btcwpv_matrix,_b,_t,_c)
                            rmseanimal = 0
                            for pres in range(8):
                                rmseanimal += np.linalg.norm(btcwpv_matrix[_b,_t,_c,animal,pres]-averaged_animal)**2
                            rmseanimal/=8
                            dist += (max(0,tmpdis-rmseanimal))**2
                            
                            c1+=1
                    dist/=c1
        
                    MEASURE = dist/rmse
                    
                    if (MEASURE > BEST[0]):
                        BEST[0] = dist/rmse
                        BEST[1] = _b
                        BEST[2] = _t
                        BEST[3] = _c
        print("\n")
        print("BTC for " + str(tool) + ": " + str(BEST))
        averaged_tool = word_averaged(tool, btcwpv_matrix, BEST[1], BEST[2], BEST[3])
        for tool2 in category_info[tools]:
            if(tool2==-1):
                continue
            averaged_tool2 = word_averaged(tool2, btcwpv_matrix, BEST[1], BEST[2], BEST[3])
            
            rmse = 0
            dist = np.linalg.norm(averaged_tool2 - averaged_tool)
            for pres in range(8):
                rmse += np.linalg.norm(btcwpv_matrix[BEST[1],BEST[2],BEST[3], tool2, pres] - averaged_tool2)
            rmse/=8
            print("For tool " + str(tool2) + " rmse is : " + str(rmse))
            print("For tool " + str(tool2) + " distance is : " + str(dist))
        
        for animal2 in category_info[animals]:
            if(animal2==-1):
                continue
            averaged_animal = word_averaged(animal2, btcwpv_matrix, BEST[1], BEST[2], BEST[3])
            dist = np.linalg.norm(averaged_animal - averaged_tool)
            rmse = 0
            for pres in range(8):
                rmse += np.linalg.norm(btcwpv_matrix[BEST[1],BEST[2],BEST[3], animal2, pres] - averaged_animal)
            rmse/=8
            print("For animal " + str(animal2) + " rmse is : " + str(rmse))
            print("For animal " + str(animal2) + " distance is : " + str(dist))
        
        '''
        print("\t", "RMSE FROM OTHER TOOL PRESENTATIONS:", rmse/8)
        print("-")
        print("\t", "DISTANCE FROM ANIMALS PRESENTATIONS:")
        for animal in category_info[animals]:
            if animal!=-1:
                averaged_animal = word_averaged(animal, btcwpv_matrix, _b, _t, _c)
                print("\t", np.linalg.norm(word_averaged(tool, btcwpv_matrix, BEST[1], BEST[2], BEST[3])-averaged_animal))
        '''



BTC for 5: [5.0698317718126811, 0, 10, 57]
For tool 5 rmse is : 57.2600106852
For tool 5 distance is : 0.0
For tool 17 rmse is : 10.6949940172
For tool 17 distance is : 72.2547844831
For tool 29 rmse is : 13.0362484517
For tool 29 distance is : 80.1027657046
For tool 41 rmse is : 16.1301986932
For tool 41 distance is : 76.3933205987
For tool 53 rmse is : 9.27832675319
For tool 53 distance is : 60.2009233191
For tool 62 rmse is : 36.1718721103
For tool 62 distance is : 25.289372344
For animal 3 rmse is : 19.8518481233
For animal 3 distance is : 87.0683252462
For animal 15 rmse is : 5.92694595577
For animal 15 distance is : 69.8339532572
For animal 27 rmse is : 18.4956858154
For animal 27 distance is : 45.6081690225
For animal 39 rmse is : 6.03186658325
For animal 39 distance is : 66.9473160539
For animal 51 rmse is : 17.6916455002
For animal 51 distance is : 45.3668611922


BTC for 17: [34.84379325341996, 0, 10, 17]
For tool 5 rmse is : 66.480633608
For tool 5 distance is : 636.911582