In [27]:
import numpy as np
import cv2 as cv
import os
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler


def load_data(path):
    """Split the frame folders found under ``path`` into train and test sets.

    Each sub-directory of ``path`` is one sample; its label is the part of
    the folder name before the first underscore. The split is 70/30,
    stratified by label and seeded with ``random_state=0``. The four
    resulting arrays are also written to ``.npy`` files in the current
    working directory.

    Parameters
    ----------
    path : str
        Directory that contains one sub-directory per sample.

    Returns
    -------
    tuple of numpy.ndarray
        ``(training_folders, testing_folders, y_train, y_test)``.
    """
    # os.listdir() returns entries in arbitrary order, so "skip the first
    # entry" cannot reliably drop a stray file and may drop a real sample.
    # Keep only real, non-hidden directories, sorted so that the seeded split
    # is the same on every run and every machine.
    img_folders = sorted(
        entry
        for entry in os.listdir(path)
        if not entry.startswith(".") and os.path.isdir(os.path.join(path, entry))
    )
    labels = [folder.split("_")[0] for folder in img_folders]

    img_folders = np.array(img_folders)
    labels = np.array(labels)

    training_folders, testing_folders, y_train, y_test = train_test_split(
        img_folders, labels, test_size=0.3, random_state=0, stratify=labels
    )
    print(training_folders.shape, testing_folders.shape)

    np.save("training_folders", training_folders)
    np.save("testing_folders", testing_folders)
    np.save("training_labels", y_train)
    np.save("testing_labels", y_test)

    return training_folders, testing_folders, y_train, y_test

def find_features(path,training_folders):
    """Compute SIFT descriptors for every frame of every training folder.

    The result is a 1-D object array with one entry per folder. Each entry is
    itself a 1-D object array with one item per file in that folder: the
    descriptor matrix returned by SIFT, or ``None`` when the file could not
    be read as an image or SIFT produced no descriptors. The array is saved
    to ``./Data/training_descriptors.npy`` and returned.

    Parameters
    ----------
    path : str
        Directory that contains the sample folders.
    training_folders : sequence of str
        Folder names (relative to ``path``) to process.

    Returns
    -------
    numpy.ndarray
        The object array described above.
    """
    # SIFT lives in the main module on recent OpenCV builds and in the
    # xfeatures2d contrib module on older ones; support both.
    if hasattr(cv, "SIFT_create"):
        sift = cv.SIFT_create()
    else:
        sift = cv.xfeatures2d.SIFT_create()

    # Object arrays are filled explicitly: letting np.array() infer a shape
    # from ragged nested lists fails on modern NumPy and otherwise yields a
    # layout that depends on the data.
    descriptor_list = np.empty(len(training_folders), dtype=object)
    for folder_idx, folder in enumerate(training_folders):
        file_path = os.path.join(path, folder)
        images = os.listdir(file_path)
        video_descriptors = np.empty(len(images), dtype=object)
        for frame_idx, img in enumerate(images):
            image = cv.imread(os.path.join(file_path, img), 0)
            if image is None:
                # Not a readable image: keep the slot, mark it as empty.
                video_descriptors[frame_idx] = None
                continue
            kp, des = sift.detectAndCompute(image, None)
            video_descriptors[frame_idx] = des
        descriptor_list[folder_idx] = video_descriptors

    if len(descriptor_list) > 0 and len(descriptor_list[0]) > 0:
        print(type(descriptor_list[0]),type(descriptor_list[0][0]))

    np.save("./Data/training_descriptors",descriptor_list)
    print(descriptor_list.shape)
    return descriptor_list

def cluster(descriptor_stack, n_clusters=20, random_state=0):
    """Cluster the stacked descriptors with k-means.

    The cluster label of every row is saved to ``clustered_objects.npy`` in
    the current working directory.

    Parameters
    ----------
    descriptor_stack : array-like
        Descriptors to cluster, one per row.
    n_clusters : int, optional
        Number of clusters. Defaults to 20, the histogram width the other
        functions in this file use by default.
    random_state : int or None, optional
        Seed for k-means initialisation, so repeated runs give the same
        vocabulary. Pass ``None`` for unseeded behaviour.

    Returns
    -------
    tuple
        ``(clustered_objects, kmeans_obj)``: the label of each input row and
        the fitted ``KMeans`` instance.
    """
    kmeans_obj = KMeans(n_clusters=n_clusters, random_state=random_state)
    # np.array() gives the caller its own copy of the labels.
    clustered_objects = np.array(kmeans_obj.fit_predict(descriptor_stack))
    np.save("clustered_objects",clustered_objects)
    return clustered_objects,kmeans_obj
    
def stack_vertically(descriptor_list):
    """Concatenate all per-frame descriptor matrices into one 2-D array.

    ``descriptor_list`` is iterated folder by folder and frame by frame;
    ``None`` entries are skipped. Each folder may be an array or a plain
    list of frames. The stacked array is saved to
    ``./Data/descriptor_stack.npy`` and returned.

    Parameters
    ----------
    descriptor_list : sequence
        One entry per folder, each a sequence of descriptor matrices or
        ``None``.

    Returns
    -------
    numpy.ndarray
        All descriptor rows in folder/frame order. An empty 1-D array is
        returned when there is nothing to stack.
    """
    blocks = [
        np.asarray(frame_descriptors)
        for video in descriptor_list
        for frame_descriptors in video
        if frame_descriptors is not None
    ]

    # One concatenate call instead of growing a Python list row by row.
    if blocks:
        descriptor_stack = np.concatenate(blocks, axis=0)
    else:
        descriptor_stack = np.array([])

    np.save("./Data/descriptor_stack",descriptor_stack)
    return descriptor_stack

def create_bovw_histogram(clustered_objects,training_folders,descriptor_list,path,n_clusters=20):
    """Count, per folder, how many of its descriptors fall in each cluster.

    ``clustered_objects`` must hold one label per descriptor, in the order
    obtained by walking ``descriptor_list`` folder by folder and frame by
    frame while skipping ``None`` frames (the order ``stack_vertically``
    produces). The histogram is saved to ``./Data/histogram.npy``.

    Parameters
    ----------
    clustered_objects : array-like of int
        Cluster label of every stacked descriptor.
    training_folders : numpy.ndarray
        Training folder names; only its length is used (one row per folder).
    descriptor_list : sequence
        One entry per folder, each a sequence of descriptor matrices or
        ``None``.
    path : str
        Unused; kept for interface compatibility.
    n_clusters : int, optional
        Number of histogram bins. Defaults to 20.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(training_folders), n_clusters)``.

    Raises
    ------
    ValueError
        If ``clustered_objects`` has fewer labels than there are descriptors.
    """
    clustered_objects = np.asarray(clustered_objects)
    histogram = np.zeros((training_folders.shape[0], n_clusters))

    # The labels are a flat array, so a single running offset is needed to
    # find the labels of each frame. Indexing with i + j + k would reuse the
    # first few labels for every folder.
    offset = 0
    for i, video in enumerate(descriptor_list):
        for frame_descriptors in video:
            if frame_descriptors is None:
                # No descriptors were stacked for this frame.
                continue
            count = len(frame_descriptors)
            if offset + count > len(clustered_objects):
                raise ValueError(
                    "clustered_objects has fewer labels than descriptor_list has descriptors"
                )
            frame_labels = clustered_objects[offset:offset + count]
            # add.at accumulates correctly when a label occurs several times.
            np.add.at(histogram[i], frame_labels, 1)
            offset += count

    np.save("./Data/histogram",histogram)
    return histogram

def normalize_histogram(histogram):
    """Standardize the histogram with a freshly fitted ``StandardScaler``.

    The scaler is fitted on ``histogram`` and then applied to it. The scaled
    matrix is written to ``standardized_histogram.npy`` in the current
    working directory.

    Returns
    -------
    tuple
        ``(standardized_histogram, scale)``: the scaled matrix and the fitted
        scaler, so the same transform can be applied to other data.
    """
    scale = StandardScaler()
    scale.fit(histogram)

    standardized_histogram = np.array(scale.transform(histogram))
    np.save("standardized_histogram", standardized_histogram)

    return standardized_histogram, scale

def train_bovw(histogram, training_labels, clf):
    """Fit ``clf`` on the histograms and their labels.

    ``clf`` only needs a ``fit(X, y)`` method. The classifier object that was
    passed in is returned, whatever ``fit`` itself returns.
    """
    fitted = clf
    fitted.fit(histogram, training_labels)
    return fitted


    
def recognize_test_videos(testing_label,testing_folder,path,clf,scale,kmeans_obj):
    """Classify the test folders and print the accuracy in percent.

    For every folder, SIFT descriptors of each readable frame are assigned
    to clusters with ``kmeans_obj`` and counted into one histogram row. The
    histogram matrix is scaled with ``scale`` and passed to ``clf``.

    Parameters
    ----------
    testing_label : array-like
        True labels, one per entry of ``testing_folder``.
    testing_folder : sequence of str
        Folder names (relative to ``path``) to classify.
    path : str
        Directory that contains the sample folders.
    clf : fitted classifier
        Must provide ``predict`` and ``score``.
    scale : fitted scaler
        Must provide ``transform``.
    kmeans_obj : fitted clustering model
        Must provide ``predict``. Its ``n_clusters`` attribute sets the
        histogram width; 20 is used when the attribute is absent.

    Returns
    -------
    numpy.ndarray
        Predicted label for every test folder.
    """
    # SIFT lives in the main module on recent OpenCV builds and in the
    # xfeatures2d contrib module on older ones; support both.
    if hasattr(cv, "SIFT_create"):
        sift = cv.SIFT_create()
    else:
        sift = cv.xfeatures2d.SIFT_create()

    n_clusters = getattr(kmeans_obj, "n_clusters", 20)
    test_histogram = np.zeros((len(testing_folder), n_clusters))

    for i, folder in enumerate(testing_folder):
        file_path = os.path.join(path, folder)

        for img in os.listdir(file_path):
            image = cv.imread(os.path.join(file_path, img), 0)
            if image is None:
                # Not a readable image.
                continue
            kp, des = sift.detectAndCompute(image, None)
            if des is None:
                # No keypoints were found in this frame.
                continue
            test_clusters = kmeans_obj.predict(des)
            # add.at accumulates correctly when a label occurs several times.
            np.add.at(test_histogram[i], test_clusters, 1)

    # Scale the full 2-D matrix in one call; the scaler was fitted on 2-D
    # data and cannot transform a single 1-D row.
    test_histogram = scale.transform(test_histogram)

    predicted_labels = clf.predict(test_histogram)
    score = clf.score(test_histogram,testing_label)
    print(score*100)
    return predicted_labels


In [28]:
path = "./Data/Frames/"
training_folders, testing_folder, y_train, y_test = load_data(path)

# Feature extraction is slow, so its result is cached on disk. Uncomment the
# next line to (re)compute it. NOTE(review): the cached file must have been
# produced from the same training split as the one returned above.
# find_features(path,training_folders)
# The cache is an object array (per-frame descriptor matrices and None), so
# NumPy needs allow_pickle=True to read it. Only load files you created.
descriptor_list = np.load("./Data/training_descriptors.npy", allow_pickle=True)

descriptor_stack = stack_vertically(descriptor_list)
# descriptor_stack = np.load("./Data/descriptor_stack.npy")


# Clustering
clustered_objects,kmeans_obj = cluster(descriptor_stack)

# Creating the visual vocabulary
histogram = create_bovw_histogram(clustered_objects,training_folders,descriptor_list,path)

# Normalizing the histogram
standardized_histogram, scale = normalize_histogram(histogram)

# Training
clf  = SVC()
clf = train_bovw(standardized_histogram,y_train,clf)

# Testing
predicted_labels = recognize_test_videos(y_test,testing_folder,path,clf,scale,kmeans_obj)








(3009,) (1290,)


KeyboardInterrupt: 