In [1]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [2]:
# Two-step pipeline: standardize the features, then classify them with an
# RBF-kernel support vector machine.
clf = make_pipeline(
    StandardScaler(),
    SVC(kernel='rbf', gamma='auto'),
)

In [None]:
# Pre Train SVM

def trainSVM(clf, train_data, test_data, train_labels, test_labels):
    """Fit ``clf`` on the training split and predict the test split.

    Args:
        clf: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        train_data: Samples passed to ``clf.fit``.
        test_data: Samples passed to ``clf.predict``.
        train_labels: Targets passed to ``clf.fit``.
        test_labels: Accepted for interface symmetry; not used here.

    Returns:
        A ``(clf, predictions)`` tuple: the same estimator object that was
        passed in (now fitted) and whatever ``clf.predict(test_data)`` returned.
    """
    clf.fit(train_data, train_labels)
    return clf, clf.predict(test_data)

In [None]:
import json
import os

import cv2
import numpy as np
from sklearn.preprocessing import StandardScaler

class Dataloader:
    """Load an image dataset and its JSON label files for classifier training.

    Paths are derived from a dataset root: images are read from the
    ``training`` and ``testing`` sub-directories, and the two label files are
    resolved relative to the same root.

    Attributes populated by ``load_dataset``:
        train_file_name / test_file_name: File names in sorted order.
        train_images / test_images: Resized images as returned by OpenCV.
        train_data / test_data: The same images flattened to 1-D vectors.

    Attributes populated by ``load_labels``:
        labels_train / labels_test: The ``'labels'`` entry of each JSON file.
    """

    def __init__(self, path, label_file, test_label, total_samples_per_class=100):
        """Record dataset paths and create the empty containers.

        Args:
            path: Dataset root directory.
            label_file: Training label file name, relative to ``path``.
            test_label: Testing label file name, relative to ``path``.
            total_samples_per_class: Number of consecutive samples each class
                occupies in the dataset; used by ``smaller_dataset``.
        """
        self.train_path = os.path.join(path, 'training')
        self.test_path = os.path.join(path, 'testing')
        self.label_path = os.path.join(path, label_file)
        self.test_label_path = os.path.join(path, test_label)
        self.train_data = []
        self.train_images = []
        self.train_file_name = []
        self.test_data = []
        self.test_images = []
        self.test_file_name = []
        self.labels_train = []
        self.labels_test = []

        self.TOTAL_SAMPLES_PER_CLASS = total_samples_per_class

    def _assert_exist(self, label_path):
        """Raise ``AssertionError`` if ``label_path`` does not exist."""
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped when Python runs with -O.
        if not os.path.exists(label_path):
            raise AssertionError('File is not available: %s' % label_path)

    def load_labels(self, label_path=None, test_label=None):
        """Load the training and testing labels from JSON files.

        Each file must contain an object with a ``'labels'`` key.

        Args:
            label_path: Training label file; defaults to ``self.label_path``.
            test_label: Testing label file; defaults to
                ``self.test_label_path``.

        Raises:
            AssertionError: If either file does not exist.
            KeyError: If a file has no ``'labels'`` entry.
        """
        if label_path is None:
            label_path = self.label_path
        if test_label is None:
            test_label = self.test_label_path
        self._assert_exist(label_path)
        self._assert_exist(test_label)

        with open(label_path, 'r', encoding='utf-8') as f:
            train_payload = json.load(f)
        with open(test_label, 'r', encoding='utf-8') as f:
            test_payload = json.load(f)

        # Assign only after both files parsed, so a failure on the second
        # file does not leave the loader with half-updated labels.
        self.labels_train = train_payload['labels']
        self.labels_test = test_payload['labels']

    def scaled_data(self, train_data, test_data):
        """Standardize both splits with a scaler fitted on the training split.

        Returns:
            ``(scaled_train_data, scaled_test_data, raw_scaler)`` where
            ``raw_scaler`` is the fitted ``StandardScaler``.
        """
        # Fit on training data only so no test statistics leak into scaling.
        raw_scaler = StandardScaler().fit(train_data)
        scaled_train_data = raw_scaler.transform(train_data)
        scaled_test_data = raw_scaler.transform(test_data)
        return scaled_train_data, scaled_test_data, raw_scaler

    def _load_images(self, directory, width, height):
        """Read, resize and flatten every file in ``directory`` (sorted order)."""
        names, images, vectors = [], [], []
        for filename in sorted(os.listdir(directory)):
            file_path = os.path.join(directory, filename)
            image = cv2.imread(file_path)
            # cv2.imread signals failure by returning None rather than
            # raising; fail here with the offending path instead of letting
            # cv2.resize raise an opaque error. Skipping the file silently
            # would shift every later sample relative to its label.
            if image is None:
                raise OSError('Could not read image: %s' % file_path)
            image = cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
            names.append(filename)
            images.append(image)
            # Flatten to one feature vector; the size assumes 3 channels,
            # which is what cv2.imread yields with its default flag.
            vectors.append(np.reshape(np.array(image), width * height * 3))
        return names, images, vectors

    def load_dataset(self, train_path=None, test_path=None):
        """Load the training and testing images.

        Results are appended to the ``train_*`` / ``test_*`` attributes, so
        calling this more than once accumulates samples.

        Args:
            train_path: Directory of training images; defaults to
                ``self.train_path``.
            test_path: Directory of testing images; defaults to
                ``self.test_path``.

        Raises:
            OSError: If a file in either directory cannot be read as an image.
        """
        # For ResNet 256x256
        WIDTH = 256
        HEIGHT = 256
        if train_path is None:
            train_path = self.train_path
        if test_path is None:
            test_path = self.test_path

        names, images, vectors = self._load_images(train_path, WIDTH, HEIGHT)
        self.train_file_name.extend(names)
        self.train_images.extend(images)
        self.train_data.extend(vectors)

        names, images, vectors = self._load_images(test_path, WIDTH, HEIGHT)
        self.test_file_name.extend(names)
        self.test_images.extend(images)
        self.test_data.extend(vectors)

    def smaller_dataset(self, dataset, no_samples_per_class, no_of_classes):
        """Take the first few samples of each class to form a smaller dataset.

        ``dataset`` is treated as consecutive blocks of
        ``self.TOTAL_SAMPLES_PER_CLASS`` samples, one block per class.

        Args:
            dataset: Sliceable sequence of samples ordered by class.
            no_samples_per_class: Samples to keep from the start of each
                block. Clamped to ``[0, TOTAL_SAMPLES_PER_CLASS]`` so a slice
                never runs into the next class's block.
            no_of_classes: Number of leading class blocks to sample.

        Returns:
            ``(new_dataset, labels)`` as lists of equal length; labels are
            1-based class indices.
        """
        block = self.TOTAL_SAMPLES_PER_CLASS
        take = max(0, min(no_samples_per_class, block))
        new_dataset = []
        labels = []
        for class_index in range(no_of_classes):
            start = class_index * block
            chunk = dataset[start:start + take]
            new_dataset.extend(chunk)
            # Label what was actually taken: the slice can be shorter than
            # requested when the dataset ends early.
            labels.extend([class_index + 1] * len(chunk))
        return new_dataset, labels