In [1]:
from pathlib import Path
import numpy as np
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
# from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier
import time
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from tqdm import tqdm
import pickle

In [3]:
def _collect_labels(loader):
    """Iterate ``loader`` once and return every label it yields as one NumPy array.

    The image batches are discarded; only the second element of each batch
    (the labels) is kept, in the order the loader produces them.
    """
    label_batches = [y for _, y in tqdm(loader)]
    return np.array(torch.cat(label_batches, dim=0))


def _flatten_features(feats):
    """Flatten every dimension after the first and return the result as a CPU NumPy array."""
    return np.array(torch.flatten(feats, start_dim=1, end_dim=-1).cpu())


def load_feature_files(args, feature_path):
    """Pair pre-computed features from ``feature_path`` with FashionMNIST labels.

    Args:
        args: dict read for the keys ``'random_seed'``, ``'augmentation'``,
            ``'batch_size'`` and ``'num_worker'``.
        feature_path: file readable by ``torch.load`` that unpacks into a
            ``(train_feats, valid_feats, test_feats)`` triple of tensors.

    Returns:
        sklearn ``Bunch`` with attributes ``train``, ``valid`` and ``test``;
        each is a ``(features, labels)`` tuple of NumPy arrays, with the
        features flattened to 2-D (dimension 0 kept, the rest merged).

    NOTE(review): labels are gathered by iterating the loaders below, and the
    train loader shuffles. This presumably relies on the feature file having
    been produced with the same seed and the same loader sequence so that rows
    line up with labels -- TODO confirm against the extraction script before
    changing anything that touches seeding, loader order or ``shuffle``.
    """
    # Set random seed
    torch.random.manual_seed(args['random_seed'])

    # Define transformation
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    train_valid_transform = test_transform
    if args['augmentation']:
        train_valid_transform = transforms.Compose([
            transforms.RandomResizedCrop((28,28)),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor(),
            transforms.RandomErasing(),
            transforms.Normalize((0.5,), (0.5,))
        ])

    # Load dataset (download=True is passed unconditionally, as before)
    train_valid_dataset = torchvision.datasets.FashionMNIST('./dataset', train=True, transform=train_valid_transform, download=True)
    test_dataset = torchvision.datasets.FashionMNIST('./dataset', train=False, transform=test_transform, download=True)

    # Split train and validation; re-seed immediately before the split so the
    # partition depends only on the seed.
    torch.random.manual_seed(args['random_seed'])
    train_dataset, valid_dataset = torch.utils.data.random_split(train_valid_dataset, [54000, 6000])

    # Generate dataloaders
    loader_kwargs = {'batch_size': args['batch_size'], 'num_workers': args['num_worker']}
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    valid_loader = torch.utils.data.DataLoader(valid_dataset, shuffle=False, **loader_kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset, shuffle=False, **loader_kwargs)

    # Collect labels in the original train -> valid -> test order.
    train_ys = _collect_labels(train_loader)
    valid_ys = _collect_labels(valid_loader)
    test_ys = _collect_labels(test_loader)

    # map_location='cpu' lets a feature file saved from a GPU run be read on a
    # CPU-only machine; every tensor is moved to CPU below anyway.
    train_feats, valid_feats, test_feats = torch.load(feature_path, map_location='cpu')

    return Bunch(train=(_flatten_features(train_feats), train_ys),
                 valid=(_flatten_features(valid_feats), valid_ys),
                 test=(_flatten_features(test_feats), test_ys))

In [6]:
# Run settings. load_feature_files reads batch_size, num_worker, random_seed
# and augmentation; num_epoch and device are not read by the code in view.
args = dict(
    batch_size=256,
    num_worker=32,
    random_seed=8771795,
    augmentation=False,
    num_epoch=10,
    device='cuda',
)

# dataset = load_feature_files(args, 'features/scaterring_J4.pt')

In [7]:
# Cache one (features, labels) bundle per feature file J1..J4 as dataset/<j>.pkl.
# The directory is created up front because the pickle path lives inside it and
# it may not exist before the first dataset download.
os.makedirs('dataset', exist_ok=True)
for j in range(1,5):
    # Build the bundle before opening the output file, so a failure while
    # loading cannot leave an empty or truncated .pkl behind.
    dataset = load_feature_files(args, 'features/scaterring_J'+str(j)+'.pt')
    with open('dataset/'+str(j)+'.pkl', 'wb') as f:
        pickle.dump(dataset,f)

100%|██████████| 211/211 [00:01<00:00, 190.12it/s]
100%|██████████| 24/24 [00:00<00:00, 60.72it/s]
100%|██████████| 40/40 [00:00<00:00, 92.41it/s]
100%|██████████| 211/211 [00:01<00:00, 125.38it/s]
100%|██████████| 24/24 [00:01<00:00, 21.29it/s]
100%|██████████| 40/40 [00:01<00:00, 35.80it/s]
100%|██████████| 211/211 [00:01<00:00, 121.32it/s]
100%|██████████| 24/24 [00:01<00:00, 21.05it/s]
100%|██████████| 40/40 [00:01<00:00, 33.88it/s]
100%|██████████| 211/211 [00:01<00:00, 122.24it/s]
100%|██████████| 24/24 [00:01<00:00, 21.30it/s]
100%|██████████| 40/40 [00:01<00:00, 34.45it/s]


In [4]:
# with open('dataset/1.pkl', 'rb') as f:
#     dataset = pickle.load(f)

# n_estimators = 10
# start = time.time()
# clf = OneVsRestClassifier(BaggingClassifier(svm.SVC(kernel='linear', probability=True), max_samples=1.0 / n_estimators, n_estimators=n_estimators), n_jobs=-1)
# clf.fit(dataset.train[0], dataset.train[1])
# end = time.time()
# print("Bagging SVC", end - start)

Bagging SVC 744.9581112861633


In [None]:
# For each cached feature set dataset/<j>.pkl (j = 1..4): fit a one-vs-rest
# ensemble of bagged linear SVCs on the train split, predict the test split,
# then print the classification report and append it to the file 'result'.
#
# NOTE: no random_state is passed to the classifiers, so the bagging samples
# (and therefore the scores) can differ from run to run.
n_estimators = 10

# The context manager closes 'result' even if fit/predict raises part-way, so
# reports already written for earlier j are not lost.
with open('result', 'w') as fout:
    for j in range(1, 5):
        # NOTE: pickle.load can execute arbitrary code; only load cache files
        # that this notebook wrote itself.
        with open('dataset/'+str(j)+'.pkl', 'rb') as f:
            dataset = pickle.load(f)

        start = time.perf_counter()
        clf = OneVsRestClassifier(
            BaggingClassifier(
                svm.SVC(kernel='linear', probability=True),
                # Each bagged SVC is fit on 1/n_estimators of the training rows.
                max_samples=1.0 / n_estimators,
                n_estimators=n_estimators,
            ),
            n_jobs=-1,
        )
        clf.fit(dataset.train[0], dataset.train[1])
        end = time.perf_counter()
        print("Bagging SVC", end - start)

        # Time the prediction alone; report generation happens after the
        # clock is stopped.
        start = time.perf_counter()
        y_pred = clf.predict(dataset.test[0])
        end = time.perf_counter()

        # Build the report once and reuse it for both stdout and the file.
        report = "Classification report for - \n{}:\n{}\n".format(
            str(j), metrics.classification_report(dataset.test[1], y_pred))
        print(report)
        print("predict", end - start)

        fout.write(report)