In [1]:
################################################################################################
# Without feature extraction. 
################################################################################################


###############################
#  cla = '1' or '2' response: speed  > 200
###############################
cla = '2'

In [2]:
import numpy as np
import pandas as pd
import os
import glob
from PIL import Image
import torch
import torchvision.transforms as transforms
import torchvision 
from torch.utils import data as D
import pickle
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from util import *

In [3]:
######################################################################################################################################################
# preparation.
######################################################################################################################################################

# ############################################################################################
# # Extract directory addresses of the img and label files
# ############################################################################################
class DConfig(object):
    parent_folder = './data/compcars/data/label/'
    attri = './data/compcars/data/misc/attributes.txt'

op = DConfig()
# obtain the file addressess in a folder
def listdir_nohidden(path):
    return glob.glob(os.path.join(path, '*'))

In [4]:
# ############################################################################################
# # Read the attribute file and drop the lines with missing 'speed'
# ############################################################################################
attriArray = np.empty((0, 6))
with open(op.attri, newline='\n') as trainfile:
    for line in trainfile:
        line3 = line.replace("\n","")
        line4 = line3.split(' ')
        attriArray = np.concatenate((attriArray, np.array(line4).reshape(1,-1)), axis = 0)

attriArray = attriArray[1:,:]


In [5]:
########################
# Read the stored dataset
########################
infile = open('subsetRes4.p', 'rb')
subsetRes = pickle.load(infile)
infile.close()

labelArray_label1 = subsetRes['labelArray_label1']
labelArray_label3 = subsetRes['labelArray_label3']
labelArray_label2 = subsetRes['labelArray_label2']


if cla == '1':
    labelArray4 = labelArray_label1
elif cla == '3':
    labelArray4 = labelArray_label3
elif cla == '2':
    labelArray4 = labelArray_label2
else:
    print('error')
    
########################
# the response label is whether the max speed of the car is larger than 'sped'
########################
sped = 200

group1Ind = np.where(labelArray4[:,5] > sped)[0]
group2Ind = np.where(labelArray4[:,5] <= sped)[0]

########################
# split the image indices into training and validation
########################
np.random.seed(10)
shuffleInd1 = np.arange(group1Ind.shape[0])
np.random.shuffle(shuffleInd1)

shuffleInd2 = np.arange(group2Ind.shape[0])
np.random.shuffle(shuffleInd2)

halfNum1 = round(shuffleInd1.shape[0]/2)
group1IndTrain, group1IndVal = group1Ind[shuffleInd1[:halfNum1]], group1Ind[shuffleInd1[halfNum1]:]

halfNum2 = round(shuffleInd2.shape[0]/2)
group2IndTrain, group2IndVal = group2Ind[shuffleInd2[:halfNum2]], group2Ind[shuffleInd2[halfNum2]:]

# combine the splitted indices for response = 1 and response = 0
TrainInd = np.concatenate((group1IndTrain, group2IndTrain))
ValInd = np.concatenate((group1IndVal, group2IndVal))

In [6]:
###############################
#  Define the dataset object. Apply necessary transform for alexnet
###############################
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
    	# new trial
    	# transforms.RandomPerspective(),
        # classical
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),

        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
class CompcarsDS(D.Dataset):
    """
    A customized data loader.
    """
    def __init__(self, Ind, mode):
        """ Intialize the dataset
        """

        self.filenames = labelArray4[Ind,3]

        self.len = len(self.filenames)
        self.transform = data_transforms[mode]
                           
    # You must override __getitem__ and __len__
    def __getitem__(self, index):
        """ Get a sample from the dataset
        """
        image = Image.open(self.filenames[index])
        model_id = self.filenames[index].split("/")[6]
        # obtain the car speed
        label = int(int(attriArray[np.where(attriArray[:,0] == model_id),1][0,0]) > sped)
        return self.transform(image), label
    
    def __len__(self):
        """
        Total number of samples in the dataset
        """
        return self.len


# prepare train val datasets and data loaders
datTrain = CompcarsDS(Ind = TrainInd, mode = 'train')
datVal= CompcarsDS(Ind = ValInd, mode = 'val')

image_datasets = {'train': datTrain, 'val': datVal}

batch_size = 6

# Set num_workers=0 to extract raw data.
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                             shuffle=True, num_workers=0)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
print('dataset_sizes: ', dataset_sizes)

dataset_sizes:  {'train': 1090, 'val': 1875}


In [7]:
import csv
import os

def export_dataloader_to_csv(dataloader, output_file):
    with open(output_file, mode='w', newline='') as f:
        writer = csv.writer(f)
        
        for inputs, labels in dataloader:
            # inputs: (batch_size, 3, 224, 224)
            batch_size = inputs.size(0)
            inputs_flat = inputs.view(batch_size, -1).numpy()  # shape: (batch_size, 150528)
            labels_np = labels.numpy().reshape(-1, 1)  # shape: (batch_size, 1)
            batch_data = np.hstack((inputs_flat, labels_np))  # shape: (batch_size, 150529)

            writer.writerows(batch_data)

# Export train and val
export_dataloader_to_csv(dataloaders['train'], "TrainSpeed_" + cla + "without_feature_extraction.csv")
export_dataloader_to_csv(dataloaders['val'], "ValSpeed_" + cla + "without_feature_extraction.csv")