In [1]:
import os
import os.path as osp
import numpy as np
import glob
import re
from IPython import embed

In [2]:
class Market1501(object):
    dataset_dir='data/Market-1501-v15.09.15'
    def __init__(self,root='./',**kwargs):
        self.dataset_dir=osp.join(root,self.dataset_dir)
        self.train_dir=osp.join(self.dataset_dir,'bounding_box_train')
        self.gallery_dir=osp.join(self.dataset_dir,'bounding_box_test')
        self.query_dir=osp.join(self.dataset_dir,'query')
        
        train, num_train_pids, num_train_imgs=self._process_dir(self.train_dir,relabel=True)
        query, num_query_pids, num_query_imgs=self._process_dir(self.query_dir,relabel=False)
        gallery, num_gallery_pids, num_gallery_imgs=self._process_dir(self.gallery_dir,relabel=False)
        
        num_total_pids=num_train_pids+num_query_pids
        num_total_imgs=num_train_imgs+num_query_imgs
        
        print("=> Market1501 loaded")
        print("------------------------------------------------------------------------")
        print("  subset: train  \t| num_id: {:5d}  \t|  num_imgs:{:8d}  ".format(num_train_pids,num_train_imgs))
        print("  subset: query  \t| num_id: {:5d}  \t|  num_imgs:{:8d}  ".format(num_query_pids,num_query_imgs))
        print("  subset: gallery \t| num_id: {:5d}  \t|  num_imgs:{:8d}  ".format(num_gallery_pids,num_gallery_imgs))
        print("------------------------------------------------------------------------")
        print("  total \t\t\t| num_id: {:5d}  \t|  num_imgs:{:8d}  ".format(num_total_pids,num_total_imgs))
        print("------------------------------------------------------------------------")
        self.train=train
        self.query=query
        self.gallery=gallery
        self.num_train_pids=num_train_pids
        self.num_query_pids=num_query_pids
        self.num_gallery_pids=num_gallery_pids
        
    def _process_dir(self,dir_path,relabel=False):
        img_paths=glob.glob(osp.join(dir_path,'*.jpg'))
        pid_container=set()
        for img_path in img_paths:
            pid=int(img_path.split("\\")[-1].split("_")[0])
            if pid==-1:continue
            pid_container.add(pid)
        pid2label={pid:label for label,pid in enumerate(pid_container)}
        dataset=[]
        for img_path in img_paths:
            str_list=img_path.split("\\")[-1].split("_")
            pid=int(str_list[0])
            cid=int(str_list[1][1:2])
            if pid==-1:continue
            assert 0<=pid <=1501
            assert 1<=cid<=6
            cid+=-1
            if relabel:
                pid=pid2label[pid]
            dataset.append((img_path,pid,cid))
        num_pids=len(pid_container)
        num_imgs=len(img_paths)
        return dataset, num_pids, num_imgs

        

if __name__=='__main__':
    data=Market1501()

Python 3.7.3 (default, Mar 27 2019, 17:13:21) [MSC v.1915 64 bit (AMD64)]
Type 'copyright', 'credits' or 'license' for more information
IPython 7.19.0 -- An enhanced Interactive Python. Type '?' for help.

In [1]: pid2label
Out[1]: 
{2: 0,
 7: 1,
 10: 2,
 11: 3,
 12: 4,
 20: 5,
 22: 6,
 23: 7,
 27: 8,
 28: 9,
 30: 10,
 32: 11,
 35: 12,
 37: 13,
 42: 14,
 43: 15,
 46: 16,
 47: 17,
 48: 18,
 52: 19,
 53: 20,
 56: 21,
 57: 22,
 59: 23,
 64: 24,
 65: 25,
 67: 26,
 68: 27,
 69: 28,
 70: 29,
 76: 30,
 77: 31,
 79: 32,
 81: 33,
 82: 34,
 84: 35,
 86: 36,
 88: 37,
 90: 38,
 93: 39,
 95: 40,
 97: 41,
 98: 42,
 99: 43,
 100: 44,
 104: 45,
 105: 46,
 106: 47,
 107: 48,
 108: 49,
 110: 50,
 111: 51,
 114: 52,
 115: 53,
 116: 54,
 117: 55,
 118: 56,
 121: 57,
 122: 58,
 123: 59,
 125: 60,
 127: 61,
 129: 62,
 132: 63,
 134: 64,
 135: 65,
 136: 66,
 139: 67,
 140: 68,
 141: 69,
 142: 70,
 143: 71,
 148: 72,
 149: 73,
 150: 74,
 151: 75,
 158: 76,
 159: 77,
 160: 78,
 162: 79,
 164: 80,
 166: 81,
 16

 1500: 750}

In [2]: exit()

Python 3.7.3 (default, Mar 27 2019, 17:13:21) [MSC v.1915 64 bit (AMD64)]
Type 'copyright', 'credits' or 'license' for more information
IPython 7.19.0 -- An enhanced Interactive Python. Type '?' for help.

In [1]: exit

Python 3.7.3 (default, Mar 27 2019, 17:13:21) [MSC v.1915 64 bit (AMD64)]
Type 'copyright', 'credits' or 'license' for more information
IPython 7.19.0 -- An enhanced Interactive Python. Type '?' for help.

In [1]: quit

=> Market1501 loaded
------------------------------------------------------------------------
  subset: train  	| num_id:   751  	|  num_imgs:   12936  
  subset: query  	| num_id:   750  	|  num_imgs:    3368  
  subset: gallery 	| num_id:   751  	|  num_imgs:   19732  
------------------------------------------------------------------------
  total 			| num_id:  1501  	|  num_imgs:   16304  
------------------------------------------------------------------------


In [5]:
type(data.train[0])

tuple

In [6]:
data.query

[('./data/Market-1501-v15.09.15\\query\\0001_c1s1_001051_00.jpg', 1, 0),
 ('./data/Market-1501-v15.09.15\\query\\0001_c2s1_000301_00.jpg', 1, 1),
 ('./data/Market-1501-v15.09.15\\query\\0001_c3s1_000551_00.jpg', 1, 2),
 ('./data/Market-1501-v15.09.15\\query\\0001_c4s6_000810_00.jpg', 1, 3),
 ('./data/Market-1501-v15.09.15\\query\\0001_c5s1_001426_00.jpg', 1, 4),
 ('./data/Market-1501-v15.09.15\\query\\0001_c6s1_009601_00.jpg', 1, 5),
 ('./data/Market-1501-v15.09.15\\query\\0003_c1s6_015971_00.jpg', 3, 0),
 ('./data/Market-1501-v15.09.15\\query\\0003_c3s3_064744_00.jpg', 3, 2),
 ('./data/Market-1501-v15.09.15\\query\\0003_c4s6_015641_00.jpg', 3, 3),
 ('./data/Market-1501-v15.09.15\\query\\0003_c5s3_065187_00.jpg', 3, 4),
 ('./data/Market-1501-v15.09.15\\query\\0003_c6s3_088392_00.jpg', 3, 5),
 ('./data/Market-1501-v15.09.15\\query\\0004_c1s6_016996_00.jpg', 4, 0),
 ('./data/Market-1501-v15.09.15\\query\\0004_c2s3_059152_00.jpg', 4, 1),
 ('./data/Market-1501-v15.09.15\\query\\0004_c3s3_0