# 데이터 라벨링 역할 분담

## Labeling

- bbox가 이미지 전체 크기(w, h)와 동일해지는 경우가 최대한 없도록 작업을 진행함
- class id는 고려하지 않고 bounding box 생성을 목표로 진행함

| 구분   | class  | file (image) | 수량  | 비고                                            |
| ------ | ------ | ------------ | ----- | ----------------------------------------------- |
| 김동주 | ~ 5483 | ~ 77222.jpg  | 12712 |                                                 |
| 박다빈 | 5491 ~ | 77364.jpg ~  | 12579 | coco(xml)로 라벨링 한 후 metadata 형식으로 파싱 |

---

## 목표

Products-10K 데이터셋의 `train.csv`를 열람하여 개인에게 할당된 클래스들에 해당하는 데이터 정보(파일명)만 추출하는 것

In [1]:
# Path to the dataset's train.csv, typed in by whoever runs the notebook.
# It is read interactively so that no machine-specific path is hard-coded.
CSV_FILE_INPUT = input(
    'Products10k 데이터셋에 포함된 train.csv 의 경로: '
)

# Class ids assigned to each annotator, keyed by annotator name.
# Each list is matched against the `class` column of train.csv to select
# the rows (image file names) that the annotator is responsible for.
CLASSES_TODO = {
    '김동주': [ 99, 168, 238, 325, 397, 543, 577, 743, 780, 854, 907, 943, 1016, 1019, 1072, 1107, 1178, 1244, 1210, 1215, 1269, 1299, 1368, 1441, 1478, 1481, 1508, 1524, 1532, 1572, 1578, 1737, 1830, 1859, 1863, 1869, 1883, 1888, 1897, 1915, 1921, 1933, 1945, 1950, 1967, 1968, 1978, 1982, 1991, 1999, 2001, 2016, 2019, 2020, 2039, 2040, 2049, 2055, 2061, 2066, 2077, 2084, 2096, 2103, 2120, 2123, 2129, 2130, 2148, 2150, 2161, 2173, 2174, 2193, 2194, 2212, 2213, 2218, 2222, 2234, 2236, 2247, 2252, 2268, 2299, 2320, 2327, 2332, 2339, 2345, 2362, 2374, 2377, 2390, 2395, 2430, 2432, 2436, 2475, 2487, 2520, 2523, 2529, 2541, 2548, 2550, 2575, 2584, 2591, 2592, 2596, 2598, 2607, 2609, 2627, 2634, 2642, 2644, 2654, 2681, 2690, 2702, 2711, 2717, 2747, 2758, 2759, 2768, 2770, 2803, 2807, 2840, 2846, 2847, 2850, 2852, 2854, 2856, 2867, 2872, 2877, 2900, 2907, 2909, 2910, 2924, 2934, 2972, 2982, 2995, 3000, 3014, 3030, 3047, 3074, 3083, 3084, 3104, 3109, 3122, 3128, 3130, 3136, 3143, 3177, 3206, 3212, 3231, 3290, 3291, 3297, 3298, 3319, 3322, 3324, 3332, 3333, 3335, 3339, 3348, 3375, 3385, 3387, 3388, 3393, 3413, 3436, 3444, 3467, 3476, 3490, 3497, 3508, 3510, 3511, 3513, 3544, 3570, 3571, 3579, 3582, 3591, 3592, 3600, 3601, 3602, 3608, 3613, 3632, 3634, 3640, 3641, 3648, 3655, 3658, 3688, 3700, 3703, 3708, 3733, 3741, 3756, 3774, 3778, 3784, 3788, 3797, 3804, 3818, 3822, 3833, 3840, 3844, 3849, 3856, 3861, 3865, 3872, 3878, 3880, 3884, 3885, 3889, 3891, 3897, 3918, 3921, 3926, 3933, 3934, 3938, 3939, 3953, 3957, 3966, 3972, 3984, 3990, 4003, 4004, 4015, 4018, 4022, 4025, 4027, 4029, 4032, 4034, 4039, 4041, 4052, 4053, 4056, 4061, 4075, 4083, 4095, 4104, 4105, 4115, 4131, 4132, 4133, 4134, 4137, 4140, 4165, 4167, 4179, 4181, 4209, 4214, 4218, 4227, 4233, 4240, 4246, 4249, 4254, 4258, 4264, 4265, 4269, 4270, 4294, 4296, 4312, 4323, 4329, 4333, 4335, 4340, 4356, 4375, 4383, 4384, 4396, 4397, 4406, 4409, 4433, 4438, 4449, 4452, 4455, 4460, 4463, 4470, 4476, 4482, 4483, 4493, 4505, 
4508, 4513, 4520, 4522, 4525, 4526, 4533, 4536, 4538, 4548, 4565, 4568, 4571, 4572, 4575, 4577, 4578, 4581, 4589, 4606, 4612, 4632, 4634, 4639, 4641, 4643, 4648, 4658, 4662, 4674, 4679, 4689, 4696, 4698, 4705, 4711, 4724, 4726, 4727, 4737, 4738, 4740, 4749, 4750, 4751, 4754, 4755, 4761, 4762, 4765, 4767, 4769, 4783, 4789, 4817, 4822, 4830, 4834, 4868, 4875, 4878, 4885, 4898, 4914, 4920, 4922, 4928, 4936, 4939, 4943, 4946, 4947, 4954, 4956, 4963, 4971, 4990, 4995, 4998, 5016, 5026, 5034, 5039, 5045, 5063, 5065, 5066, 5068, 5073, 5076, 5082, 5085, 5087, 5088, 5091, 5094, 5098, 5101, 5110, 5117, 5118, 5120, 5122, 5128, 5137, 5139, 5141, 5156, 5157, 5159, 5161, 5171, 5179, 5189, 5196, 5198, 5201, 5214, 5218, 5219, 5220, 5223, 5229, 5237, 5242, 5248, 5250, 5252, 5258, 5265, 5269, 5270, 5276, 5277, 5278, 5287, 5300, 5302, 5304, 5307, 5311, 5313, 5322, 5325, 5326, 5327, 5340, 5344, 5363, 5379, 5386, 5391, 5394, 5405, 5412, 5424, 5425, 5440, 5446, 5449, 5450, 5457, 5465, 5470, 5474, 5479, 5483, ],
    '박다빈': [ 5491, 5505, 5513, 5514, 5521, 5524, 5532, 5534, 5535, 5548, 5552, 5556, 5557, 5560, 5562, 5570, 5584, 5593, 5597, 5599, 5602, 5603, 5604, 5605, 5609, 5619, 5622, 5623, 5624, 5628, 5638, 5640, 5645, 5674, 5677, 5679, 5680, 5692, 5696, 5698, 5705, 5706, 5708, 5709, 5715, 5717, 5720, 5721, 5724, 5729, 5730, 5743, 5745, 5750, 5755, 5760, 5779, 5780, 5785, 5786, 5787, 5789, 5794, 5796, 5801, 5809, 5817, 5830, 5837, 5838, 5842, 5861, 5866, 5869, 5884, 5909, 5920, 5953, 5954, 5964, 5989, 5995, 6000, 6004, 6018, 6030, 6031, 6032, 6036, 6041, 6046, 6049, 6055, 6058, 6085, 6099, 6100, 6101, 6102, 6113, 6129, 6137, 6178, 6179, 6202, 6205, 6206, 6215, 6218, 6229, 6259, 6264, 6285, 6294, 6323, 6348, 6357, 6360, 6385, 6388, 6396, 6410, 6411, 6423, 6425, 6434, 6440, 6463, 6478, 6481, 6483, 6485, 6491, 6492, 6499, 6505, 6514, 6524, 6529, 6531, 6541, 6553, 6563, 6568, 6579, 6580, 6585, 6593, 6595, 6596, 6626, 6628, 6643, 6647, 6649, 6661, 6673, 6675, 6684, 6690, 6704, 6718, 6719, 6721, 6722, 6744, 6768, 6775, 6776, 6799, 6801, 6810, 6811, 6834, 6860, 6868, 6869, 6877, 6880, 6953, 6964, 6968, 6980, 6991, 6992, 7006, 7014, 7027, 7028, 7030, 7051, 7120, 7130, 7150, 7163, 7169, 7171, 7172, 7190, 7198, 7221, 7232, 7234, 7241, 7243, 7296, 7304, 7312, 7318, 7392, 7397, 7415, 7422, 7424, 7428, 7443, 7446, 7471, 7473, 7478, 7479, 7484, 7492, 7517, 7534, 7560, 7561, 7578, 7585, 7606, 7614, 7619, 7630, 7633, 7675, 7693, 7707, 7724, 7742, 7767, 7769, 7774, 7788, 7790, 7794, 7817, 7821, 7824, 7831, 7833, 7836, 7843, 7850, 7857, 7876, 7878, 7890, 7893, 7895, 7907, 7911, 7914, 7917, 7921, 7923, 7924, 7951, 7968, 7971, 7976, 7979, 7980, 7989, 8000, 8001, 8004, 8006, 8018, 8032, 8047, 8050, 8054, 8075, 8077, 8080, 8081, 8091, 8095, 8104, 8108, 8113, 8118, 8123, 8132, 8136, 8146, 8155, 8157, 8163, 8166, 8171, 8173, 8176, 8188, 8192, 8224, 8226, 8230, 8240, 8243, 8245, 8253, 8257, 8262, 8265, 8267, 8298, 8300, 8309, 8320, 8323, 8328, 8330, 8336, 8338, 8342, 8344, 8345, 8351, 8354, 8358, 
8365, 8367, 8376, 8385, 8387, 8389, 8391, 8392, 8401, 8405, 8410, 8411, 8412, 8418, 8423, 8437, 8444, 8451, 8452, 8453, 8454, 8468, 8469, 8480, 8490, 8503, 8504, 8510, 8524, 8525, 8526, 8537, 8545, 8547, 8550, 8560, 8565, 8580, 8582, 8595, 8597, 8598, 8611, 8618, 8623, 8624, 8638, 8642, 8651, 8652, 8686, 8690, 8698, 8701, 8708, 8720, 8735, 8741, 8746, 8762, 8779, 8795, 8805, 8811, 8816, 8823, 8824, 8827, 8875, 8894, 8901, 8902, 8913, 8914, 8915, 8916, 8919, 8928, 8930, 8935, 8936, 8952, 8969, 8977, 8987, 8993, 9007, 9011, 9036, 9037, 9040, 9041, 9042, 9051, 9055, 9069, 9070, 9081, 9085, 9118, 9122, 9125, 9130, 9171, 9172, 9179, 9181, 9182, 9183, 9184, 9191, 9207, 9228, 9237, 9245, 9255, 9289, 9292, 9293, 9302, 9305, 9307, 9312, 9313, 9320, 9321, 9330, 9338, 9344, 9351, 9366, 9368, 9369, 9375, 9385, 9388, 9397, 9398, 9413, 9417, 9425, 9440, 9443, 9452, 9463, 9472, 9478, 9481, 9501, 9508, 9512, 9528, 9581, 9589, 9591, 9601, 9604, 9605, 9606, 9625, 9630, 9633, 9638, 9641, 9648, 9669, 9684, 9686, 9688, ],
}

## Pandas 라이브러리에 대한 간단한 사용법 실습

In [2]:
import pandas as pd

# Load the training metadata from the path supplied in the first cell.
df = pd.read_csv(CSV_FILE_INPUT)

# Boolean mask: True for rows whose class id is assigned to 김동주.
assigned_to_kim = df['class'].isin(CLASSES_TODO['김동주'])

# Trailing expression so the notebook displays the selected rows as a preview.
df.loc[assigned_to_kim]

Unnamed: 0,name,class,group
759,760.jpg,99,5
760,761.jpg,99,5
761,762.jpg,99,5
762,763.jpg,99,5
763,764.jpg,99,5
...,...,...,...
77217,77218.jpg,5483,186
77218,77219.jpg,5483,186
77219,77220.jpg,5483,186
77220,77221.jpg,5483,186


## Pandas 라이브러리를 이용하여 목표 수행

In [3]:
import pandas as pd

# Reload the training metadata so this cell can run independently of the preview cell.
df = pd.read_csv(CSV_FILE_INPUT)

# Write one CSV per annotator, named "<annotator>.csv" in the working directory,
# containing only the rows whose class id is assigned to that annotator.
for annotator, class_ids in CLASSES_TODO.items():
    selected_rows = df[df['class'].isin(class_ids)]
    # index=False: do not write the DataFrame's row index as an extra column.
    selected_rows.to_csv(f'{annotator}.csv', index=False)