In [2]:
import os
import numpy as np
import pandas as pd
import shutil
import imagehash
from PIL import Image
import cv2
import sys

# **Remove .DS_Store files**

In [3]:
# Recursively print and delete every macOS ".DS_Store" file below the current directory.
! find . -name ".DS_Store" -print -delete

# **Generate files.csv (file names + their labels)**

In [3]:
# Build a {file name: label} table from the class folders and save it as files.csv.
d = {}
folder = ['canal','electric','flooding','light','road','sanitary','sewer','sidewalk','stray','traffic','unused','unsure','duplicate']
for label in folder:
    label_dir = os.path.join('.', label)
    # next(os.walk(...)) raises a bare StopIteration for a missing directory,
    # so skip class folders that have not been created yet.
    if not os.path.isdir(label_dir):
        continue
    _, _, files = next(os.walk(label_dir))
    for f in files:
        if f == '.DS_Store':
            continue
        # A file name that occurs in several folders keeps the label of the
        # folder listed last, because the dict is keyed by file name only.
        d[f] = label

df = pd.DataFrame(list(d.items()), columns=['filename','class'])
df.to_csv('files.csv', index=False)

In [4]:
# Sanity check: (number of rows, number of columns) of the file name / label table.
df.shape

(6524, 2)

# **Rearrange original files according to Poon's labels**

In [5]:
# Read the file name -> label table back from disk and preview the first rows.
# NOTE(review): the preview saved with this cell shows an extra `Unnamed: 0` column,
# which a file written with index=False would not contain - confirm files.csv is current.
df = pd.read_csv('files.csv')
df.head(3)

Unnamed: 0,filename,class
0,train_canal_img_441.jpg,canal
1,train_canal_img_85.jpg,canal
2,train_canal_img_125.jpg,canal


In [26]:
# Map every file name to its target label. The columns are selected by name:
# dict(df.values) raises as soon as the frame carries more than two columns
# (e.g. a stray index column written into the CSV).
d = dict(zip(df['filename'], df['class']))

folder = ['canal','electric','flooding','light','road','sanitary','sewer','sidewalk','stray','traffic','unused','unsure','duplicate']

# Make sure every destination folder exists before anything is moved.
for name in folder:
    os.makedirs(os.path.join('.', name), exist_ok=True)

# Walk each class folder and move every listed file into the folder of its label.
for f in folder:
    _, _, files = next(os.walk(os.path.join('.', f)))

    for file in files:
        if file == '.DS_Store':
            continue
        target = d.get(file)
        # Files without an entry in the table, or already in the right folder, stay put.
        if target is None or target == f:
            continue
        old = os.path.join('.', f, file)
        new = os.path.join('.', target, file)
        shutil.move(old, new)

# **Count files in each folder**

In [27]:
# Count the files (ignoring .DS_Store) directly inside each class folder.
folder = ['canal','electric','flooding','light','road','sanitary','sewer','sidewalk','stray','traffic','unused','unsure','duplicate']
counter = {}
for name in folder:
    # Third element of the first os.walk() triple = plain files of that folder.
    listing = next(os.walk(os.path.join('.', name)))[2]
    counter[name] = sum(1 for entry in listing if entry != '.DS_Store')

print('total files =', sum(counter.values()))
print(counter)


total files = 13274
{'canal': 626, 'electric': 1082, 'flooding': 1344, 'light': 1270, 'road': 1350, 'sanitary': 449, 'sewer': 618, 'sidewalk': 694, 'stray': 307, 'traffic': 511, 'unused': 3547, 'unsure': 95, 'duplicate': 1381}


# **Label helper tool**

In [6]:
# Root of the class folders and the folder that receives "deleted" images.
basePath = "."
unusedPath = "./unused"

# Create any class folder that does not exist yet.
folder = ['canal','electric','flooding','light','road','sanitary','sewer','sidewalk','stray','traffic','unused','unsure','duplicate']
for classDir in (os.path.join('.', name) for name in folder):
    if os.path.exists(classDir):
        continue
    os.makedirs(classDir)

In [7]:
def changeLabel(fpath, fnames, label) :
    """Copy the image at `fpath` into the folder of `label`.

    The original file is left where it is. `fnames` is accepted for signature
    parity with `deleteFile` but is not modified.

    Sets the module-level `lastMove` flag to False so that `restoreFile`
    undoes this action by removing the copy.

    Returns the path of the copy.
    """
    # Without this declaration the assignment below would only bind a local
    # name and restoreFile() would never learn that the last action was a copy.
    global lastMove
    fname = os.path.basename(fpath)
    newPath = basePath + '/' + label + '/' + fname
    shutil.copy(fpath, newPath)
    lastMove = False
    return newPath

def deleteFile(fpath, fnames) :
    """Move the image at `fpath` into `unusedPath` and drop it from `fnames`.

    `fnames` is mutated in place; a ValueError is raised if the file name is
    not in the list.

    Sets the module-level `lastMove` flag to True so that `restoreFile`
    undoes this action by moving the file back.

    Returns the new path of the file.
    """
    global lastMove
    fname = os.path.basename(fpath)
    fnames.remove(fname)
    newPath = unusedPath + '/' + fname
    shutil.move(fpath, newPath)
    lastMove = True
    return newPath

def restoreFile(lastPath, newPath) :
    """Undo the most recent `changeLabel` / `deleteFile` call.

    If the last action was a move (`lastMove` is True) the file at `newPath`
    is moved back to `lastPath`; otherwise `newPath` is a copy and is removed.
    """
    if lastMove:
        shutil.move(newPath, lastPath)
    else:
        os.remove(newPath)
    
# Keyboard digit -> class label. Keys 1-9 follow the alphabetical class order
# and the tenth class (traffic) sits on 0.
labelKeyMap = dict(zip(
    '1234567890',
    ['canal','electric','flooding','light','road','sanitary','sewer','sidewalk','stray','traffic'],
))

In [15]:
# Specify folder to explore and image index to open
subPath = 'traffic'
pos = 0

imgDir = basePath + '/' + subPath
# imgDir = '../testim'
fnames = os.listdir(imgDir)
if sys.platform == 'darwin' and '.DS_Store' in fnames:
    fnames.remove('.DS_Store')

# Refuse to start on an empty folder and make sure the "unused" folder exists
if len(fnames) == 0 :
    raise Exception("empty folder")
if not os.path.exists(unusedPath) :
    os.mkdir(unusedPath)

# Help overlay drawn on every frame: (text, baseline y, font scale, BGR colour)
helpLines = [
    ("canal:1, electric:2, flooding:3, light:4, road:5", 50, 1, (0, 255, 0)),
    ("sanitary:6, sewer:7, sidewalk:8, stray:9, traffic:0", 100, 1, (0, 255, 0)),
    (" ? = delete | k = undo delete", 150, 1.3, (0, 0, 255)),
    (" , = back | . = next", 200, 1.3, (0, 0, 255)),
]
# Black canvas (rows, cols, channels) shown when there is no readable image
placeholderShape = (300, 1200, 3)

def putLine(img, text, y, scale, color) :
    """Draw one overlay line at x=50 and the given baseline `y`; returns the image."""
    return cv2.putText(
        img = img,
        text = text,
        org = (50, y),
        fontFace = cv2.FONT_HERSHEY_DUPLEX,
        fontScale = scale,
        color = color,
        thickness = 3
    )

# Index helpers; they look `fnames` up on every call, so they stay valid
# when the list shrinks or is replaced by an undo.
decIter = lambda p : max(0, p-1)
incIter = lambda p : min(len(fnames)-1, p+1)
boundIter = lambda p : max(0, min(len(fnames)-1, p))

# Create a OpenCV Window
windowName = 'tools v2'
cv2.namedWindow(windowName)

# Undo state: position / file list / paths of the last delete or relabel
last_pos = pos
prev_fnames = fnames.copy()
lastPath = ""
newPath = ""
lastMove = True

try :
    while True :
        # The list can become empty after the last image is deleted; keep the
        # window alive so the delete can still be undone with 'k'.
        hasImage = len(fnames) > 0
        if hasImage :
            pos = boundIter(pos)
            imgPath = imgDir + '/' + fnames[pos]
            img = cv2.imread(imgPath)
            status = str(pos)+"/"+str(len(fnames)-1)+" "+imgPath
        else :
            imgPath = None
            img = None
            status = "no images left in " + imgDir
        # cv2.imread returns None for files it cannot decode; draw on a blank
        # canvas instead of crashing in putText.
        if img is None :
            if hasImage :
                status += " (unreadable)"
            img = np.zeros(placeholderShape, dtype=np.uint8)

        for text, y, scale, color in helpLines :
            img = putLine(img, text, y, scale, color)
        img = putLine(img, status, 250, 1.3, (125, 246, 55))
        cv2.imshow(windowName, img)

        key = chr(cv2.waitKey(1) & 0xFF)
        # Break the loop if 'q' is pressed
        if key == 'q' :
            break
        # undo the last delete / relabel
        elif key == 'k' :
            if lastPath == '' or newPath == '' :
                continue
            restoreFile(lastPath, newPath)
            fnames = prev_fnames.copy()
            newPath = ''
            lastPath = ''
            pos = last_pos
        # every remaining action needs a current image
        elif not hasImage :
            continue
        # navigate left
        elif key == ',' :
            pos = decIter(pos)
        # navigate right
        elif key == '.' :
            pos = incIter(pos)
        # temporary delete (file is moved to the unused folder)
        elif key == '?' :
            last_pos = pos
            prev_fnames = fnames.copy()
            lastPath = imgPath
            newPath = deleteFile(imgPath, fnames)
        # copy to a different label folder
        elif key in labelKeyMap :
            label = labelKeyMap[key]
            if label == subPath :
                continue
            # Only record undo state once an action is actually performed.
            last_pos = pos
            prev_fnames = fnames.copy()
            lastPath = imgPath
            newPath = changeLabel(imgPath, fnames, label)
finally :
    # Close the window even if the loop raised
    cv2.destroyAllWindows()
    cv2.waitKey(1)

print(f"last pos: {pos}")
with open("cleaner_last_pos.txt", 'a') as posFile :
    posFile.write(f"\n{subPath} {pos}")

last pos: 539


In [3]:
from collections import defaultdict

path = './TraffyFondue/train'
def gen_filename(root=None, out_csv='filenames.csv'):
    """Write a CSV listing every image below `root` together with its class.

    Each sub-directory of `root` is treated as one class. Within a class the
    images are ordered by the integer in the fourth "_"-separated field of the
    file name (the text before the first "." in that field).

    Parameters
    ----------
    root : str, optional
        Directory holding one folder per class. Defaults to the module-level
        `path`.
    out_csv : str, optional
        Destination CSV, written with the columns `filename` and `class`.

    Raises
    ------
    IndexError, ValueError
        If an image name does not contain the numeric field described above.
    """
    if root is None:
        root = path

    def image_number(name):
        return int(name.split("_")[3].split(".")[0])

    records = []
    for c in os.listdir(root):
        class_dir = os.path.join(root, c)
        # Only directories are classes; this also skips a top-level .DS_Store.
        if c == '.DS_Store' or not os.path.isdir(class_dir):
            continue
        # Drop .DS_Store BEFORE sorting: its name has no numeric field, so the
        # sort key would raise IndexError on it.
        images = [name for name in os.listdir(class_dir) if name != '.DS_Store']
        records.extend((name, c) for name in sorted(images, key=image_number))

    df = pd.DataFrame(records, columns=['filename', 'class'])
    df.to_csv(out_csv, index=False)

gen_filename()


In [4]:
# Load the file name / class table from filenames.csv.
df = pd.read_csv("filenames.csv")

In [7]:
# Number of rows (images) per class, most frequent class first.
df['class'].value_counts()

road         1591
duplicate    1381
sidewalk      742
sewer         628
canal         618
traffic       540
sanitary       31
unused         17
flooding       13
light           2
electric        1
stray           1
Name: class, dtype: int64