In [22]:
import numpy as np
import pandas as pd
import os, shutil
from sklearn.utils import shuffle

In [23]:
# Paths
dataset_dir = '../VegetableImages'
train_dir = os.path.join(dataset_dir, 'train')
test_dir = os.path.join(dataset_dir, 'test')
valid_dir = os.path.join(dataset_dir, 'validation')

# Directories of images (Class names for each dir)
train_dirs = os.listdir(train_dir)
test_dirs = os.listdir(test_dir)
valid_dirs = os.listdir(valid_dir)

In [24]:
# Generate (image, label) pairs
def generateImagesLabels(classNames, dirName):
    image_paths = []
    labels = []
    for className in classNames:
        for img in os.listdir(dirName + '/' + className):
            image_paths.append(dirName + '/' + className + '/' + img)
            labels.append(className)
    return image_paths, labels

train_Images, train_Labels = generateImagesLabels(train_dirs, train_dir)
val_Images, val_Labels = generateImagesLabels(valid_dirs, valid_dir)
test_Images, test_Labels = generateImagesLabels(test_dirs, test_dir)

In [25]:
filePathColumn = 'Imagepath'
labelColumn = 'Label'

train_df = pd.DataFrame(list(zip(train_Images, train_Labels)),columns = [filePathColumn, labelColumn])
val_df = pd.DataFrame(list(zip(val_Images, val_Labels)),columns = [filePathColumn, labelColumn])
test_df = pd.DataFrame(list(zip(test_Images, test_Labels)),columns = [filePathColumn, labelColumn])

allDataset_df = pd.concat([train_df, val_df, test_df])

In [26]:
veg_list = list(train_df[labelColumn].unique())
veg_dict = dict(zip(veg_list,range(len(veg_list))))

def shuffle_label(df):
    df[labelColumn] = df[labelColumn].replace(veg_dict)
    df = shuffle(df)
    return df

train_df = shuffle_label(train_df)
val_df = shuffle_label(val_df)
test_df = shuffle_label(test_df)
allDataset_df = shuffle_label(allDataset_df)

In [27]:
allDataset_df.head()

Unnamed: 0,Imagepath,Label
2384,../VegetableImages/validation/Potato/1383.jpg,11
1821,../VegetableImages/validation/Cucumber/1223.jpg,9
12517,../VegetableImages/train/Pumpkin/0526.jpg,12
13995,../VegetableImages/train/Radish/0996.jpg,13
1271,../VegetableImages/test/Capsicum/1075.jpg,6


In [28]:
train_df.to_csv('../CSV/' + 'trainset.csv', index=False)
val_df.to_csv('../CSV/' + 'valset.csv', index=False)
test_df.to_csv('../CSV/' + 'testset.csv', index=False)
allDataset_df.to_csv('../CSV/' + 'dataset.csv', index=False)