In [1]:
import os
import argparse
import shutil
from tqdm import tqdm
import pandas as pd

# Silence TensorFlow messages
os.environ['TF_CPP_MIN_LOG_LEVEL']='3'
import tensorflow as tf
import numpy as np
import PIL

In [2]:
os.chdir(r'C:\Users\steph\ML\Dom\tfrec_images')
changedDirectory = os.getcwd()
print(" The current working directory After changing the directory is : ", changedDirectory)

 The current working directory After changing the directory is :  C:\Users\steph\ML\Dom\tfrec_images


In [3]:
def _bytes_feature(value):
  '''Returns a bytes_list from a string / byte'''
  if isinstance(value, type(tf.constant(0))):
    value = value.numpy() # BytesList won't unpack a string from an EagerTensor.
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

In [4]:
def _float_feature(value):
  '''Returns a float_list from a float / double'''
  return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

In [5]:
def _int64_feature(value):
  ''' Returns an int64_list from a bool / enum / int / uint '''
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

In [6]:
def _calc_num_shards(img_list, img_shard):
  ''' calculate number of shards'''
  last_shard =  len(img_list) % img_shard
  if last_shard != 0:
    num_shards =  (len(img_list) // img_shard) + 1
  else:
    num_shards =  (len(img_list) // img_shard)
  return last_shard, num_shards

In [7]:
def _create_images_labels(label_file):
  ''' create lists of image filenames and their labels '''  
  linesList = label_file.readlines()
  labels_list=[]
  fileNames_list=[]
  for line in linesList:
    fileName, label = line.split()
    labels_list.append(int(label.strip()))
    fileNames_list.append(fileName.strip())
  return labels_list, fileNames_list

In [8]:
def write_tfrec(tfrec_filename, image_dir, img_list, labels_list):
  ''' write TFRecord file '''

  with tf.io.TFRecordWriter(tfrec_filename) as writer:

    for i in range(len(img_list)):
      filePath = os.path.join(image_dir, img_list[i])

      # read the JPEG source file into a tf string
      image = tf.io.read_file(filePath)

      # get the shape of the image from the JPEG file header
      image_shape = tf.io.extract_jpeg_shape(image, output_type=tf.dtypes.int32, name=None)

      # features dictionary
      feature_dict = {
        'label' : _int64_feature(int(label_list[i])),
        'height': _int64_feature(image_shape[0]),
        'width' : _int64_feature(image_shape[1]),
        'chans' : _int64_feature(image_shape[2]),
        'image' : _bytes_feature(image)
      }

      # Create Features object
      features = tf.train.Features(feature = feature_dict)

      # create Example object
      tf_example = tf.train.Example(features=features)

      # serialize Example object into TfRecord file
      writer.write(tf_example.SerializeToString())

  return


In [9]:

def make_tfrec(image_dir, img_shard, tfrec_base, label_file, num_images):

  # make destination directory
  #os.makedirs(tfrec_dir, exist_ok=True)
  #print('Directory',tfrec_dir,'created',flush=True)

  # make lists of images and their labels
  all_labels, all_images = _create_images_labels(label_file)
  print('Found',len(all_labels),'images and labels in',label_file)

  if (num_images != 0 and num_images < len(all_images)):
    all_images = all_images[:num_images]
    all_labels = all_labels[:num_images]
    print('Using',num_images,'images..')
  else:
    print('Using',len(all_labels),'images..')

  # calculate how many shards we will generate and number of images in last shard
  last_shard, num_shards = _calc_num_shards(all_images, img_shard)
  print (num_shards,'TFRecord files will be created.')
  if (last_shard>0):
    print ('Last TFRecord file will have',last_shard,'images.')

  # create TFRecord files (shards)
  start = 0

  for i in tqdm(range(num_shards)):

    tfrec_filename = tfrec_base+'_'+str(i)+'.tfrecord'
    write_path = os.path.join(tfrec_dir, tfrec_filename)

    if (i == num_shards-1):
      write_tfrec(write_path, image_dir, all_images[start:], all_labels[start:])
    else:
      end = start + img_shard
      write_tfrec(write_path, image_dir, all_images[start:end], all_labels[start:end])
      start = end

  return





In [22]:

file.close()

_divider = '-------------------------------------'

for r in range(8):
    tfrec_base = rf'D:\Dom\tfrec_images\core_stack{r}'
    image_dir = rf'C:\Users\steph\ML\Dom\tfrec_images\core_stack{r}'
    img_list = os.listdir(rf'C:\Users\steph\ML\Dom\tfrec_images\core_stack{r}')
    img_shard = len(img_list)
    num_images = len(img_list)

    label_list = []
    
    for x in img_list:
        label_split = os.path.splitext(x[:-5]+'.jpeg')
        label_list_elem = label_split[0]
        label_list.append(label_list_elem)
    
    if len(label_list) == len(img_list):
        label_file = np.vstack((label_list,img_list))
        print(label_file.shape)
        
        
        file_path = rf'C:\Users\steph\ML\Dom\tfrec_images\core_stack{r}\acc_files\label_file.txt'
        
        print(file_path)
        
        directory = file_path
        
        if not os.path.isdir(directory):
            os.mkdir(directory)
        file = open(file_path, "w")
    
        for line in label_file:
            file.write(line)
            file.write('\n')
        file.close()
        
        make_tfrec(image_dir, img_shard, tfrec_base, label_file, num_images)
        
    
    else: 
        print('label_list and img_list do not have same length')
        print('len(label_list) = ',len(label_list))
        print('len(img_list) = ',len(img_list))
        break
    
    

NameError: name 'file' is not defined

In [1]:
#!/usr/bin/python

import os, sys, stat
r=0
# label_file.txt exists at the path below
os.chmod(rf'C:\Users\steph\ML\Dom\tfrec_images\core_stack{r}\acc_files\label_file.txt', stat.S_IRWXU)

print ("Changed mode successfully!!")
file_path = rf'C:\Users\steph\ML\Dom\tfrec_images\core_stack{r}\acc_files\label_file.txt'
file = open(file_path, "w")
file.close()

Changed mode successfully!!


PermissionError: [Errno 13] Permission denied: 'C:\\Users\\steph\\ML\\Dom\\tfrec_images\\core_stack0\\acc_files\\label_file.txt'

In [None]:
from pathlib import Path
my_file = Path(rf'C:\Users\steph\ML\Dom\tfrec_images\core_stack{r}\acc_files\label_file.txt')
Path("out.txt").open("w")

In [None]:
type(label_file)
print(label_file)

In [None]:
r = 0
directory = os.getcwd()
print(directory)
filename = os.path.join(r'C:',os.sep, 'Users',os.sep, 'steph',os.sep,
                    'ML',os.sep, 'Dom',os.sep, 'tfrec_images',os.sep, f'core_stack{r}',
                    os.sep, 'label_file.txt')
print(filename)
label_file = np.array(label_file)
MyFile=open(os.path.join(directory,f'core_stack{r}','acc_files','label_file' + '.txt'),'w')
for x in label_file:
    p = str(x)
    MyFile.write(p)
    MyFile.write('\n')

In [None]:
r=0

path = os.path.join(r'C:',os.sep, 'Users',os.sep, 'steph',os.sep,
                    'ML',os.sep, 'Dom',os.sep, 'tfrec_images',os.sep, f'core_stack{r}',
                    os.sep, 'label_file.txt')

directory = os.getcwd()
    filename = os.path.join(directory, 'label_file' + '.txt')
  print('Writing', filename)
  writer = tf.io.Writer(filename)
  for index in range(num_examples):
    image_raw = images[index].tostring()
    example = tf.train.Example(features=tf.train.Features(feature={
        'height': _int64_feature(rows),
        'width': _int64_feature(cols),
        'depth': _int64_feature(depth),
        'label': _int64_feature(int(labels[index])),
        'image_raw': _bytes_feature(image_raw)}))
    writer.write(example.SerializeToString())
    

        
#make_tfrec(image_dir, img_shard, tfrec_base, label_file, num_images)

In [None]:
make_tfrec(image_dir, img_shard, tfrec_base, label_file, num_images)