# **RICE DISEASE IDENTIFICATION - TENSORFLOW RECORDS**

This notebook only needs to be executed once. It does not have to be re-run before every training run, because we want to stay consistent and have the models learn on the same data sets every time they are trained.

Source: https://github.com/jlaihong/Speed-up-TensorFlow-code-using-TFRecords

### **Imports**

In [5]:
import os
import random

### **Create shuffled_labels.txt**

In [6]:
image_folder = 'data/all'

# Keyword (searched case-insensitively in the file name) -> integer class id.
label_mapping = {
    'NONE': -1,
    'BACTERIALBLIGHT': 0,
    'BACTERAILBLIGHT': 0,  # misspelling that slipped in while labeling some files
    'BLAST': 1,
    'BROWNSPOT': 2,
    'TUNGRO': 3
}


def _resolve_label(filename):
    """Return ``(label, label_int)`` for the first mapping keyword found in *filename*.

    Keywords are tried in the insertion order of ``label_mapping`` and compared
    case-insensitively. The misspelled keyword is reported under its correct
    spelling. Files matching no keyword get ``("NONE", -1)``.
    """
    lowered = filename.lower()
    for keyword, value in label_mapping.items():
        if keyword.lower() not in lowered:
            continue
        if keyword == 'BACTERAILBLIGHT':
            return 'BACTERIALBLIGHT', value
        return keyword, value
    return "NONE", -1


# Collect every .jpg in the image folder and randomize the order in place.
image_files = [name for name in os.listdir(image_folder) if name.endswith(".jpg")]
random.shuffle(image_files)

output_labels_file = 'data/shuffled_labels.txt'

# One line per image: "<file name>,<label>,<integer label>".
with open(output_labels_file, 'w') as f:
    for image_filename in image_files:
        label, label_int = _resolve_label(image_filename)
        f.write(','.join((image_filename, str(label), str(label_int))) + '\n')

print(f'>> Shuffled labels saved to {output_labels_file}')

>> Shuffled labels saved to data/shuffled_labels.txt


### **Create TensorFlow Records File**

In [7]:
import tensorflow as tf
from PIL import Image

# Load the whole labels file up front; each line is
# "<file name>,<label>,<integer label>".
with open("data/shuffled_labels.txt") as label_file:
    label_file_lines = list(label_file)

# Parallel lists: index i in each list describes the same image,
# in the order stored in the labels file.
shuffled_paths, shuffled_labels, shuffled_int_labels = [], [], []

for line in label_file_lines:
    fields = line.strip().split(",")
    # Unpacking raises ValueError if a line does not have exactly three fields.
    image_path, image_label, image_int_label = fields
    shuffled_paths.append(image_path)
    shuffled_labels.append(image_label)
    shuffled_int_labels.append(int(image_int_label))

def _string_feature(value):
    """Wrap the string *value* in a ``tf.train.Feature`` holding its UTF-8 bytes."""
    encoded = value.encode('utf-8')
    bytes_list = tf.train.BytesList(value=[encoded])
    return tf.train.Feature(bytes_list=bytes_list)

def _int64_feature(value):
    """Wrap *value*, an iterable of integers, in an int64 ``tf.train.Feature``."""
    int64_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int64_list)

def _float_feature(value):
    """Wrap *value*, an iterable of numbers (e.g. a flattened array), in a float ``tf.train.Feature``."""
    float_list = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=float_list)


def read_and_decode(filename):
    """Read the JPEG file at *filename* and return it as a float32 image tensor.

    The image is decoded with three colour channels and then passed through
    ``tf.image.convert_image_dtype`` to obtain ``tf.float32`` values.
    """
    IMG_CHANNELS = 3
    raw_bytes = tf.io.read_file(filename)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=IMG_CHANNELS)
    return tf.image.convert_image_dtype(decoded, tf.float32)

# Serialize every labelled image into a single TFRecords file, one
# tf.train.Example per image, in the order given by the shuffled lists.
with tf.io.TFRecordWriter("data/rice_images.tfrecords") as writer:
    samples = zip(shuffled_paths, shuffled_labels, shuffled_int_labels)
    for path, label, int_label in samples:
        image = read_and_decode("data/all/" + path)
        dims = image.shape

        feature = {
            # Pixel values are stored as a flat 1D float list; 'shape' keeps
            # the three dimensions needed to restore the image when reading.
            'image': _float_feature(image.numpy().flatten()),
            'shape': _int64_feature([dims[axis] for axis in range(3)]),
            'label': _string_feature(label),
            'label_int': _int64_feature([int_label]),
        }
        example = tf.train.Example(features=tf.train.Features(feature=feature))

        writer.write(example.SerializeToString())

