# FlowerID Image Mapping


In [None]:
# Import dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.datasets import load_files
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

## Get the total, available, and used memory of the system in a readable format like "GB" or "MB".

In [None]:
import psutil
def get_size(bytes, suffix="B"):
    """Format a byte count as a human-readable string, e.g. ``1536 -> '1.50KB'``.

    Parameters
    ----------
    bytes : int or float
        Size to format. (The name shadows the builtin but is kept so that
        existing keyword callers continue to work.)
    suffix : str, default "B"
        Text appended after the scale prefix.

    Returns
    -------
    str
        The value scaled by powers of 1024 with two decimals, followed by the
        prefix ("", K, M, G, T or P) and ``suffix``. Values of 1024**6 and
        above are reported in the largest unit (P) instead of falling off the
        end of the loop and returning ``None``.
    """
    factor = 1024
    units = ["", "K", "M", "G", "T", "P"]
    # Try every unit except the last; the last one is the unconditional fallback.
    for unit in units[:-1]:
        if bytes < factor:
            return f"{bytes:.2f}{unit}{suffix}"
        bytes /= factor
    return f"{bytes:.2f}{units[-1]}{suffix}"
# Print a banner, then the system's total / available / used RAM in readable units.
print("=" * 40, "Memory Info", "=" * 40)
svmem = psutil.virtual_memory()
for label, amount in (
    ("Total", svmem.total),
    ("Available", svmem.available),
    ("Used", svmem.used),
):
    print(f"{label}: {get_size(amount)}")

Total: 12.67GB
Available: 11.47GB
Used: 906.18MB


In [None]:
# Update the path to a local directory where the 'flowers' dataset is located
#data_dir = '/Users/yashadakulkarni/Desktop/WORK/DataAnalyticsBootcamp/HOMEWORK/Flower-Identification-using-deep-learning/flowers'

# Import & Load Data

In [None]:
#Import Data
import os
import requests
from zipfile import ZipFile
from io import BytesIO
from sklearn.datasets import load_files
# Define the URL of the dataset
url = 'https://github.com/SriPenumatcha/Flower-Identification-using-deep-learning/archive/refs/heads/main.zip'

# Download the dataset.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() fails loudly on an HTTP error instead of silently skipping
# the extraction and letting load_files() fail later on a missing directory.
response = requests.get(url, timeout=60)
response.raise_for_status()

# Extract the dataset
with ZipFile(BytesIO(response.content)) as zip_file:
    zip_file.extractall('/tmp')

# Path to the extracted dataset
data_dir = '/tmp/Flower-Identification-using-deep-learning-main/flowers'

# Load the data
data = load_files(data_dir)

In [None]:
# List the contents of the Folder
# (the entries of data_dir; the recorded output shows one sub-folder per flower class)
folders = os.listdir(data_dir)
print(folders)

['daisy', 'sunflower', 'rose', 'tulip', 'dandelion']


# DATA Processing and Exploration

# We have a folder for each flower class or type. Load this data into two NumPy arrays:

- X: Filenames (features)
- y: Flower names (target labels)

In [None]:
# Pull the file paths, integer class ids and class names out of the loaded
# dataset, each as its own NumPy array.
X, y, labels = (
    np.array(data[key]) for key in ('filenames', 'target', 'target_names')
)

# Show the two arrays used as features and targets
print('Data files - ', X)
print('Target labels - ', y)

Data files -  ['/tmp/Flower-Identification-using-deep-learning-main/flowers/sunflower/7176729016_d73ff2211e.jpg'
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/dandelion/18385846351_3a2bf60427_n.jpg'
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/dandelion/98992760_53ed1d26a9.jpg'
 ...
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/dandelion/7184780734_3baab127c2_m.jpg'
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/sunflower/10386540106_1431e73086_m.jpg'
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/sunflower/164670176_9f5b9c7965.jpg']
Target labels -  [3 1 1 ... 1 3 3]


In [None]:
# Save the names of the image files so they can be used later.
# NOTE: this must run before the cell that rebinds `data` to a plain list.
imagefile_names = data['filenames']
print(imagefile_names)

['/tmp/Flower-Identification-using-deep-learning-main/flowers/sunflower/7176729016_d73ff2211e.jpg'
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/dandelion/18385846351_3a2bf60427_n.jpg'
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/dandelion/98992760_53ed1d26a9.jpg'
 ...
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/dandelion/7184780734_3baab127c2_m.jpg'
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/sunflower/10386540106_1431e73086_m.jpg'
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/sunflower/164670176_9f5b9c7965.jpg']


The numbers in the **target labels `[3 1 1 ... 1 3 3]`** are class indices. We need to convert each of them into a one-hot vector of 5 elements (one per class).

In [None]:
import cv2

In [None]:
print(X.shape)

(4317,)


In [None]:
from keras.preprocessing.image import img_to_array
#from tensorflow.keras.preprocessing.image import img_to_array, load_img
# Per-image accumulators; all four lists are appended together so that
# row i of the DataFrame always describes one and the same file.
data = []
target_labels = []
img_gray = []
image_paths = []

# Iterate over the class folders in the root directory (sorted for a
# deterministic row order; os.listdir order is arbitrary).
for folder_name in sorted(os.listdir(data_dir)):
    folder_path = os.path.join(data_dir, folder_name)

    # Skip stray files sitting next to the class folders
    if not os.path.isdir(folder_path):
        continue

    # Iterate over the images in each folder
    for img_name in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, img_name)

        # Load the image using OpenCV (cv2)
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)

        # cv2.imread returns None for anything it cannot decode (e.g. stray
        # .py/.pyc files); skip those instead of crashing on the indexing below.
        if img is None:
            continue

        # OpenCV stores channels in B, G, R order, so unpack accordingly
        # before applying the luma weights.
        b, g, r = img[:, :, 0], img[:, :, 1], img[:, :, 2]
        img_grayscale = 0.2989 * r + 0.5870 * g + 0.1140 * b

        # Append the image data, its path and its target label to the lists
        data.append(img)
        target_labels.append(folder_name)
        img_gray.append(img_grayscale)
        image_paths.append(img_path)


# Create a DataFrame.
# The filename column uses the paths collected in the loop: the list returned
# by load_files is in a different (shuffled) order and would pair each image
# with the wrong filename.
df = pd.DataFrame({'image_data': data, 'grayscale_data': img_gray, 'target_label': target_labels, 'image_filename': image_paths})

# Shuffle the DataFrame
#df = df.sample(frac=1).reset_index(drop=True)

# Split the data into training and testing sets
#train_data = df.iloc[:int(0.8*len(df))]
#test_data = df.iloc[int(0.8*len(df)):]


In [None]:
df.head()

Unnamed: 0,image_data,grayscale_data,target_label,image_filename
0,"[[[83, 44, 0], [83, 44, 0], [83, 44, 0], [83, ...","[[50.6367, 50.6367, 50.6367, 50.6367, 50.6367,...",daisy,/tmp/Flower-Identification-using-deep-learning...
1,"[[[31, 0, 1], [31, 1, 0], [32, 3, 0], [32, 3, ...","[[9.379900000000001, 9.8529, 11.3258, 11.3258,...",daisy,/tmp/Flower-Identification-using-deep-learning...
2,"[[[217, 240, 236], [226, 236, 236], [233, 234,...","[[232.6453, 232.98739999999998, 234.1337, 231....",daisy,/tmp/Flower-Identification-using-deep-learning...
3,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",daisy,/tmp/Flower-Identification-using-deep-learning...
4,"[[[40, 27, 13], [107, 82, 2], [115, 68, 6], [1...","[[29.287, 80.34429999999999, 74.9735, 74.126, ...",daisy,/tmp/Flower-Identification-using-deep-learning...


In [None]:
df.columns

Index(['image_data', 'grayscale_data', 'target_label', 'image_filename'], dtype='object')

In [None]:
# Write the DataFrame to a CSV file in the current working directory.
df.to_csv("FlowerID2.csv")
# NOTE: converting to CSV spoils the image data column (the arrays are written as text,
# presumably their string representation -- TODO confirm). Let us check that the
# column is still correct in the df:



In [None]:
df["image_data"].head()
#It's massive! As it should be...

In [None]:
df["grayscale_data"].head()

In [None]:
#Done till above cell. More to follow.

## Use OneHotEncoder to convert to categorical data

In [None]:
# Initialize OneHotEncoder with dense (ndarray) output.
# scikit-learn renamed `sparse` to `sparse_output` in 1.2 and removed the old
# keyword in 1.4, so prefer the new name and fall back for older installs.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)

In [None]:
# Reshape y to a 2D array
y_reshaped = y.reshape(-1, 1)

In [None]:
# Perform one-hot encoding
y_onehot = encoder.fit_transform(y_reshaped)



In [None]:
# Display the one-hot encoded target labels
print('One-hot encoded target labels:')
print(y_onehot)

One-hot encoded target labels:
[[0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 ...
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]]


Removing the .pyc or .py files from X and y:
The next cell first collects the positions of any entries in X whose filename ends in .pyc or .py.
np.delete(X, pos) then removes the elements at those positions from the array X.
Similarly, np.delete(y, pos) removes the corresponding elements from the array y, so X and y stay aligned.

In [None]:
# Indices of entries in X that are Python source / bytecode files rather than images
pos = [idx for idx, path in enumerate(X) if path.endswith(('.pyc', '.py'))]

# Drop those indices from both arrays so features and labels stay aligned
X, y = np.delete(X, pos), np.delete(y, pos)

# Show the arrays after the clean-up
print('Updated Data files - ', X)
print('Updated Target labels - ', y)

Updated Data files -  ['/tmp/Flower-Identification-using-deep-learning-main/flowers/sunflower/7176729016_d73ff2211e.jpg'
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/dandelion/18385846351_3a2bf60427_n.jpg'
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/dandelion/98992760_53ed1d26a9.jpg'
 ...
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/dandelion/7184780734_3baab127c2_m.jpg'
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/sunflower/10386540106_1431e73086_m.jpg'
 '/tmp/Flower-Identification-using-deep-learning-main/flowers/sunflower/164670176_9f5b9c7965.jpg']
Updated Target labels -  [3 1 1 ... 1 3 3]


In [None]:
# rescale the training data
# Up to this point X holds file paths (it was built from data['filenames']),
# and path strings cannot be cast to float32. Decode the images first, resizing
# them to a common size so they stack into one (n_images, 150, 150, 3) array.
IMG_SIZE = (150, 150)  # matches the (150, 150, 3) sample shape reported further down
if X.dtype.kind in ('U', 'S', 'O'):
    images = []
    for path in X:
        img = cv2.imread(str(path), cv2.IMREAD_COLOR)
        if img is None:
            # Fail loudly: silently dropping an image would misalign X and y
            raise ValueError(f"Could not read image: {path}")
        # OpenCV decodes to BGR; convert to the conventional RGB channel order
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        images.append(cv2.resize(img, IMG_SIZE))
    X = np.stack(images)
X = X.astype('float32')/255
# Confirming number of class
num_classes = len(np.unique(y))
print('Number of classes : ', num_classes)

Number of classes :  5


In [None]:
y

array([3, 1, 1, ..., 1, 3, 3])

In [None]:
from keras.utils import to_categorical
# Convert the integer class ids in y to categorical (one row per sample).
# NOTE: y is reassigned from itself, so running this cell a second time would
# encode the already-encoded labels again -- run it once per kernel session.
y = to_categorical(y)
# Display the first encoded label as a sanity check
y[0]

array([0., 0., 0., 1., 0.])

In [None]:
from sklearn.model_selection import train_test_split
# Split the data into train (80%), test (10%) and validation (10%) subsets and
# report their sizes. A fixed random_state makes the split reproducible across
# kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Halve the 20% hold-out into the final test and validation sets
X_test, X_valid, y_test, y_valid = train_test_split(X_test, y_test, test_size=0.5, random_state=42)
# Each label now names the subset whose size it prints (previously the hold-out
# was reported as "test" and the validation set as "training").
print('The training Data Shape ', X_train.shape[0])
print('The test Data Shape ', X_test.shape[0])
print('The validation Data Shape ', X_valid.shape[0])

The test Data Shape  864
The training Data Shape  432


In [None]:
print('The train Data Shape ', X_train.shape[1:])

The train Data Shape  (150, 150, 3)


In [None]:
# Check the shape of X_train (input data) and X_test
print("Shape of X_train (input data):", X_train.shape)
print("Shape of X_test (input data):", X_test.shape)

Shape of X_train (input data): (3453, 150, 150, 3)
Shape of X_test (input data): (432, 150, 150, 3)


In [None]:
# Use grayscale images

In [None]:
# Check the shape of y_train (target labels) and y_test
print("Shape of y_train (target labels):", y_train.shape)
print("Shape of y_test (target labels):", y_test.shape)

Shape of y_train (target labels): (3453, 5)
Shape of y_test (target labels): (432, 5)
