# The total, available, and used memory of the system in a readable format like "GB" or "MB".

In [1]:
import psutil
def get_size(bytes, suffix="B"):
    factor = 1024
    for unit in ["", "K", "M", "G", "T", "P"]:
        if bytes < factor:
            return f"{bytes:.2f}{unit}{suffix}"
        bytes /= factor
print("="*40, "Memory Info", "="*40)
svmem = psutil.virtual_memory()
print(f"Total: {get_size(svmem.total)}")
print(f"Available: {get_size(svmem.available)}")
print(f"Used: {get_size(svmem.used)}")

Total: 15.73GB
Available: 7.55GB
Used: 8.18GB


# Import Files

In [2]:
# Update the path to a local directory where the 'flowers' dataset is located
data_dir = 'flowers'

In [3]:
# Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.datasets import load_files
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Loading the DATA

In [4]:
# Load the data from the updated local directory
data = load_files(data_dir)

# DATA Processing and Exploration

In [5]:
# List the content of the Folder
folders = os.listdir(data_dir)
print(folders)

['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']


# We have a folder for each flower class or type, and you want to load the data into two NumPy arrays:

- X: Filenames (training data)
- y: Flower names (target labels)

In [6]:
X = np.array(data['filenames'])
y = np.array(data['target'])
labels = np.array(data['target_names'])

# Arrays
print('Data files - ',X)
print('Target labels - ',y)

Data files -  ['flowers\\sunflower\\7176729016_d73ff2211e.jpg'
 'flowers\\dandelion\\18385846351_3a2bf60427_n.jpg'
 'flowers\\dandelion\\98992760_53ed1d26a9.jpg' ...
 'flowers\\dandelion\\7184780734_3baab127c2_m.jpg'
 'flowers\\sunflower\\10386540106_1431e73086_m.jpg'
 'flowers\\sunflower\\164670176_9f5b9c7965.jpg']
Target labels -  [3 1 1 ... 1 3 3]


The numbers in the **Target labels as [3 1 1 ... 1 3 3]** are corresponding to class label. We need to change them to a vector of 5 elements (5 classes)

In [7]:
# Initialize OneHotEncoder
encoder = OneHotEncoder(sparse=False)

In [8]:
# Reshape y to a 2D array
y_reshaped = y.reshape(-1, 1)

In [9]:
# Perform one-hot encoding
y_onehot = encoder.fit_transform(y_reshaped)

In [10]:
# Display the one-hot encoded target labels
print('One-hot encoded target labels:')
print(y_onehot)

One-hot encoded target labels:
[[0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 ...
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]]


## Removing the .pyc or .py files from X and y:
- This loop iterates over the positions of .pyc or .py files found in the previous step.
- np.delete(X, pos) removes the elements at the specified positions from the array X.
- Similarly, np.delete(y, pos) removes the corresponding elements from the array y.

In [13]:
# Find positions of .pyc or .py files in X
pos = [i for i, filename in enumerate(X) if filename.endswith('.pyc') or filename.endswith('.py')]

# Remove elements at the specified positions from X and y
X = np.delete(X, pos)
y = np.delete(y, pos)

# Verify the updated X and y arrays
print('Updated Data files - ', X)
print('Updated Target labels - ', y)

Updated Data files -  ['flowers\\sunflower\\7176729016_d73ff2211e.jpg'
 'flowers\\dandelion\\18385846351_3a2bf60427_n.jpg'
 'flowers\\dandelion\\98992760_53ed1d26a9.jpg' ...
 'flowers\\dandelion\\7184780734_3baab127c2_m.jpg'
 'flowers\\sunflower\\10386540106_1431e73086_m.jpg'
 'flowers\\sunflower\\164670176_9f5b9c7965.jpg']
Updated Target labels -  [3 1 1 ... 1 3 3]


# Loading the Images

## Converts a list of image file paths into an array of image data, resizes the images, and prints the shape of the resulting array and the first training item. The steps include:
- Importing necessary functions from Keras:
- Defining the convert_img_to_arr function:
- Converting the list of image arrays to a NumPy array:
- Printing the shape of the array and the first training item:

In [19]:
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
def convert_img_to_arr(img_path, target_size):
    img = image.load_img(img_path, target_size=target_size)
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = preprocess_input(img)
    return img

In [20]:
# Define the target size for resizing the images
target_size = (224, 224)

In [21]:
# List of image file paths
image_paths = ['flowers/sunflower/7176729016_d73ff2211e.jpg', 'flowers/dandelion/18385846351_3a2bf60427_n.jpg', ...]

In [22]:
# Convert image paths to NumPy array of image data
image_data = np.array([convert_img_to_arr(img_path, target_size) for img_path in image_paths])

AttributeError: 'ellipsis' object has no attribute 'read'

In [None]:
print('Shape of the image data array:', image_data.shape)
print('First training item:')
print(image_data[0])

In [12]:
from keras.preprocessing.image import img_to_array, load_img
import numpy as np

def convert_img_to_arr(file_path_list):
    arr = []
    #size=64,64
    img_width, img_height = 150, 150
    for file_path in file_path_list:
        img = load_img(file_path, target_size=(img_width, img_height))
        img = img_to_array(img)
        arr.append(img)
        #arr.append(cv2.resize(img,size))
    return arr

X = np.array(convert_img_to_arr(X))
print(X.shape)
print('First training item : ', X[0])

ImportError: cannot import name 'img_to_array' from 'keras.preprocessing.image' (C:\Users\linds\anaconda3\envs\PythonData\lib\site-packages\keras\preprocessing\image.py)

## **Key Points:**
- The shape of training data is (4317, 150, 150, 3)
- 4317 is the number of training items or files
- (150,150) is the target size or image size provided while loading image
- 3 refers to the depth for colored images ( RGB channels )

# Have a look at some beautiful Flower Images

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Debugging: Check data type and range
print(f"Data type of X: {X.dtype}")
print(f"Min value in X: {X.min()}")
print(f"Max value in X: {X.max()}")

# Debugging: Check label indexing
print(f"Labels: {y[:5]}")
print(f"Corresponding folder names: {[folders[label] for label in y[:5]]}")

# Debugging: Check image shape
print(f"Shape of first image: {X[0].shape}")

fig = plt.figure(figsize=(16, 9))
for i in range(5):
    ax = fig.add_subplot(1, 5, i + 1, xticks=[], yticks=[])
    ax.imshow(X[i].astype(np.uint8))
    ax.set_title(folders[y[i]])
plt.show()

In [None]:
# rescale the training data
X = X.astype('float32')/255
# Confirming number of class
num_classes = len(np.unique(y))
print('Number of classes : ', num_classes)

In [None]:
y

In [None]:
from keras.utils import to_categorical
y = to_categorical(y)
y[0]

In [None]:
from sklearn.model_selection import train_test_split
# splite the data into subsets and explore their shapes
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)
print('The test Data Shape ', X_test.shape[0])
X_test, X_valid, y_test, y_valid = train_test_split(X_test,y_test, test_size = 0.5)
print('The training Data Shape ', X_valid.shape[0])

In [None]:
print('The train Data Shape ', X_train.shape[1:])