### In this notebook we prepare the data for modeling
- BY: Abdelraouf Hawash 
- DATE: 30 / 3 / 2022

### *import libraries*

In [1]:
import os

import cv2
import numpy as np

import preprocessor

### *data generating*

#### define data source and destination

In [2]:
# Category sub-folders; the same names are used under both root folders.
categories = ["empty", "horizontal", "QR", "slide", "vertical"]

# Input folders under ./root_data, one per category.
source = [f"./root_data/{name}" for name in categories]

# Output folders under ./raw_data, paired by position with `source`.
dest = [f"./raw_data/{name}" for name in categories]

#### Optional: rename the files (indexing) to avoid Arabic characters in the file names

In [4]:
# Run the indexing step once for every input folder.
for folder in source:
    preprocessor.indexing(folder)

#### Optional: resize the images and index them

In [6]:
# Scale factor passed as both FX and FY.
scale = 0.08

# Process the folders pairwise: each input folder goes to the destination at
# the same position in `dest`.
for src_dir, dst_dir in zip(source, dest):
    preprocessor.data_resize_and_indexing(source=src_dir, dest=dst_dir, FX=scale, FY=scale)

./root_data/empty/empty.jpg  -  (960, 1280, 3) >>> ./raw_data/empty/0.jpg  -  (77, 102, 3) 

./root_data/horizontal/horizontal.jpg  -  (1280, 960, 3) >>> ./raw_data/horizontal/0.jpg  -  (102, 77, 3) 

./root_data/QR/QR_code.jpg  -  (512, 512, 3) >>> ./raw_data/QR/0.jpg  -  (41, 41, 3) 

./root_data/slide/slide.jpg  -  (1280, 960, 3) >>> ./raw_data/slide/0.jpg  -  (102, 77, 3) 

./root_data/vertical/veritical.jpg  -  (1280, 960, 3) >>> ./raw_data/vertical/0.jpg  -  (102, 77, 3) 



#### augmentation

In [8]:
# Category sub-folders; the same names are used under both root folders.
categories = ["empty", "horizontal", "QR", "slide", "vertical"]

# Input folders under ./raw_data, one per category.
source = [f"./raw_data/{name}" for name in categories]

# Output folders under ./augmented_data, paired by position with `source`.
dest = [f"./augmented_data/{name}" for name in categories]

In [9]:
# Flipping is a suitable augmentation for this application, so both the
# horizontal and the vertical flip are enabled for every folder pair.
for raw_dir, augmented_dir in zip(source, dest):
    preprocessor.flipping(raw_dir, augmented_dir, horizontal=True, vertical=True, Indexing=True)


./raw_data/empty/0.jpg
./raw_data/horizontal/0.jpg
./raw_data/QR/0.jpg
./raw_data/slide/0.jpg
./raw_data/vertical/0.jpg


#### labeling data

Generate data with a fixed label for each folder

In [2]:
# Classes whose folders each map to exactly one label.
labels = ["empty", "horizontal", "QR"]

# Input and output folders are named after the labels, so build them from the
# same list to keep the three lists aligned by position.
source = [f"./augmented_data/{label}" for label in labels]
dest = [f"./output/{label}" for label in labels]

In [3]:
# Pass each folder's label explicitly, taken from the matching position in
# `labels`.
for src_dir, out_dir, label in zip(source, dest, labels):
    preprocessor.data_generator_labeled(src_dir, out_dir, label)

./augmented_data/empty/0.jpg
./augmented_data/empty/1.jpg
./augmented_data/empty/2.jpg
./augmented_data/horizontal/0.jpg
./augmented_data/horizontal/2.jpg
./augmented_data/horizontal/1.jpg
./augmented_data/QR/1.jpg
./augmented_data/QR/2.jpg
./augmented_data/QR/0.jpg


Generate data with manual labeling

###### classify slides

In [2]:
# Input folder with the augmented slide images and the folder for the results.
source, dest = "./augmented_data/slide", "./output/slide"

# NOTE(review): this call appears to be interactive (the run emitted an OpenCV
# Qt warning and printed one label per image) — confirm in preprocessor.
preprocessor.data_generator(source, dest)

qt.qpa.plugin: Could not find the Qt platform plugin "wayland" in "/home/raouf/.local/lib/python3.10/site-packages/cv2/qt/plugins"


center
right1
center


True

###### classify vertical

In [3]:
# Input folder with the augmented vertical images and the folder for the results.
source, dest = "./augmented_data/vertical", "./output/vertical"

# NOTE(review): this call appears to be interactive (the run printed one label
# per image) — confirm in preprocessor.
preprocessor.data_generator(source, dest)

lef1
right1
right1


True

#### Combine all the data into a single dataset

In [4]:
# Per-class folders under ./output that are read when gathering the data.
source = [f"./output/{name}" for name in ("empty", "horizontal", "QR", "slide", "vertical")]

# Folder that receives the combined arrays.
dest = './output/all_data'

gather data

In [5]:
# Accumulators for the samples collected from every class folder.
source_data0, X_data0, y_data0 = [], [], []

for folder in source:
    # Each folder provides three arrays that are read together.
    S = np.load(f'{folder}/source_data.npy')
    X = np.load(f'{folder}/X_data.npy')
    Y = np.load(f'{folder}/y_data.npy')

    # zip stops at the shortest array, so only complete triples are collected.
    for src_item, x_item, y_item in zip(S, X, Y):
        source_data0.append(src_item)
        X_data0.append(x_item)
        y_data0.append(y_item)

    # Progress marker: one line per folder that has been read.
    print(folder)

./output/empty
./output/horizontal
./output/QR
./output/slide
./output/vertical


shuffle data

In [6]:
# Fixed seed so that "Restart Kernel -> Run All" reproduces the same ordering
# (and therefore the same saved files) on every run.
SHUFFLE_SEED = 42

# Draw one random permutation of the sample indices and apply it to all three
# lists, so every sample keeps its own source entry and label.
rng = np.random.default_rng(SHUFFLE_SEED)
randomize = rng.permutation(len(source_data0))

source_data = [source_data0[i] for i in randomize]
X_data = [X_data0[i] for i in randomize]
y_data = [y_data0[i] for i in randomize]

#### Save the data to files so it can be loaded later

In [7]:
# Create the destination folder together with any missing parent folders.
# exist_ok=True makes the cell safe to re-run when the folder already exists.
os.makedirs(dest, exist_ok=True)

# Store the three shuffled collections next to each other in `dest`.
np.save(f'{dest}/source_data.npy', source_data)
np.save(f'{dest}/X_data.npy', X_data)
np.save(f'{dest}/y_data.npy', y_data)

### show our generated data

loading data from files

In [2]:
# Folder holding the combined arrays.
all_data_dir = './output/all_data'

# Load the three arrays stored in that folder.
source_data = np.load(f'{all_data_dir}/source_data.npy')
X_data = np.load(f'{all_data_dir}/X_data.npy')
Y_data = np.load(f'{all_data_dir}/y_data.npy')

# Quick look at the loaded data: the feature array, its shape, then the labels.
for item in (X_data, X_data.shape, Y_data):
    print(item)

[[15 15 15 ... 16 16 16]
 [ 1  9  9 ...  8  7  7]
 [ 7  7  7 ...  8  8  7]
 ...
 [15 15 15 ... 15 15 16]
 [ 9  9  9 ...  7  6  6]
 [ 7  7  7 ...  9  8  8]]
(15, 400)
['QR' 'right1' 'right1' 'center' 'center' 'empty' 'horizontal' 'empty'
 'right1' 'QR' 'horizontal' 'empty' 'QR' 'horizontal' 'lef1']


In [3]:
# Show the first sample of the loaded feature array.
# NOTE(review): the call appears to open an OpenCV window (the run emitted a Qt
# plugin warning) — confirm in preprocessor.
first_sample = X_data[0]
preprocessor.show_processed_img(first_sample)

qt.qpa.plugin: Could not find the Qt platform plugin "wayland" in "/home/raouf/.local/lib/python3.10/site-packages/cv2/qt/plugins"


113

In [3]:
# Class names used for converting between string labels and list encodings;
# the position of a name in this list is its index in the encoding.
# NOTE(review): the spellings are inconsistent ('lef3', 'left2', 'lef1'). They
# are kept as-is because they must match the labels stored in the data —
# confirm against what preprocessor emits before renaming.
classes = [
    'QR',
    'empty',
    'horizontal',
    'lef3',
    'left2',
    'lef1',
    'center',
    'right1',
    'right2',
    'right3',
]

In [4]:
# Convert every string label into its list encoding over `classes`.
y_data = []
for label in Y_data:
    y_data.append(preprocessor.conv_str2list(label, classes=classes))

# Inspect the encoding of the first sample.
y0 = y_data[0]
print(y0)

[1, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [5]:
# Convert the first sample's list encoding back into its string label, as a
# round-trip check of the conversion.
y0_label = preprocessor.conv_list2str(y0, classes=classes)
print(y0_label)

QR
