In [1]:
import numpy as np
import pandas as pd
import cv2
import os

Connect to Drive if needed

In [2]:
# Colab-specific step: make Google Drive available to this runtime by
# mounting it at /content/drive (the paths defined below live under it).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Set **source_path** (the folder containing the spectrogram images) and **save_path** (the folder where the outputs are written)

In [3]:
# Folder that is walked recursively for the input spectrogram images.
source_path = "/content/drive/MyDrive/spectrograms"
# Folder that receives the pickled DataFrame and the .npy split files.
save_path = "/content/drive/MyDrive"

Process each image under **source_path** and append it, together with its class id, to **imageid_list**

In [4]:
# Build imageid_list: one [image, classid] pair per readable image found
# anywhere under source_path.
#
# Directories and files are visited in sorted order. os.walk yields names in
# an arbitrary, filesystem-dependent order, and the seeded train/val/test
# split performed later depends on the order of this list, so sorting keeps
# the resulting datasets reproducible across runs and machines.
imageid_list = list()
skipped_files = list()
for root, dirs, files in os.walk(source_path):
  dirs.sort()  # in-place sort controls the order in which os.walk descends
  for file in sorted(files):
    file_path = os.path.join(root,file)

    # The class id is the second "-"-separated field of the file name.
    # Files that do not follow that pattern (e.g. stray metadata files)
    # are skipped instead of raising IndexError.
    name_parts = file.split("-")
    if len(name_parts) < 2:
      skipped_files.append(file_path)
      continue
    classid = name_parts[1]

    # cv2.imread signals failure by returning None rather than raising, so
    # check explicitly before cvtColor would crash on a non-image file.
    img = cv2.imread(file_path)
    if img is None:
      skipped_files.append(file_path)
      continue

    # Grayscale -> 64x64 -> float32, then per-image min-max rescale to [0, 200].
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray_img = cv2.resize(gray_img, dsize=(64,64), interpolation=cv2.INTER_AREA)
    gray_img = gray_img.astype('float32') / 255.0
    # NOTE(review): min-max normalisation is unaffected by the /255 scaling
    # above, so the final range is set solely by alpha/beta here -- confirm
    # that an upper bound of 200 is intended.
    gray_img = cv2.normalize(gray_img, None, alpha=0,beta=200, norm_type=cv2.NORM_MINMAX)
    imageid_list.append([gray_img,classid])

print(f"Total images: {len(imageid_list)}")
if skipped_files:
  print(f"Skipped {len(skipped_files)} unreadable or unexpectedly named files")

Total images: 8732


Turn **imageid_list** into a **DataFrame**

In [5]:
# One row per processed image: the image array in 'img', its class id in 'label'.
df = pd.DataFrame(data=imageid_list, columns=["img", "label"])

Save the **DataFrame** to **save_path** using **pickle**

In [6]:
import pickle

# Serialise the whole DataFrame (images + labels) to a single pickle file
# inside save_path.
pickle_path = f'{save_path}/dataset.pickle'
with open(pickle_path, mode='wb') as pickle_file:
    pickle.dump(df, pickle_file)

Split the **DataFrame** into the **X_train, y_train, X_val, y_val, X_test** and
**y_test** datasets (60% train / 20% validation / 20% test)

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# Features: combine the per-image arrays of the 'img' column into one array
# and append a trailing axis of length 1.
X = np.array(df['img'].tolist())
X = np.expand_dims(X, axis=-1)

# Targets: the class ids cast to float32.
y = df['label'].astype(np.float32)

# Both splits share the same seed and shuffle setting.
split_options = dict(random_state=42, shuffle=True)

# Stage 1: hold out 20% of all samples as the test set.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.2, **split_options
)

# Stage 2: 25% of the remaining 80% (i.e. 20% of the total) becomes the
# validation set, leaving 60% for training.
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.25, **split_options
)

In [9]:
# Sample counts of the train, validation and test splits, one per line.
print(len(X_train), len(X_val), len(X_test), sep="\n")

5238
1747
1747


Save **X_train, y_train, X_val, y_val, X_test** and **y_test** to **save_path** using **numpy**

In [10]:
# Write every split to save_path under its own name; np.save adds the
# ".npy" extension because the given file names do not already end with it.
for split_name, split_data in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("X_val", X_val),
    ("y_train", y_train),
    ("y_test", y_test),
    ("y_val", y_val),
):
    np.save(f"{save_path}/{split_name}", split_data)