# Image Classifier
## 1. Building a Data Pipeline

### Data Setup in Colab


In [7]:
import zipfile
import os

# Unpack the uploaded class archives so that each class ends up in its own
# sub-folder of the main data folder.

# Step 1: Create the data directory and subdirectories (if they don't exist)
data_folder = '/content/data'  # Main folder
happy_folder = os.path.join(data_folder, 'happy')  # Subfolder for happy data
sad_folder = os.path.join(data_folder, 'sad')  # Subfolder for sad data

# exist_ok=True makes this cell safe to re-run.
os.makedirs(happy_folder, exist_ok=True)
os.makedirs(sad_folder, exist_ok=True)

# Step 2: Specify the paths to your ZIP files
zip_paths = ['/content/happy.zip', '/content/sad.zip']  # List of ZIP file paths
folders = [happy_folder, sad_folder]  # Corresponding folders for each ZIP file

# Step 3: Extract each ZIP file into the appropriate folder
for zip_path, folder in zip(zip_paths, folders):
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(folder)
        print(f"Files from {zip_path} extracted to:", folder)
    except FileNotFoundError:
        print(f"File {zip_path} not found!")
    except zipfile.BadZipFile:
        # A truncated or corrupt upload must not stop the remaining archives
        # from being extracted.
        print(f"File {zip_path} is not a valid ZIP archive!")


Files from /content/happy.zip extracted to: /content/data/happy
Files from /content/sad.zip extracted to: /content/data/sad


### 1.1. Install Packages

In [9]:
%pip install tensorflow
%pip install tensorflow-gpu==2.17.0
%pip install opencv-python
%pip install matplotlib

[31mERROR: Could not find a version that satisfies the requirement tensorflow-gpu==2.17.0 (from versions: 2.8.0rc0, 2.8.0rc1, 2.8.0, 2.8.1, 2.8.2, 2.8.3, 2.8.4, 2.9.0rc0, 2.9.0rc1, 2.9.0rc2, 2.9.0, 2.9.1, 2.9.2, 2.9.3, 2.10.0rc0, 2.10.0rc1, 2.10.0rc2, 2.10.0rc3, 2.10.0, 2.10.1, 2.11.0rc0, 2.11.0rc1, 2.11.0rc2, 2.11.0, 2.12.0)[0m[31m
[0m[31mERROR: No matching distribution found for tensorflow-gpu==2.17.0[0m[31m


### 1.2. Data Setup

In [10]:
import tensorflow as tf
import os

In [11]:
# Avoid out-of-memory errors: let TensorFlow grow GPU memory on demand instead
# of reserving all of it up front.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth must be set before the GPUs are initialised; re-running
        # this cell later in the session would otherwise raise.
        print(e)

### 1.3. Remove Dodgy Data

In [13]:
import cv2
import imghdr

In [14]:
# Dataset root, given relative to the current working directory.
data_dir = "data"

# Image type names that are accepted; anything else is treated as unwanted.
image_exts = [
    "jpeg",
    "jpg",
    "png",
    "bmp",
]

In [15]:
# Print the name of every entry found inside each sub-folder of data_dir.
for image_class in os.listdir(data_dir):
    class_dir = os.path.join(data_dir, image_class)
    for image in os.listdir(class_dir):
        print(image)

image39.jpeg
image7.jpeg
sad-group-people-problems-17033671.jpg
close-up-sad-man_23-2150896155.jpg
889508df280eabc26c9e77695a0de8b0.jpg
a-lonely-and-sad-person-sitting-on-a-bench-created-with-generative-ai-technology-photo.jpg
kisspng-shoulder-product-5d1a1ca1992ff2.3181155215619923536275.jpg
image26.jpeg
sad-370x207-1.jpg
image18.jpeg
young-sad-man-crying-frustrated-person-sitting-vector-42585872.jpg
nov-2022-cover-illustration-sad-person-looking-out-window.jpg
dering-frustrated-man-expressing-no-idea-white-people-man-character-60484638.jpg
image4.jpeg
image15.jpeg
image6.jpeg
DealingwithDepressionwithoutMedication-1.jpg
3602821b39e42ccebd3f382d9beb428f.jpg
5198494-depressed-girl-pain-people-stress-worried-problem-photocase-stock-photo-large.jpeg
image34.jpeg
a-sad-mature-man-alone-in-darkness-sitting-and-thinking-HPIF02784.jpg
image42.jpeg
6145b905-b820-47c6-b757-7444e811f0f7.jpg
guy-2617866_1280.jpg
360_F_214879726_9bSXn2uHEDM1dwurJeo9oijgaExkzY0z.jpg
225365065.jpg
sad-woman-silhoue

In [18]:
# Load one sample image. cv2.imread does not raise when the file is missing or
# cannot be decoded; it returns None, so report that here instead of failing
# later with an AttributeError on a None value.
sample_path = os.path.join('data', 'happy', 'happy1.jpg')
img = cv2.imread(sample_path)
if img is None:
    print('Could not read image {}'.format(sample_path))

In [21]:
# Show the value returned by cv2.imread for the sample image.
# NOTE(review): this is presumably None when the file could not be read —
# confirm that the sample path exists.
print(img)

AttributeError: 'NoneType' object has no attribute 'shape'

In [None]:
# Dodgy script: delete every file that is not a recognised image type or that
# OpenCV cannot decode, so that bad files cannot break later loading steps.
for image_class in os.listdir(data_dir):
    class_dir = os.path.join(data_dir, image_class)
    # Skip stray files next to the class folders; os.listdir on a file raises.
    if not os.path.isdir(class_dir):
        continue
    for image in os.listdir(class_dir):
        image_path = os.path.join(class_dir, image)
        # Only regular files can be images; leave nested folders alone.
        if not os.path.isfile(image_path):
            continue
        try:
            # imghdr inspects the file header and returns the detected type
            # (e.g. 'jpeg', 'png'), or None when the type is not recognised.
            tip = imghdr.what(image_path)
            if tip not in image_exts:
                print('Image not in ext list {}'.format(image_path))
                os.remove(image_path)
                continue
            # cv2.imread returns None instead of raising when decoding fails,
            # so the result has to be checked explicitly.
            img = cv2.imread(image_path)
            if img is None:
                print('Image could not be read {}'.format(image_path))
                os.remove(image_path)
        except Exception as e:
            # Keep going with the remaining files, but show what went wrong.
            print('Issue with image {}: {}'.format(image_path, e))