# Imports here 👇

In [1]:
import numpy as np
from typing import Tuple
from google.cloud import storage
from keras import Model, Sequential, layers, regularizers, optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

2024-03-07 11:37:41.635479: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-07 11:37:41.640318: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-07 11:37:41.678864: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-07 11:37:41.678904: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-07 11:37:41.681224: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [2]:
from ai_detector.params import *

# Loading the Data

### Fake data first 👇

In [3]:
 # get the data from GCS
storage_client = storage.Client()

In [4]:
# create train data
X = []
y = []
blob_fake = storage_client.list_blobs(BUCKET_NAME_FAKE,
                                        prefix="IF-CC1M",
                                        max_results=5)

In [5]:
for blob in blob_fake:
    bytes_out = blob.download_as_bytes()
    array_tensor = tf.convert_to_tensor(bytes_out)
    img_array = tf.io.decode_image(array_tensor)
    X.append(img_array)
    y.append(1)

X, y

2024-03-07 11:37:50.541910: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-07 11:37:50.542119: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


([<tf.Tensor: shape=(256, 256, 3), dtype=uint8, numpy=
  array([[[17, 17, 16],
          [21, 20, 22],
          [21, 23, 21],
          ...,
          [40, 37, 38],
          [34, 33, 33],
          [ 9,  6, 11]],
  
         [[14, 14, 14],
          [20, 18, 18],
          [22, 22, 22],
          ...,
          [37, 36, 38],
          [34, 32, 34],
          [ 8,  7, 10]],
  
         [[12, 11, 12],
          [24, 23, 24],
          [33, 32, 32],
          ...,
          [37, 37, 35],
          [33, 32, 32],
          [ 8,  7,  8]],
  
         ...,
  
         [[ 6,  3,  3],
          [34, 33, 33],
          [39, 36, 38],
          ...,
          [37, 35, 35],
          [36, 34, 37],
          [11,  9, 10]],
  
         [[ 0,  1,  3],
          [31, 31, 31],
          [39, 37, 38],
          ...,
          [36, 36, 37],
          [38, 35, 38],
          [11, 10, 10]],
  
         [[ 2,  3,  5],
          [35, 32, 34],
          [37, 37, 38],
          ...,
          [37, 37, 38],
  

In [6]:
len(X)

5

### Now for the real data 👇

In [7]:
blob_real = storage_client.list_blobs(BUCKET_NAME_REAL,
                                        prefix="extracted",
                                        max_results=5)
for blob in blob_real:
    bytes_out = blob.download_as_bytes()
    array_tensor = tf.convert_to_tensor(bytes_out)
    img_array = tf.io.decode_image(array_tensor)
    X.append(img_array)
    y.append(0)

X, y

([<tf.Tensor: shape=(256, 256, 3), dtype=uint8, numpy=
  array([[[17, 17, 16],
          [21, 20, 22],
          [21, 23, 21],
          ...,
          [40, 37, 38],
          [34, 33, 33],
          [ 9,  6, 11]],
  
         [[14, 14, 14],
          [20, 18, 18],
          [22, 22, 22],
          ...,
          [37, 36, 38],
          [34, 32, 34],
          [ 8,  7, 10]],
  
         [[12, 11, 12],
          [24, 23, 24],
          [33, 32, 32],
          ...,
          [37, 37, 35],
          [33, 32, 32],
          [ 8,  7,  8]],
  
         ...,
  
         [[ 6,  3,  3],
          [34, 33, 33],
          [39, 36, 38],
          ...,
          [37, 35, 35],
          [36, 34, 37],
          [11,  9, 10]],
  
         [[ 0,  1,  3],
          [31, 31, 31],
          [39, 37, 38],
          ...,
          [36, 36, 37],
          [38, 35, 38],
          [11, 10, 10]],
  
         [[ 2,  3,  5],
          [35, 32, 34],
          [37, 37, 38],
          ...,
          [37, 37, 38],
  

In [8]:
len(X)

10

In [9]:
len(y)

10

### We need to reshape the real data to fit with the fake one 😒

The **fake data** is comprized of images of `shape` (256,256) whereas the **real data** has images of `shape` (512, 512).

We need to `resize` the **real** images to (256,256) to avoid any comflicts between the data.

There is a function for this: `tf.image.resize` 🤩

#### Let's try this out with **one** image 👌

In [13]:
tf.image.resize(X[-1], [256,256])

<tf.Tensor: shape=(256, 256, 3), dtype=float32, numpy=
array([[[ 53.5,  46.5,  53.5],
        [ 50. ,  43. ,  50. ],
        [ 49.5,  43.5,  47.5],
        ...,
        [ 37. ,  40. ,  45. ],
        [ 38.5,  39.5,  44.5],
        [ 38. ,  37. ,  42. ]],

       [[ 49.5,  47.5,  52.5],
        [ 46. ,  44. ,  49. ],
        [ 45. ,  43. ,  46. ],
        ...,
        [ 37.5,  40.5,  45.5],
        [ 38.5,  39.5,  43.5],
        [ 39.5,  38.5,  43.5]],

       [[ 46.5,  45.5,  50.5],
        [ 46. ,  45. ,  50. ],
        [ 46. ,  46. ,  48. ],
        ...,
        [ 36. ,  41. ,  46. ],
        [ 36.5,  40. ,  44. ],
        [ 38. ,  37. ,  42. ]],

       ...,

       [[188. , 152. , 154. ],
        [197. , 158. , 161. ],
        [195.5, 154.5, 156. ],
        ...,
        [ 38. ,  35. ,  42. ],
        [ 36.5,  33.5,  40.5],
        [ 35. ,  32. ,  41. ]],

       [[243.5, 246.5, 253.5],
        [245. , 244.5, 252.5],
        [240. , 235.5, 245. ],
        ...,
        [ 38. ,  32.5,

### Let's add our magic code to our loading functions 🙌

In [26]:
X = []
y = []
blob_fake = storage_client.list_blobs(BUCKET_NAME_FAKE,
                                        prefix="IF-CC1M",
                                        max_results=5)
for blob in blob_fake:
    bytes_out = blob.download_as_bytes()
    array_tensor = tf.convert_to_tensor(bytes_out)
    img_array = tf.io.decode_image(array_tensor)
    img_array = tf.image.resize(img_array, [256,256])
    X.append(img_array)
    y.append(1)

X, y

([<tf.Tensor: shape=(256, 256, 3), dtype=float32, numpy=
  array([[[17., 17., 16.],
          [21., 20., 22.],
          [21., 23., 21.],
          ...,
          [40., 37., 38.],
          [34., 33., 33.],
          [ 9.,  6., 11.]],
  
         [[14., 14., 14.],
          [20., 18., 18.],
          [22., 22., 22.],
          ...,
          [37., 36., 38.],
          [34., 32., 34.],
          [ 8.,  7., 10.]],
  
         [[12., 11., 12.],
          [24., 23., 24.],
          [33., 32., 32.],
          ...,
          [37., 37., 35.],
          [33., 32., 32.],
          [ 8.,  7.,  8.]],
  
         ...,
  
         [[ 6.,  3.,  3.],
          [34., 33., 33.],
          [39., 36., 38.],
          ...,
          [37., 35., 35.],
          [36., 34., 37.],
          [11.,  9., 10.]],
  
         [[ 0.,  1.,  3.],
          [31., 31., 31.],
          [39., 37., 38.],
          ...,
          [36., 36., 37.],
          [38., 35., 38.],
          [11., 10., 10.]],
  
         [[ 2.,  3., 

In [15]:
blob_real = storage_client.list_blobs(BUCKET_NAME_REAL,
                                        prefix="extracted",
                                        max_results=5)
for blob in blob_real:
    bytes_out = blob.download_as_bytes()
    array_tensor = tf.convert_to_tensor(bytes_out)
    img_array = tf.io.decode_image(array_tensor)
    img_array = tf.image.resize(img_array, [256,256])
    X.append(img_array)
    y.append(0)

X, y

([<tf.Tensor: shape=(256, 256, 3), dtype=float32, numpy=
  array([[[17., 17., 16.],
          [21., 20., 22.],
          [21., 23., 21.],
          ...,
          [40., 37., 38.],
          [34., 33., 33.],
          [ 9.,  6., 11.]],
  
         [[14., 14., 14.],
          [20., 18., 18.],
          [22., 22., 22.],
          ...,
          [37., 36., 38.],
          [34., 32., 34.],
          [ 8.,  7., 10.]],
  
         [[12., 11., 12.],
          [24., 23., 24.],
          [33., 32., 32.],
          ...,
          [37., 37., 35.],
          [33., 32., 32.],
          [ 8.,  7.,  8.]],
  
         ...,
  
         [[ 6.,  3.,  3.],
          [34., 33., 33.],
          [39., 36., 38.],
          ...,
          [37., 35., 35.],
          [36., 34., 37.],
          [11.,  9., 10.]],
  
         [[ 0.,  1.,  3.],
          [31., 31., 31.],
          [39., 37., 38.],
          ...,
          [36., 36., 37.],
          [38., 35., 38.],
          [11., 10., 10.]],
  
         [[ 2.,  3., 

In [16]:
X[-1]

<tf.Tensor: shape=(256, 256, 3), dtype=float32, numpy=
array([[[ 53.5,  46.5,  53.5],
        [ 50. ,  43. ,  50. ],
        [ 49.5,  43.5,  47.5],
        ...,
        [ 37. ,  40. ,  45. ],
        [ 38.5,  39.5,  44.5],
        [ 38. ,  37. ,  42. ]],

       [[ 49.5,  47.5,  52.5],
        [ 46. ,  44. ,  49. ],
        [ 45. ,  43. ,  46. ],
        ...,
        [ 37.5,  40.5,  45.5],
        [ 38.5,  39.5,  43.5],
        [ 39.5,  38.5,  43.5]],

       [[ 46.5,  45.5,  50.5],
        [ 46. ,  45. ,  50. ],
        [ 46. ,  46. ,  48. ],
        ...,
        [ 36. ,  41. ,  46. ],
        [ 36.5,  40. ,  44. ],
        [ 38. ,  37. ,  42. ]],

       ...,

       [[188. , 152. , 154. ],
        [197. , 158. , 161. ],
        [195.5, 154.5, 156. ],
        ...,
        [ 38. ,  35. ,  42. ],
        [ 36.5,  33.5,  40.5],
        [ 35. ,  32. ,  41. ]],

       [[243.5, 246.5, 253.5],
        [245. , 244.5, 252.5],
        [240. , 235.5, 245. ],
        ...,
        [ 38. ,  32.5,

Now that the `shapes` are the same, we can plug all this in an array to fit in our model

In [28]:
X = np.array(X)
y = np.array(y)

In [30]:
X.shape

(5, 256, 256, 3)

### Let's put all this in a single `function` ✌️

In [38]:
def load_data(num_images = 10):
    """
        Function to load the data from the Cloud Storage.
        You can specify how many images you want using the num_images parameters.
        If num_images isn't an even numbe, you'll receive num_images + 1.
        You'll receive a even amount of Real and Fake images
    """
    # Check if the param is even
    if num_images % 2 != 0:
        num_images += 1

    # get the data from GCS
    storage_client = storage.Client()

    # Create base arrays to store our images
    X = []
    y = []

    # Create the real and fake blobs comming from Cloud Storage
    blob_fake = storage_client.list_blobs(BUCKET_NAME_FAKE,
                                        prefix="IF-CC1M",
                                        max_results=num_images/2)

    blob_real = storage_client.list_blobs(BUCKET_NAME_REAL,
                                        prefix="extracted",
                                        max_results=num_images/2)

    # First loop to capture the fake images
    for blob in blob_fake:
        bytes_out = blob.download_as_bytes()
        array_tensor = tf.convert_to_tensor(bytes_out)
        img_array = tf.io.decode_image(array_tensor)
        img_array = tf.image.resize(img_array, [256,256])
        X.append(img_array)
        y.append(1)

    # Second loop to capture the real images
    for blob in blob_real:
        bytes_out = blob.download_as_bytes()
        array_tensor = tf.convert_to_tensor(bytes_out)
        img_array = tf.io.decode_image(array_tensor)
        img_array = tf.image.resize(img_array, [256,256])
        X.append(img_array)
        y.append(0)

    # Turn X and y into numpy arrays to use them in our model
    X = np.array(X)
    y = np.array(y)

    return X, y

In [51]:
X, y = load_data(50)
X.shape

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 3 dimensions. The detected shape was (50, 256, 256) + inhomogeneous part.