# P8 Déployer un modèle dans le cloud - Upload dataset to AWS S3 bucket using boto3

In [7]:
import random

import boto3
import requests
from tqdm import tqdm

import glob
from PIL import Image
import io

## Load dataset

In [8]:
# Local directory searched (recursively) for images by iterate_fruits_360_dataset.
dataset_folder = "/opt/spark/fruits-360-dataset/LightTraining"

In [9]:
def iterate_fruits_360_dataset(n_images: int = 1000, dataset_dir: "str | None" = None):
    """Yield images found under a local Fruits 360 dataset folder.

    The folder is searched recursively for files whose path contains
    ``'Training'`` and whose name ends with ``.jpg``.

    Args:
        n_images: Number of images to draw at random (without replacement).
            If fewer images are available, all of them are yielded in random
            order instead of raising. A falsy value (``0`` / ``None``) yields
            every image in the order returned by ``glob``.
        dataset_dir: Root folder to search. Defaults to the module-level
            ``dataset_folder`` when omitted.

    Yields:
        Tuples ``(pil_image, image_label, image_name)`` where ``pil_image`` is
        the object returned by ``Image.open``, ``image_label`` is the name of
        the image's parent directory with spaces removed, and ``image_name``
        is the file name.
    """
    import os  # local import: keeps this notebook cell self-contained

    root = dataset_folder if dataset_dir is None else dataset_dir

    # List all training images in dataset. endswith() avoids picking up
    # files that merely contain ".jpg" somewhere in their path.
    images_paths = [
        path
        for path in glob.iglob(os.path.join(root, '**'), recursive=True)
        if 'Training' in path and path.endswith('.jpg')
    ]

    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample size at the number of images actually found.
    if n_images:
        images_paths = random.sample(images_paths, min(n_images, len(images_paths)))

    # Yield training images
    for image_path in images_paths:
        label_dir, image_name = os.path.split(image_path)
        image_label = os.path.basename(label_dir).replace(' ', '')
        pil_image = Image.open(image_path)

        yield pil_image, image_label, image_name

## Check access and current data in S3 bucket

In [10]:
# Name of the S3 bucket this notebook connects to.
S3_BUCKET_NAME = 'cloud-fruits-p8-bucket'

# Key prefix ("folder") inside the bucket under which images are listed and uploaded.
s3_input_images_to_process_bucket_folder = 'input_images_to_process'

In [11]:
print("... list images from Fruits 360 dataset in S3 input_images_to_process bucket")

# Connect to S3 storage
s3_bucket = boto3.resource('s3').Bucket(S3_BUCKET_NAME)

# S3 prefixes are plain string prefixes: without the trailing '/', keys under
# any sibling prefix that merely starts with the folder name would match too.
s3_input_images_prefix = f'{s3_input_images_to_process_bucket_folder}/'

# List images from Fruits 360 dataset. endswith() ensures only keys whose
# extension is .jpg are kept, not keys that contain ".jpg" elsewhere.
image_keys = [
    s3_object.key
    for s3_object in s3_bucket.objects.filter(Prefix=s3_input_images_prefix)
    if s3_object.key.endswith('.jpg')
]
print(len(image_keys), image_keys)

... list images from Fruits 360 dataset in S3 input_images_to_process bucket
50 ['input_images_to_process/Apricot_146_100.jpg', 'input_images_to_process/Apricot_148_100.jpg', 'input_images_to_process/Apricot_183_100.jpg', 'input_images_to_process/Apricot_r_112_100.jpg', 'input_images_to_process/Apricot_r_284_100.jpg', 'input_images_to_process/Apricot_r_285_100.jpg', 'input_images_to_process/Apricot_r_286_100.jpg', 'input_images_to_process/Apricot_r_315_100.jpg', 'input_images_to_process/Apricot_r_319_100.jpg', 'input_images_to_process/Banana_152_100.jpg', 'input_images_to_process/Banana_153_100.jpg', 'input_images_to_process/Banana_215_100.jpg', 'input_images_to_process/Banana_281_100.jpg', 'input_images_to_process/Banana_282_100.jpg', 'input_images_to_process/Banana_35_100.jpg', 'input_images_to_process/Banana_37_100.jpg', 'input_images_to_process/Banana_r_256_100.jpg', 'input_images_to_process/Banana_r_308_100.jpg', 'input_images_to_process/Banana_r_309_100.jpg', 'input_images_to_pro

## Upload to S3 bucket

In [16]:
# Connect to S3 storage
s3_bucket = boto3.resource('s3').Bucket(S3_BUCKET_NAME)

# Empty S3 storage -- destructive: deletes every object in the bucket.
# Left disabled on purpose; uncomment only when a full reset is intended.
#s3_bucket.objects.all().delete()

# Number of images to sample and upload; also passed to tqdm as the total,
# because a generator has no len() and the bar would otherwise show "?it".
n_images_to_upload = 50

# Iterate over Fruits 360 dataset and upload training images to S3 storage
for pil_image, img_label, img_name in tqdm(
    iterate_fruits_360_dataset(n_images=n_images_to_upload),
    total=n_images_to_upload,
):
    # tqdm.write prints the log line without breaking up the progress bar
    tqdm.write(f"{pil_image} {img_label} {img_name}")

    # Save the image to an in-memory file; the with-block closes the image
    # (and its underlying file handle) as soon as it has been serialized.
    # NOTE(review): save() re-encodes the image rather than copying the
    # original file bytes -- confirm this is acceptable for the pipeline.
    with pil_image:
        in_mem_file = io.BytesIO()
        pil_image.save(in_mem_file, format=pil_image.format)
    in_mem_file.seek(0)

    # Upload image to S3 under "<folder>/<label>_<file name>"
    s3_bucket.upload_fileobj(in_mem_file, Key=f'{s3_input_images_to_process_bucket_folder}/{img_label}_{img_name}')

0it [00:00, ?it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C770160> Raspberry r_126_100.jpg


3it [00:00,  5.61it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F8755484040> Apricot r_108_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D3F70> Banana 152_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D2F20> Kiwi r_182_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D3460> Kiwi r_180_100.jpg


7it [00:00, 10.44it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D2FE0> Kiwi r_210_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D3F70> Raspberry r_94_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D2E60> Apricot 179_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D33A0> Banana 11_100.jpg


11it [00:01, 13.19it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C847FD0> Banana 279_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C594820> Apricot r_110_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C847FD0> Banana r_308_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F8755484040> Raspberry r_123_100.jpg


15it [00:01, 14.51it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5948E0> Apricot r_319_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D3F70> Banana r_256_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C8465C0> Raspberry r_323_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D3310> Banana 153_100.jpg


19it [00:01, 14.97it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6B3160> Raspberry r_127_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D3220> Raspberry r_322_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872D53ED40> Kiwi 218_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D3400> Banana 282_100.jpg


23it [00:01, 15.52it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6B3940> Banana 35_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5951B0> Banana 154_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6B30A0> Apricot r_284_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D2EC0> Raspberry r_91_100.jpg


27it [00:02, 15.78it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6B3790> Kiwi r_212_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872D53ED40> Raspberry r_98_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6D8910> Apricot r_318_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6D9C60> Kiwi 222_100.jpg


31it [00:02, 15.55it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872D53ED40> Raspberry r_124_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6D9300> Kiwi r_315_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6D8BE0> Apricot r_109_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C8465C0> Banana 10_100.jpg


35it [00:02, 15.71it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6D9B10> Apricot r_111_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6D89D0> Kiwi r_316_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6B3A00> Banana r_323_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D2E30> Kiwi 220_100.jpg


39it [00:02, 16.00it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872D53F5E0> Kiwi 221_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D3FD0> Banana r_311_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6D9300> Apricot r_316_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D3250> Kiwi r_317_100.jpg


43it [00:03, 15.98it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6D87F0> Raspberry r_97_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5D3250> Apricot r_112_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C594C70> Kiwi r_318_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872D53ED40> Raspberry r_88_100.jpg


47it [00:03, 16.05it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C5AEB30> Kiwi r_213_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C8465C0> Apricot r_285_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6D8AF0> Apricot r_283_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C8465C0> Banana 280_100.jpg


50it [00:03, 13.87it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F872C6D97E0> Banana 37_100.jpg





## Empty the AWS S3 bucket input folder

In [13]:
# Connect to S3 storage
#s3_bucket = boto3.resource('s3').Bucket(S3_BUCKET_NAME)

# Empty S3 storage
#s3_bucket_images = [image for image in s3_bucket.objects.filter(Prefix=s3_input_images_to_process_bucket_folder) if '.jpg' in image.key]
#for s3_bucket_image in s3_bucket_images:
#    s3_bucket_image.delete()    