# P8 Déployer un modèle dans le cloud - Upload dataset to AWS S3 bucket using boto3

In [1]:
import random

import boto3
import requests
from tqdm import tqdm

import glob
from PIL import Image
import io

## Load dataset

In [6]:
# Root directory that is searched recursively for the dataset's image files
dataset_folder = "/home/ubuntu/fruits-360-dataset/LightTraining"

In [3]:
def iterate_fruits_360_dataset(n_images: int = 1000, dataset_dir=None):
    """Yield a random sample of training images found under the dataset folder.

    Args:
        n_images: Maximum number of images to yield. When more images are
            requested than exist, every image found is yielded (in random
            order) instead of raising ``ValueError``. A falsy value (``0`` or
            ``None``) disables sampling and yields every image found.
        dataset_dir: Directory to search recursively. When ``None`` (the
            default) the module-level ``dataset_folder`` is used.

    Yields:
        Tuples ``(pil_image, image_label, image_name)`` where ``image_label``
        is the name of the image's parent directory with spaces removed and
        ``image_name`` is the file name.
    """
    root_dir = dataset_folder if dataset_dir is None else dataset_dir

    # List all training images in dataset. The extension is matched at the end
    # of the path so that names such as "x.jpg.txt" are not picked up.
    images_paths = [
        file
        for file in glob.iglob(root_dir + '/**', recursive=True)
        if 'Training' in file and file.endswith('.jpg')
    ]

    # Draw the sample; the size is clamped because random.sample() raises
    # ValueError when asked for more items than the population holds.
    if n_images:
        images_paths = random.sample(images_paths, min(n_images, len(images_paths)))

    # Yield training images
    for image_path in images_paths:
        path_parts = image_path.split('/')
        image_label = path_parts[-2].replace(' ', '')
        image_name = path_parts[-1]

        # Image.open() is lazy and keeps the file open. Loading the pixel data
        # inside the context manager releases the file handle while leaving
        # the image (and its .format attribute) usable by the caller.
        with Image.open(image_path) as pil_image:
            pil_image.load()

        yield pil_image, image_label, image_name

## Check access and current data in S3 bucket

In [4]:
# Name of the S3 bucket this notebook connects to
S3_BUCKET_NAME = 'cloud-fruits-p8-bucket'

# Key prefix ("folder") inside the bucket under which images are listed and uploaded
s3_input_images_to_process_bucket_folder = 'input_images_to_process'

In [9]:
print("... list images from Fruits 360 dataset in S3 input_images_to_process bucket")

# Open a handle on the target bucket
s3_bucket = boto3.resource('s3').Bucket(S3_BUCKET_NAME)

# Collect the keys of the objects stored under the input prefix whose key contains '.jpg'
image_keys = []
for s3_object in s3_bucket.objects.filter(Prefix=s3_input_images_to_process_bucket_folder):
    if '.jpg' in s3_object.key:
        image_keys.append(s3_object.key)

# Report how many keys were found, followed by the keys themselves
print(len(image_keys), image_keys)

... list images from Fruits 360 dataset in S3 input_images_to_process bucket
50 ['input_images_to_process/Apricot_145_100.jpg', 'input_images_to_process/Apricot_146_100.jpg', 'input_images_to_process/Apricot_148_100.jpg', 'input_images_to_process/Apricot_179_100.jpg', 'input_images_to_process/Apricot_181_100.jpg', 'input_images_to_process/Apricot_182_100.jpg', 'input_images_to_process/Apricot_r_108_100.jpg', 'input_images_to_process/Apricot_r_110_100.jpg', 'input_images_to_process/Apricot_r_111_100.jpg', 'input_images_to_process/Apricot_r_286_100.jpg', 'input_images_to_process/Apricot_r_314_100.jpg', 'input_images_to_process/Apricot_r_317_100.jpg', 'input_images_to_process/Apricot_r_320_100.jpg', 'input_images_to_process/Banana_10_100.jpg', 'input_images_to_process/Banana_11_100.jpg', 'input_images_to_process/Banana_152_100.jpg', 'input_images_to_process/Banana_153_100.jpg', 'input_images_to_process/Banana_216_100.jpg', 'input_images_to_process/Banana_217_100.jpg', 'input_images_to_pro

## Upload to S3 bucket

In [8]:
# Open a handle on the target bucket
s3_bucket = boto3.resource('s3').Bucket(S3_BUCKET_NAME)

# Empty S3 storage
#s3_bucket.objects.all().delete()

# Push a random sample of 50 training images to the input folder of the bucket
for pil_image, img_label, img_name in tqdm(iterate_fruits_360_dataset(n_images=50)):
    print(pil_image, img_label, img_name)

    # Serialise the image into an in-memory buffer using the image's own format,
    # then rewind the buffer so the upload reads it from the start
    image_buffer = io.BytesIO()
    pil_image.save(image_buffer, format=pil_image.format)
    image_buffer.seek(0)

    # Object key layout: <input folder>/<label>_<file name>
    s3_key = f'{s3_input_images_to_process_bucket_folder}/{img_label}_{img_name}'
    s3_bucket.upload_fileobj(image_buffer, Key=s3_key)

1it [00:00,  9.43it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5B593F70> Raspberry r_326_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5B593B20> Banana 216_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC95310> Apricot r_317_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5B593F70> Raspberry r_94_100.jpg


7it [00:00, 22.11it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC95820> Banana r_308_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E4C0> Apricot r_320_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E730> Raspberry r_327_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E280> Raspberry r_113_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC3D280> Kiwi r_317_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E670> Raspberry r_71_100.jpg


13it [00:00, 24.07it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC3DB20> Raspberry r_91_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E430> Banana 279_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC2ECA0> Kiwi r_318_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E190> Banana 217_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC525E0> Apricot 179_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC524C0> Banana 282_100.jpg


19it [00:00, 26.16it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC52850> Kiwi 222_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC52700> Apricot 148_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC52D60> Banana r_309_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC2E820> Banana 10_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E040> Apricot 145_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC52A60> Apricot r_286_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E280> Raspberry r_324_100.jpg


27it [00:01, 29.09it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC523D0> Apricot r_314_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC550D0> Apricot r_108_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC559D0> Banana 153_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC55AC0> Kiwi r_209_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E640> Apricot 146_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC957C0> Banana 283_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E550> Raspberry r_72_100.jpg


34it [00:01, 26.80it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC95280> Kiwi r_213_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E8E0> Banana r_256_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC3D100> Raspberry r_98_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC2E580> Banana 152_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC2EA90> Raspberry r_69_100.jpg


40it [00:01, 27.31it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E7C0> Banana 281_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC2ED90> Kiwi r_315_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC48580> Raspberry r_125_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E6D0> Apricot 182_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC95550> Kiwi r_319_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E1C0> Banana 38_100.jpg


43it [00:01, 25.70it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC4BB80> Apricot r_110_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC1E130> Kiwi r_211_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC52280> Raspberry r_70_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC52FD0> Raspberry r_88_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC52B50> Raspberry r_322_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC3DEE0> Kiwi r_320_100.jpg


50it [00:01, 26.16it/s]

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC521C0> Apricot r_111_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC3DDC0> Apricot 181_100.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=100x100 at 0x7F3C5AC959D0> Banana 11_100.jpg





## Empty AWS S3 bucket input folder

In [13]:
# Connect to S3 storage
#s3_bucket = boto3.resource('s3').Bucket(S3_BUCKET_NAME)

# Empty S3 storage
#s3_bucket_images = [image for image in s3_bucket.objects.filter(Prefix=s3_input_images_to_process_bucket_folder) if '.jpg' in image.key]
#for s3_bucket_image in s3_bucket_images:
#    s3_bucket_image.delete()    