# In [3]:
from google.cloud import storage
from tqdm import tqdm
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

# Local inputs: the service-account key file and the folder whose contents
# are uploaded.
key_path = 'google-cloud-key.json'
folder_path = 'Dataset'

# Name of the destination bucket.
bucket_name = 'fashionism_dataset'

# Build a client from the service-account key, then fetch the bucket object
# that upload_file() writes to.
client = storage.Client.from_service_account_json(key_path)
bucket = client.get_bucket(bucket_name)

def upload_file(file_path: str) -> str:
    """Upload one local file to the bucket and make the object public.

    The object name is the file's path relative to ``folder_path``, so the
    directory layout below that folder is mirrored in the bucket.

    Args:
        file_path: Path of a local file located under ``folder_path``.

    Returns:
        A status message naming the uploaded object.
    """
    relative_path = os.path.relpath(file_path, folder_path)
    # os.path.relpath joins components with os.sep. Object names use '/' as
    # the hierarchy delimiter, so normalise the separator; otherwise a run on
    # Windows would create objects with literal backslashes in their names.
    # On POSIX os.sep is already '/', making this a no-op.
    blob_name = relative_path.replace(os.sep, '/')
    blob = bucket.blob(blob_name)
    blob.upload_from_filename(file_path)
    blob.make_public()

    return f'File {blob_name} uploaded to GCS and set to public.'

# Walk folder_path recursively and collect the path of every file found.
file_paths = []
for dirpath, _subdirs, filenames in os.walk(folder_path):
    file_paths.extend(os.path.join(dirpath, name) for name in filenames)

# Use a ThreadPoolExecutor to perform parallel uploads. Each future is keyed
# by its file path so that a failure can be attributed to a specific file.
failed_uploads = []
with ThreadPoolExecutor() as executor:
    upload_tasks = {
        executor.submit(upload_file, file_path): file_path
        for file_path in file_paths
    }
    for future in as_completed(upload_tasks):
        try:
            result = future.result()
        except Exception as exc:
            # future.result() re-raises whatever upload_file raised. Record
            # it and keep going so the remaining outcomes are still reported
            # instead of being lost to the first error.
            failed_path = upload_tasks[future]
            failed_uploads.append((failed_path, exc))
            print(f'Failed to upload {failed_path}: {exc!r}')
        else:
            print(result)

if failed_uploads:
    # Still fail the run, but only after every upload has been reported.
    first_path, first_error = failed_uploads[0]
    raise RuntimeError(
        f'{len(failed_uploads)} of {len(file_paths)} uploads failed '
        f'(first failure: {first_path})'
    ) from first_error

print('All files uploaded to GCS and set to public.')