# What

As described in #231, the engine needs to run automatically. This means that it needs to store some of its intermediate output in the cloud.

Storing that output means the engine can be run each time without having to start again from the beginning.

## Modules

In [None]:
from azure.storage.blob import BlobServiceClient
import pytz
from tqdm import tqdm

from dotenv import load_dotenv
import os
from datetime import datetime

load_dotenv()

# Connecting to storage container

In [None]:
# Build the storage-account client from a connection string assembled out of
# the AZURE_STORAGE_ACCOUNT_NAME and AZURE_STORAGE_ACCOUNT_KEY environment
# variables (load_dotenv() has been called earlier in this notebook).
# os.getenv returns None for an unset variable, which would end up in the
# connection string as the literal text "None".
blob_service_client = BlobServiceClient.from_connection_string(
    # NOTE(review): a literal "==" is appended after the account key —
    # presumably the stored key is missing its base64 padding; confirm.
    f"DefaultEndpointsProtocol=https;AccountName={os.getenv('AZURE_STORAGE_ACCOUNT_NAME')};AccountKey={os.getenv('AZURE_STORAGE_ACCOUNT_KEY')}==;EndpointSuffix=core.windows.net"
)

# Materialise the container listing; this also forces a first request to the
# service. The list is not referenced again in the cells shown here.
containers = list(blob_service_client.list_containers())


In [None]:
# Client for the container that holds the engine's intermediate output.
engine_output_container = blob_service_client.get_container_client("engineoutput")
# create_container() raises when the container already exists, which made this
# cell fail on every run after the first; only create it when it is missing.
if not engine_output_container.exists():
    engine_output_container.create_container()

# Uploading the output folder to storage 

In [None]:
# Upload one local file as a single blob
def upload_file(file_path, container, uploaded_file_name) -> None:
    """Send the contents of a local file to a blob in ``container``.

    Args:
        file_path: Path of the local file; it is opened in binary mode.
        container: Client object exposing ``get_blob_client``.
        uploaded_file_name: Name of the destination blob.
    """
    target_blob = container.get_blob_client(uploaded_file_name)
    print(f"Uploading {file_path} to {uploaded_file_name}")
    with open(file_path, mode="rb") as stream:
        # Hand over the open file object so the client reads from it directly.
        target_blob.upload_blob(stream)

# Function to upload a folder
def upload_folder(folder_path, container, uploaded_folder_name):
    """Upload every file below ``folder_path`` into ``container``.

    Each file is stored as ``<uploaded_folder_name>/<path relative to
    folder_path>``, so the local directory layout is mirrored in the blob
    names.

    Args:
        folder_path: Local directory to walk recursively.
        container: Client object passed through to ``upload_file``.
        uploaded_folder_name: Blob-name prefix ("folder") to upload under.
    """
    # tqdm wraps the os.walk iterator, so progress advances once per
    # directory visited rather than once per file.
    for root, _, files in tqdm(os.walk(folder_path)):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            relative_path = os.path.relpath(file_path, folder_path)
            # Blob names use "/" as the virtual-directory separator on every
            # platform. os.path.join would emit "\\" on Windows, so normalise
            # the separator (a no-op where os.sep is already "/").
            uploaded_file_name = os.path.join(
                uploaded_folder_name, relative_path
            ).replace(os.sep, "/")
            upload_file(file_path, container, uploaded_file_name)

# Upload the local output folder under a prefix named after the current
# Pacific/Auckland wall-clock time.
upload_folder(
    folder_path="../../output",
    container=engine_output_container,
    uploaded_folder_name=datetime.now(tz=pytz.timezone("Pacific/Auckland")).strftime("%Y-%m-%d_%H:%M:%S"),
)

# Download the output folder from storage

In [None]:
def get_latest_folder(container):
    """Return the name of the most recent timestamped top-level folder.

    A "folder" is the part of a blob name before the first "/". Only names
    matching ``%Y-%m-%d_%H:%M:%S`` are considered; anything else in the
    container is ignored.

    Args:
        container: Client object exposing ``list_blobs()``, whose items have
            a ``name`` attribute.

    Returns:
        The folder name carrying the latest timestamp.

    Raises:
        ValueError: If no timestamped folder exists in the container.
    """
    timestamp_format = "%Y-%m-%d_%H:%M:%S"

    folders = {blob.name.split("/")[0] for blob in container.list_blobs()}

    parsed = {}
    for folder in folders:
        try:
            # The names are wall-clock times that were all written in one time
            # zone, so naive datetimes order them correctly. Calling
            # astimezone() on a naive value would first reinterpret it in the
            # machine's local zone, which is not the zone it was written in.
            parsed[folder] = datetime.strptime(folder, timestamp_format)
        except ValueError:
            # Not a timestamped upload; skip it rather than abort the lookup.
            continue

    if not parsed:
        raise ValueError("No timestamped folders found in the container")

    return max(parsed, key=parsed.get)

# Display the folder that is currently picked as the latest upload.
get_latest_folder(container=engine_output_container)

In [None]:
def download_folder(container, folder_name, downloaded_folder_name):
    """Download every blob under ``folder_name`` into a local directory.

    The path of each blob relative to ``folder_name`` is recreated below
    ``downloaded_folder_name``; missing local directories are created.

    Args:
        container: Client object exposing ``list_blobs(name_starts_with=...)``
            and ``get_blob_client(name)``.
        folder_name: Blob-name prefix ("folder") to download. An empty string
            selects every blob in the container.
        downloaded_folder_name: Local directory to write the files into.
    """
    # Match on "<folder>/" rather than the bare name. A bare prefix such as
    # "run1" also matches "run10/...", whose relative path would then point
    # outside the target directory.
    prefix = (folder_name.rstrip("/") + "/") if folder_name else ""

    for blob in container.list_blobs(name_starts_with=prefix):
        blob_name = blob.name
        relative_name = blob_name[len(prefix):]
        if not relative_name or relative_name.endswith("/"):
            # Entry for the folder itself, not a file; nothing to write.
            continue

        print(f"Downloading {blob_name} to {downloaded_folder_name}")
        # Blob names are "/"-separated; rebuild the path with the local separator.
        download_path = os.path.join(downloaded_folder_name, *relative_name.split("/"))
        parent_dir = os.path.dirname(download_path)
        if parent_dir:
            # exist_ok avoids the check-then-create race of a separate exists() test.
            os.makedirs(parent_dir, exist_ok=True)
        with open(download_path, "wb") as data:
            container.get_blob_client(blob_name).download_blob().readinto(data)

# Fetch the most recent upload into the local "test" directory.
download_folder(
    container=engine_output_container,
    folder_name=get_latest_folder(container=engine_output_container),
    downloaded_folder_name="test",
)