# Dropbox Download Notebook
This notebook downloads the NMEA data from Dropbox, combines the hourly files into one file per day, and saves each daily file in the correct folder.

<b>Note:</b> This notebook requires the credentials file generated in the `dropbox_credentials.ipynb` notebook to be present in the same directory.

In [None]:
from dropbox import Dropbox
from dropbox.files import FileMetadata, FolderMetadata
from datetime import datetime
import json
import os
import shutil
import glob

The below code will ensure that the access token is valid and refresh it if necessary.

In [None]:
# Dropbox app credentials handed to the Dropbox client constructor in
# create_dropbox_client(). The values below are placeholders; replace them
# with your own app's key and secret before running this cell.
app_key = 'your_app_key_here'
app_secret = 'your_app_secret_here'


def load_credentials():
    """
    Read the saved Dropbox credentials from ``dropbox_creds.json``.

    Returns:
        The parsed JSON content of the credentials file, or ``None`` (after
        printing a hint) when the file does not exist in the working
        directory.
    """
    try:
        handle = open('dropbox_creds.json', 'r')
    except FileNotFoundError:
        print("Credentials file not found. Please run the dropbox_credentials.ipynb notebook first.")
        return None

    # The file exists: parse it and make sure the handle is closed afterwards.
    with handle:
        return json.load(handle)


def create_dropbox_client():
    """
    Build a Dropbox client from the saved credentials and refresh its token.

    Returns:
        A ``Dropbox`` instance, or ``None`` when ``load_credentials()`` could
        not provide any credentials.
    """
    stored = load_credentials()
    if stored is None:
        return None

    client = Dropbox(
        app_key=app_key,
        app_secret=app_secret,
        oauth2_access_token=stored['access_token'],
        oauth2_refresh_token=stored['refresh_token'],
    )
    # Refresh the access token right away, before the client is handed out.
    client.refresh_access_token()
    return client


# Create the shared client once and report whether the connection worked.
dbx = create_dropbox_client()
if dbx is not None:
    account = dbx.users_get_current_account()
    print("Successfully connected to Dropbox account: " +
          f"{account.name.display_name}")
else:
    print("Failed to create Dropbox client.")

## Download files
The function below creates a temporary directory to hold the downloaded NMEA data and lists all files inside the app's directory in Dropbox. It then loops over those files, downloading each one and keeping only those with a valid name (i.e. not DEFAULT.log), and records the kept files in a list for later processing.

In [None]:
def download_files():
    """
    Downloads every validly named hourly log from Dropbox into a tmp directory.

    Lists the entries returned by ``dbx.files_list_folder("")`` (following
    pagination) and downloads each file whose name is an hourly timestamp of
    the form ``YYMMDDHH.log``. Files with any other name (for example
    ``default.log``) and all non-file entries are skipped without being
    downloaded.

    Uses the module-level ``dbx`` client.

    Returns:
        list[datetime]: One timestamp per downloaded file, parsed from the
        file name, in listing order.
    """
    os.makedirs("tmp", exist_ok=True)

    entries = []
    result = dbx.files_list_folder("")
    entries.extend(result.entries)

    # handle pagination
    while result.has_more:
        result = dbx.files_list_folder_continue(result.cursor)
        entries.extend(result.entries)

    downloaded = []

    for entry in entries:
        if not isinstance(entry, FileMetadata):
            print(f"Skipping folder/other file type: {entry.path_lower}")
            continue

        remote_path = entry.path_lower
        fname = os.path.basename(remote_path)  # just the file name without any directory slashes
        name, ext = os.path.splitext(fname)  # split the timestamp part from the extension

        # Validate the name BEFORE downloading so files that would be thrown
        # away anyway (e.g. default.log) are never transferred.
        try:
            stamp = datetime.strptime(name, "%y%m%d%H")
        except ValueError:
            stamp = None

        # strptime also accepts names without zero padding, so additionally
        # require that the name round-trips and ends in .log: the formatting
        # step rebuilds the path as tmp/<YYMMDDHH>.log from the timestamp and
        # would fail on anything else.
        if stamp is None or ext != ".log" or stamp.strftime("%y%m%d%H") != name:
            print(f"Skipping file {fname} due to invalid date format")
            continue

        local_path = os.path.join("tmp", fname)  # create the local file path
        print(f"Downloading {remote_path} -> {local_path} ...")
        dbx.files_download_to_file(local_path, remote_path)
        downloaded.append(stamp)

    print("Finished Downloading!")
    return downloaded

In [None]:
# Actually run the download. This can take a while if you have lots of files.
# The result is the list that download_files() returns and is used by the
# formatting cell below.
files_downloaded = download_files()

## Format Files
Currently our files are in the wrong directory and are hourly files, while we need daily files.
This code loops over all the downloaded files, builds the correctly formatted file name (`station_nameDDD0.YY.A`), and then copies all files from the same day into the same daily file. If you would like to use a different station name, change `station_name` below. It needs to be a 4-character alphanumeric code.

In [None]:
# Merge the hourly logs in tmp/ into one file per day, named
# <station_name><DDD>0.<YY>.A and stored under refl_code/nmea/<station_name>/<YYYY>/.
station_name = 'feld'

# Sort so the hourly files are appended to each daily file in time order.
files_downloaded = sorted(files_downloaded)

# Group the hourly timestamps by calendar day. Dicts keep insertion order, so
# both the days and the hours within each day stay chronological.
hours_by_date = {}
for file_time in files_downloaded:
    hours_by_date.setdefault(file_time.date(), []).append(file_time)
downloaded_dates = list(hours_by_date)

for date, hourly_times in hours_by_date.items():
    # Day of year, zero-padded to three digits as the DDD field requires.
    day_of_year = date.timetuple().tm_yday
    output_file_name = f"{station_name}{day_of_year:03d}0.{date.strftime('%y')}.A"
    output_file_path = os.path.join("refl_code", "nmea", station_name, str(date.year))
    os.makedirs(output_file_path, exist_ok=True)

    # "wb" truncates, so re-running the cell rebuilds the daily file from
    # scratch instead of appending duplicate data.
    with open(os.path.join(output_file_path, output_file_name), "wb") as f:
        for file_time in hourly_times:
            hourly_path = os.path.join("tmp", file_time.strftime("%y%m%d%H") + ".log")
            with open(hourly_path, "rb") as f2:
                shutil.copyfileobj(f2, f)

## Cleanup
Removes all files from the tmp directory and then deletes the tmp directory itself.

In [None]:
# Delete the temporary download directory together with everything in it.
# shutil.rmtree already removes the contents (including any subdirectories),
# so no per-file loop is needed. The existence check lets this cell be re-run
# safely after tmp has already been removed.
if os.path.isdir("tmp"):
    shutil.rmtree("tmp")