In [10]:
import zipfile as zf
import os

In [9]:
def unzip_data(zip_dir, new_dir, end):
    """
    Function extracts all files of interest from [multiple] zip files that have nested directories (zipped or otherwise) within them.
    Made for windows os.
    This function assumes that all zipped directories of interest of been placed inside an non-zipped directory and the path of that directory is known (this is what the zip_dir variable represents).
    This function only removes the end file type of interest - specified by end variable - by itself; it does not preserve the structure of directories it my belong to (file is taken out of place).
    Any directory [path] can be choosen as new_dir and this is where the extracted files will all be moved to. 
    Be carefull of running function multiple times as it does not check if files extracted already exist in the new directory; watch out for duplicates.
    
    The inputs are the path to the directory the zipped direcotries are in (old_dir); path to the directory you wish to put the files in (new_dir); and the file type (end), specified by the '.[blank]' at the end of the file.
    The output will be the movement of the files of interest to the selected directory (new_dir).
    """
    
    print(f'unzipping folders in {zip_dir}') #display zipped directories that the files of interest will be extracted from 
    #open zipped directory
    files = os.listdir(zip_dir) #get all file names within the an unzipped directory
    for file in files:
        if file.endswith('.zip'): #error handling to ensure only zip files are attempting to be unzipped (from within the unzipped directory)
            file_path = os.path.join(zip_dir, file) #create the path used for opening the unzipped directories
            #unzip directory
            #derivative of Gerhard Götz's answer at https://stackoverflow.com/questions/4917284/extract-files-from-zip-without-keeping-the-structure-using-python-zipfile
            with zf.ZipFile(file_path) as zip_folder:
                for zip_info in zip_folder.infolist(): 
                    #handle nested zip folders
                    if '.zip' in zip_info.filename: #detect if directory is zip or not
                        print(zip_info.filename) #tell user what file is being looked at
                        temp = os.path.join(zip_dir,'temp') #open a temperary directory to store all zipped directory
                        zip_folder.extractall(temp) #move all nested zip folders to temp directory
                        unzip_data(temp,new_dir,end) #repeat process with nested zip folders (call same function using the path leading to temp dir that has the [originally] nested folders)
                        
                        #attempt to delete temp directory path as it is no longer needed 
                        #original code from https://stackoverflow.com/questions/6996603/how-can-i-delete-a-file-or-folder-in-python by Anand Tripathi
                        try:
                            os.remove(new_dir)
                        except OSError as e:
                            # If it fails, inform the user.
                            print("Error: %s - %s." % (e.filename, e.strerror))
                        #end of borrowed code
                        
                        break #further steps have already been run when the function was recalled; prevent them from being re-ran
                    #take data from unzipped directories
                    else:
                        #only lookes at files and not other directories. We only want to take files from directories not directories from other directories
                        if zip_info.is_dir():
                            continue #skips files that fall under this condition
                        #ensures that we are only extracting the files type we want from the directory; 'end' referes to the file the .[blank] at the end of a file (specified when calling function)
                        elif zip_info.filename.endswith(end) != True:
                            continue #skips files that fall under this condition
                        #prevents the extraction of files formated for MAC OS (as this is being run on a windows PC)
                        elif '_MACOSX' in zip_info.filename: #can change to "elif '_MACOSX' not in zip_info.filename:" if running on MAC
                            continue #skips files that fall under this condition
                        #extracts only the files of interest from directory and puts them in a new directory (specified when calling function)
                        else:
                            print(zip_info.filename) #tell user what file is being extracted
                            zip_info.filename = os.path.basename(zip_info.filename) #makes it so we ONLY get the end file (out of place) and not the directories that it was in as well (in place)
                            zip_folder.extract(zip_info, new_dir) #put files into designated new directory
            #end of derivative 
        #connected to the if statement about only extracting zip files from original directory path; skips over them
        else: 
            continue #skips files that fall under this condition

In [7]:
CWAM_pathway = #pathway to directory where CWAM folders downloaded from bitgrit NASA challenge where stored
new_CWAM_dir = #pathway to directory you want extracted CWAM files to be placed in
CWAM_file_ending = '.bz2'
"""
Expected CWAM pathway:
    non-zipped DIR (CWAM_pathway)
    -zip dir 1 (download from bitgrit challenge)
        --_MACOSX
        --Dir with YYMMDD_YYMMDD name representing the time window it refers to
            ---Dir with MM name representing the month it refers to no. 1
                ----Dir with DD name representing the day it refers to no. 1
                    -----Dir with full datatime of forecast name no. 1
                        ------.bz2 file with data for datetime
                        
                    -----Dir with full datetime of forecast name no. 2
                    -----Dir with full datetime of forecast name etc.
                ----Dir with DD name no. 2
                ----Dir with DD name etc.
            ---Dir with MM name no. 2
            ---Dir with MM name etc.
            ---.DS_Store
    -zip dir 2 (download from bitgrit challenge)
    -zip dir 3 (download from bitgrit challenge)
    -zip dir etc. (download from bitgrit challenge)
"""

FUSAR_pathway = #pathway to directory where FUSAR folders downloaded from bitgrit NASA challenge where stored
new_FUSAR_dir = #pathway to directory you want extracted FUSAR files to be placed in
FUSAR_file_ending = '.csv'
"""
Expected FUSAR pathway:
    non-zipped DIR (FUSAR_pathway)
    -zip dir 1 (download from bitgrit challenge)
        --_MACOSX
        --Dir with Airport ICAO as name
            ---.csv file of fusar data no. 1
            
            ---.csv file of fusar data no. 2
            ---.csv file of fusar data etc.
            ---.DS_Store
    -zip dir 2 (download from bitgrit challenge)
    -zip dir 3 (download from bitgrit challenge)
    -zip dir etc. (download from bitgrit challenge)
"""

METAR_pathway = #pathway to directory where METAR folders downloaded from bitgrit NASA challenge where stored
new_METAR_dir = #pathway to directory you want extracted METAR files to be placed in
METAR_file_ending = '.txt'
"""
Expected METAR pathway:
    non-zipped DIR (METAR_pathway)
    -zip dir (download from bitgrit challenge)
        --nested zip dir no. 1
            ---_MACOSX
            ---Dir named METAR_train_part_[#]
                ---.txt file with METAR data for a timestamp no. 1
                ---.txt file with METAR data for a timestamp no. 2
                ---.txt file with METAR data for a timestamp etc.
            
        --nested zip dir no. 2
        --nested zip dir etc.
"""

TAF_pathway = #pathway to directory where TAF folders downloaded from bitgrit NASA challenge where stored
new_TAF_dir = #pathway to directory you want extracted TAF files to be placed in
TAF_file_ending = '.txt'
"""
Expected TAF pathway:
    non-zipped DIR (TAF_pathway)
    -zipped dir (download from bitgrit challenge)
        --_MACOSX
        --dir named TAF_train
            ---.txt file of TAF data for a timestamp no. 1
            ---.txt file of TAF data for a timestamp no. 2
            ---.txt file of TAF data for a timestamp no. etc.
"""

#METAR
unzip_data(METAR_pathway,new_METAR_dir,METAR_file_ending) 
#FUSAR
unzip_data(FUSAR_pathway,new_FUSAR_dir,FUSAR_file_ending)
#TAF
unzip_data(TAF_pathway,new_TAF_dir,TAF_file_ending)
#CWAM
unzip_data(CWAM_pathway,new_CWAM_dir,CWAM_file_ending)

unzipping folders in E:\Users\Alex\ENVS\bitgrit Challenge\DATA\METAR\Train
METAR_train_part_1.zip
unzipping folders in E:\Users\Alex\ENVS\bitgrit Challenge\DATA\METAR\Train\temp
METAR_train_part_1/metar.20221023.23Z.txt
METAR_train_part_1/metar.20220922.14Z.txt
METAR_train_part_1/metar.20221011.17Z.txt
METAR_train_part_1/metar.20220910.20Z.txt
METAR_train_part_1/metar.20220921.04Z.txt
METAR_train_part_1/metar.20221012.07Z.txt
METAR_train_part_1/metar.20221015.03Z.txt
METAR_train_part_1/metar.20221009.20Z.txt
METAR_train_part_1/metar.20220908.17Z.txt
METAR_train_part_1/metar.20220916.09Z.txt
METAR_train_part_1/metar.20220906.09Z.txt
METAR_train_part_1/metar.20220918.17Z.txt
METAR_train_part_1/metar.20221019.20Z.txt
METAR_train_part_1/metar.20221005.03Z.txt
METAR_train_part_1/metar.20220915.19Z.txt
METAR_train_part_1/metar.20221016.13Z.txt
METAR_train_part_1/metar.20221006.13Z.txt
METAR_train_part_1/metar.20220905.19Z.txt
METAR_train_part_1/metar.20221005.15Z.txt
METAR_train_part_1/metar