## Testing of methods written for the file_handling_methods.py module

In [2]:
import os # import os system module so that we can use its functions to e.g. check directory contents
start_path = ("C:/Users/svreugdenhil/OneDrive - NIOZ/Documenten/GitHub/") # start of the path all my files are located

#### URL functions

In [2]:
def download_from_url_a(url, filename):
    """
    Description
        ----
        Download the file behind a URL with urllib.request.urlretrieve, which
        ships with Python (version 3.10.9), and report the result on screen.
        When filename is a bare name the file ends up in the current working
        directory; when it is an absolute or relative path the file is
        written there instead (the target folder has to exist already).
        This function was tested in a Jupyter notebook.
        ----
    Parameters
        ----
        url
            >> the url link from which to download the file
        filename
            >> the name the downloaded file gets in the current working
            directory
            >> OR a filepath (absolute or relative) inside an existing folder
        ----
    Returns
        ----
        Nothing is returned. urlretrieve hands back a pair (local filename,
        response headers); the function prints a success message containing
        the local filename, followed by the headers.
        ----
    Versioning
        ----
        Python: 3.10.9
        urllib included in Python version
        ----
    """
    from urllib.request import urlretrieve

    # urlretrieve yields (path of the saved file, header information)
    result = urlretrieve(url, filename)
    saved_as, headers = result
    print(f"Successfully downloaded {saved_as}\n")
    print(headers)

In [3]:
# Define URL
url = "https://github.com/PacktPublishing/Learning-Geospatial-Analysis-with-Python-Fourth-Edition/raw/main/B19730_02_Asset_Files/hancock.zip"
# Define filename
filename = "test_files/hancock.zip" 

# Download file
download_from_url_a(url, filename)

Successfully downloaded test_files/hancock.zip

Connection: close
Content-Length: 17341
Cache-Control: max-age=300
Content-Security-Policy: default-src 'none'; style-src 'unsafe-inline'; sandbox
Content-Type: application/zip
ETag: "27b48bb2069d369cf9e3265594d6b9733ee524ee3298ad109e17773a2bf5387a"
Strict-Transport-Security: max-age=31536000
X-Content-Type-Options: nosniff
X-Frame-Options: deny
X-XSS-Protection: 1; mode=block
X-GitHub-Request-Id: 6EAC:0E20:21888BA:22F11B3:660588F3
Accept-Ranges: bytes
Date: Thu, 28 Mar 2024 15:16:27 GMT
Via: 1.1 varnish
X-Served-By: cache-ams21040-AMS
X-Cache: HIT
X-Cache-Hits: 1
X-Timer: S1711638987.445082,VS0,VE1
Vary: Authorization,Accept-Encoding,Origin
Access-Control-Allow-Origin: *
Cross-Origin-Resource-Policy: cross-origin
X-Fastly-Request-ID: 1e434e7d23ea6dcc4823f646f3fde44b3131f7ff
Expires: Thu, 28 Mar 2024 15:21:27 GMT
Source-Age: 215




In [4]:
os.listdir(f"{start_path}/digital_data_EDS/test_files") # check if files are present

['.ipynb_checkpoints', 'hancock.zip']

In [5]:
def download_from_url_b(url, filename, timeout=60):
    """
    IMPORTANT NOTE:
    install the requests module into your environment before using this
    function
    Description
        ----
        This function uses the requests module to download a file from a given
        URL link. The file is downloaded into the current working directory if
        only a filename is provided, or into a specified existing directory
        (absolute or relative filepath).
        The HTTP status of the response is checked before anything is written,
        so an error response (e.g. 404) raises an exception instead of being
        saved to disk as if it were the requested file.
        This function was tested in a Jupyter notebook.
        ----
    Parameters
        ----
        url
            >> the url link from which to download the file
        filename
            >> the name the file is to be called in the current working
            directory
            >> OR a filepath (absolute or relative) inside an existing folder
        timeout (optional)
            >> number of seconds to wait for the server before giving up,
            if not provided = 60
        ----
    Returns
        ----
        Nothing is returned. Prints a statement that the file was successfully
        created and writes the file to the current working directory (if only
        a filename was specified) or to the directory that was specified.
        ----
    Raises
        ----
        requests.HTTPError
            >> when the server answers with a 4xx or 5xx status code
        requests.Timeout
            >> when the server does not answer within timeout seconds
        ----
    Versioning
        ----
        Python: 3.10.9
        requests: 2.31.0
        ----
    """
    import requests

    # without a timeout requests waits indefinitely on an unresponsive server
    r = requests.get(url, timeout=timeout)  # access url
    # fail before touching the target file, so that an error page is never
    # stored under the requested filename
    r.raise_for_status()
    # write content of the response into a new file:
    with open(filename, "wb") as f:
        f.write(r.content)
    print(f"The file {filename} was succesfully created.")

In [6]:
# Define URL
url = "https://github.com/PacktPublishing/Learning-Geospatial-Analysis-with-Python-Fourth-Edition/raw/main/B19730_02_Asset_Files/hancock.zip"
# Define filename
filename = "test_files/hancock_2.zip" 

# Download file
download_from_url_b(url, filename)

The file test_files/hancock_2.zip was succesfully created.


In [9]:
os.listdir(f"{start_path}/digital_data_EDS/test_files") # check if files are present

['.ipynb_checkpoints', 'hancock.zip', 'hancock_2.zip']

#### FTP functions

In [14]:
def check_ftp_content(
    server, username="anonymous", password="anonymous", dir=""
    ):
    """
    Description
        ----
        This function uses the ftplib module that is included in Python (
        version 3.10.9) to view the contents of a specified FTP server.
        When accessing a public server, username and password are usually both
        "anonymous". When a username and password are not provided, this
        function automatically uses "anonymous" as both username and password.
        The connection is made with ftplib.FTP_TLS and the data channel is
        switched to protected mode (prot_p) before the listing is requested.
        If no directory is specified, the contents of the directory the
        server puts you in after login are listed, else the contents of the
        specified directory are listed.
        The connection is always closed again, also when an error occurs
        (e.g. the directory does not exist).
        NOTE 28-03-2024: this function has NOT been tested with a non-public
        FTP server yet!
        ----
    Parameters
        ----
        server
            >> the address of the FTP server you are reaching
        username (optional)
            >> username for the FTP server, if not provided = "anonymous"
        password (optional)
            >> password for the FTP server, if not provided = "anonymous"
        dir (optional)
            >> the name or path of the directory within the FTP server
        ----
    Returns
        ----
        Nothing is returned. The answer of the server to the LIST command is
        printed line by line.
        ----
    Versioning
        ----
        Python: 3.10.9
        ftplib included in Python version
        ----
    """
    import ftplib

    # the with-statement quits/closes the connection even if a command fails
    with ftplib.FTP_TLS(server) as ftp:
        ftp.login(username, password)  # use the credentials that were given
        ftp.prot_p()  # explicitly ask for a protected data connection
        if dir:
            ftp.cwd(dir)  # only change directory when one was requested
        ftp.retrlines("LIST")  # prints every line of the listing

In [11]:
# Specify server
server = "ftp.pmel.noaa.gov"

In [15]:
check_ftp_content(server)

drwxrwxr-x    9 1054     310          4096 Aug 26  2013 AD
drwxrwxr-x   11 1054     310          4096 Nov 02  2018 CARD
drwxrwxr-x   13 1054     310          4096 Oct 30  2014 CNSD
drwxrwxr-x    5 1054     310          4096 Jun 05  2019 EDD
drwxrwsr-x    8 1052     310          4096 Apr 24  2015 EPIC
drwxr-xr-x    3 1054     310          4096 Feb 01  2011 GTMBAdata
drwxrwxr-x    3 1054     310          4096 Sep 03  2008 NOAAServer
drwxr-xr-x   63 1054     310          4096 Mar 15  2013 OCRD
drwxrwxr-x    7 1054     310          4096 Nov 16  2011 OD
drwxr-xr-x    3 1054     310          4096 Aug 05  2010 OER
drwxr-xr-x   13 1054     310          4096 Jul 20  2020 OERD
drwxr-xr-x    2 1054     310          4096 Feb 28  2005 PMEL
drwxrwxr-x    6 1054     310          4096 Jul 13  2017 arctic-heat
drwxrwxr-x    4 1054     310          4096 Aug 08  2022 asvco2
drwxrwxr-x    5 1054     310          4096 Apr 12  2004 atlas
drwxrwxr-x    7 1054     310          4096 Nov 07  2013 cfc
drwxrwxr-x

In [16]:
check_ftp_content(server, dir="AD")

drwxr-xr-x    2 1054     310          4096 Jun 12  2001 anderson
drwxrwxr-x   25 1054     310          4096 Jun 07  2018 graphics
drwxrwxr-x    5 1054     310          4096 Sep 09  2010 ryan
drwxrwxr-x    2 1054     310          4096 Aug 26  2013 sandra
drwxr-xr-x    4 1054     310          4096 Mar 11  1998 sim
drwxr-xr-x    2 1054     310          4096 Nov 12  2014 tracey
drwxr-xr-x    2 1054     310          4096 Jun 05  2006 tsu_res


In [17]:
check_ftp_content(server, dir="AD/ryan")

drwxr-xr-x    2 1054     310          4096 May 21  2003 spillane
drwxr-xr-x    2 1054     310          4096 Sep 09  2010 tsunami_forecast_reports
drwxr-xr-x    2 1054     310          4096 Jun 28  2010 tsunami_hazard_assessment


In [35]:
def file_from_ftp(
    server, dir, filename, username="anonymous", password="anonymous"
    ):
    """
    Description
        ----
        This function uses the ftplib module that is included in Python (
        version 3.10.9) to download a specific file from a FTP server. The
        local copy gets the same name as the file on the server and is
        written relative to the current working directory; it is not possible
        to specify a different target directory.
        When downloading files from a public server, username and password are
        usually both "anonymous". When a username and password are not
        provided, this function automatically uses "anonymous" as both
        username and password. The connection is made with ftplib.FTP_TLS and
        the data channel is switched to protected mode (prot_p) before the
        transfer starts.
        The connection is always closed again, also when an error occurs
        (e.g. the directory or the file does not exist on the server).
        NOTE 28-03-2024: this function has NOT been tested with a non-public
        FTP server yet!
        ----
    Parameters
        ----
        server
            >> the address of the FTP server you are reaching
        dir
            >> the name of the directory in which the file you are
            downloading is located (can be a path)
        filename
            >> the name of the file that you are downloading
        username
            >> username for the FTP server, if not provided = "anonymous"
        password
            >> password for the FTP server, if not provided = "anonymous"
        ----
    Returns
        ----
        Nothing is returned. Prints a statement that the file was successfully
        downloaded, followed by the representation of the (closed) local file
        object, and writes the file to the current working directory.
        ----
    Versioning
        ----
        Python: 3.10.9
        ftplib included in Python version
        ----
    """
    import ftplib

    # the with-statement quits/closes the connection even if a command fails
    with ftplib.FTP_TLS(server) as ftp:  # access server through TLS session
        ftp.login(username, password)  # login on server
        ftp.prot_p()  # explicitly call for protected transfer
        ftp.cwd(dir)  # go to the directory of the file
        # Download the file: every received block is passed to out.write
        with open(filename, "wb") as out:
            ftp.retrbinary(f"RETR {filename}", out.write)
    print(f"The file {filename} was succesfully downloaded.")
    print(f"This is out: {out}")

In [19]:
# define variables
server = "ftp.pmel.noaa.gov"
dir = "taodata"
filename = "taobuoypos.dat"

In [33]:
# Call function
file_from_ftp(server, dir, filename) 

The file taobuoypos.dat was succesfully downloaded.
This is out: <_io.BufferedWriter name='taobuoypos.dat'>


In [22]:
os.listdir(f"{start_path}/digital_data_EDS/") # check if files are present

['.git',
 '.gitattributes',
 '.ipynb_checkpoints',
 'file_handling_methods.py',
 'file_handling_methods_testing.ipynb',
 'LICENSE',
 'list_of_handy_modules.md',
 'README.md',
 'taobuoypos.dat',
 'test_files']

In [36]:
# Read the sixth line of the downloaded file and report where that buoy is
with open(filename) as tao:
    buoy = tao.readlines()[5]
loc = buoy.split()  # whitespace separated columns of that line
print(f"Buoy {loc[0]} is located at {' '.join(loc[4:8])}")

Buoy DM467A is located at 8 03.1N 94 55.2W


In [37]:
os.rename(filename, f"test_files/{filename}")
os.listdir(f"{start_path}/digital_data_EDS/test_files")

['.ipynb_checkpoints', 'hancock.zip', 'hancock_2.zip', 'taobuoypos.dat']

#### ZIP

In [3]:
def extract_zip(file):
    """
    Description
        ----
        This function uses the zipfile module that is included in Python (
        version 3.10.9) to extract the files from a zip. A new folder is
        created in the current working directory with the built-in os module
        and the content of the zip is extracted straight into that folder, so
        nothing is written loose into the current working directory and no
        existing files there can be overwritten.
        The folder is named after the last file in the zip: its name up to
        the first dot (e.g. "hancock.shx" gives the folder "hancock"). If the
        zip is empty, or that name is empty (e.g. ".gitignore"), the name of
        the zipfile without its extension is used instead.
        Files that sit in sub-folders inside the zip keep their sub-folders
        below the new folder.
        The only thing that it needs to work is the path to the zipfile.
        ----
    Parameters
        ----
        file
            >> the path to the zipfile
        ----
    Returns
        ----
        Nothing is returned. Creates a folder in the current working
        directory containing the files from the zip.
        ----
    Raises
        ----
        FileExistsError
            >> when the folder to be created already exists; nothing is
            extracted in that case
        zipfile.BadZipFile
            >> when the file is not a valid zip
        ----
    Versioning
        ----
        Python: 3.10.9
        zipfile included in Python version
        os included in Python version
        ----
    """
    import os       # to build the folder name and create the folder
    import zipfile  # to read the zip

    # the with-statement closes the zipfile again, also when an error occurs
    with zipfile.ZipFile(file) as archive:
        members = archive.namelist()

        folder_name = ""
        if members:
            # use only the last path component, so that a member stored in a
            # sub-folder does not produce a nested folder name
            last_member = members[-1].rstrip("/")
            folder_name = os.path.basename(last_member).split(".")[0]
        if not folder_name:
            # empty zip or a name that starts with a dot: use the zip's name
            folder_name = os.path.splitext(os.path.basename(file))[0]

        # mkdir comes first: if the folder already exists we stop before
        # anything has been written to disk
        os.mkdir(folder_name)
        # extractall creates sub-folders where needed and strips absolute
        # paths and ".." parts from member names
        archive.extractall(folder_name)

In [6]:
file = "hancock.zip"
extract_zip(file)

In [4]:
os.listdir(f"{start_path}/digital_data_EDS/") # check if files are present

['.git',
 '.gitattributes',
 '.ipynb_checkpoints',
 'file_handling_methods.py',
 'file_handling_methods_testing.ipynb',
 'hancock',
 'LICENSE',
 'list_of_handy_modules.md',
 'README.md',
 'test_files']

#### TAR

In [19]:
def files_to_tar_gz(files, tar_name, dir_path=""):
    """Description
        ----
        This function uses the tarfile module that is included in Python (
        version 3.10.9) to create a tar archive with given files. To do so
        it uses the w:gz mode for gzipped compression. The names of the files
        need to be provided in a list, and the name of the archive to be
        created must be provided as well. If files in the current working
        directory need to be archived and compressed, a dir_path should not be
        provided. If files in another directory need to be archived, you do
        need to provide a path.
        An alternative for providing a dir_path, is providing the filenames as
        a path (e.g. ["hancock/hancock.dbf", "hancock/hancock.shp",
        "hancock/hancock.shx"], note that this way is not officially tested
        but should work nonetheless).
        The files are stored in the archive under the path that was used to
        find them, so with a dir_path the names inside the archive start with
        that dir_path.
        Please note that the new compressed archive file is always created in
        the current working directory.
        ----
    Parameters
        ----
        files
            >> a list of file names (or paths) that need to be archived and
            compressed
        tar_name
            >> the name of the new tar.gz file, provide the name without the
            file extension!
        dir_path (optional)
            >> the directory (or path of directories) in which the files to
            be archived are located, if not provided the file names are used
            as they are
        ----
    Returns
        ----
        Nothing is returned. Creates a gzipped tar archive in the current
        working directory.
        ----
    Versioning
        ----
        Python: 3.10.9
        tarfile included in Python version
        ----

    """
    import tarfile

    # the with-statement closes (and thereby finalises) the archive, also
    # when one of the files cannot be added
    with tarfile.open(f"{tar_name}.tar.gz", "w:gz") as tar:
        for file in files:
            # decide on the dir_path argument of this call, not on a variable
            # that happens to exist outside of the function
            file_path = f"{dir_path}/{file}" if dir_path else file
            tar.add(file_path)

In [18]:
# Archive three files from the "hancock" folder into hancock.tar.gz
folder_name = "hancock"  # directory that holds the files
tar_name = "hancock"     # archive name, extension is added by the function
files = ["hancock.dbf", "hancock.shp", "hancock.shx"]
files_to_tar_gz(files, tar_name, dir_path=folder_name)