# Useful standard library modules exercise solutions

In [5]:
import glob
import os
from pathlib import Path
import shutil
import subprocess
import sys
import zipfile

### Exercise: Make a script with a command line argument using sys.argv

1) Using a text editor such as VSCode, make a new ``*.py`` file with the following contents:

```python
import sys

if len(sys.argv) > 1:
    for argument in sys.argv[1:]:
        print(argument)
else:
    print("usage is: python <script name>.py argument")
    quit()
```

2) Try running the script at the command line

In [6]:
# Source of the exercise script, reproduced exactly as in the listing above:
# 4-space indentation throughout, with quit() inside the else branch so it
# only runs after the usage message is printed.
write_text = (
    'import sys\n\n'
    'if len(sys.argv) > 1:\n'
    '    for argument in sys.argv[1:]:\n'
    '        print(argument)\n'
    'else:\n'
    '    print("usage is: python <script name>.py argument")\n'
    '    quit()\n'
)

# Write the script into the current working directory so the following
# cells can run it as a subprocess.
with open('myscript.py', 'w') as dest:
    dest.write(write_text)

In [14]:
# Run the script with the same interpreter as this kernel (a bare 'python'
# may resolve to a different installation, or to none at all).
# capture_output=True is required for result.stdout to hold the script's
# output (it is None otherwise); text=True decodes it to str.
result = subprocess.run(
    [sys.executable, 'myscript.py'],
    check=True,
    capture_output=True,
    text=True,
)
print(result.stdout)

usage is: python <script name>.py argument


In [15]:
# Same as before, but now passing two command line arguments, which the
# script echoes back one per line.
# capture_output=True is required for result.stdout to hold the script's
# output (it is None otherwise); text=True decodes it to str.
result = subprocess.run(
    [sys.executable, 'myscript.py', 'arg1', 'arg2'],
    check=True,
    capture_output=True,
    text=True,
)
print(result.stdout)

arg1
arg2


## Testing Your Skills with a truly awful example:

#### the problem:
Pretend that the file `data/fileio/netcdf_data.zip` contains some climate data that we downloaded. If you open `data/fileio/netcdf_data.zip`, you'll see that within a subfolder `zipped` are a bunch of additional subfolders, each for a different year. Within each subfolder is another zipfile. Within each of these zipfiles is yet another subfolder, inside of which is the actual data file we want (`prcp.nc`). 

#### the goal:
To extract all of these `prcp.nc` files into a single folder, after renaming them with their respective years (obtained from their enclosing folders or zip files). e.g.  
```
prcp_1980.nc
prcp_1981.nc
...
```
This will allow us to open them together as a dataset in `xarray` (more on that later). Does this sound awful? I'm not making this up. This is the kind of structure you get when downloading tiles of climate data with the [Daymet Tile Selection Tool](https://daymet.ornl.gov/gridded/).

#### hint:
you might find these functions helpful:
```
glob.glob
os.path.isdir
os.makedirs
zipfile.ZipFile
os.path.split
os.path.splitext
os.path.join
shutil.move
os.rename
os.rmdir
```

### solution

First, extract the master zipfile

In [29]:
# Unpack the outer archive into the folder that contains it; the nested
# per-year zip files it holds are handled in the cells below.
master_archive = zipfile.ZipFile('data/fileio/netcdf_data.zip')
with master_archive:
    master_archive.extractall(path='data/fileio/')

Make a list of the zipfiles

In [30]:
# Collect the zip file found in each subfolder of zipped/, in sorted order
zipfiles = glob.glob('data/fileio/netcdf_data/zipped/*/*.zip')
zipfiles.sort()
# preview the first few entries
zipfiles[:5]

['data/fileio/netcdf_data/zipped/zipped_1980/12270_1980.zip',
 'data/fileio/netcdf_data/zipped/zipped_1981/12270_1981.zip',
 'data/fileio/netcdf_data/zipped/zipped_1982/12270_1982.zip',
 'data/fileio/netcdf_data/zipped/zipped_1983/12270_1983.zip',
 'data/fileio/netcdf_data/zipped/zipped_1984/12270_1984.zip']

In [31]:
# declare a destination path
dest_path = 'extracted_data'
variable = 'prcp'

# make sure the destination folder exists before anything is moved into it
os.makedirs(dest_path, exist_ok=True)

for f in zipfiles:
    # derive the inner folder name and the year from the zip file name,
    # e.g. '.../12270_1980.zip' -> name='12270_1980', year='1980'
    _, fname = os.path.split(f)
    name = os.path.splitext(fname)[0].replace('.tar', '')
    year = name.split('_')[1]

    # path of the data file inside the archive
    srcfile = '{}/{}.nc'.format(name, variable)
    # where we want the extracted .nc file to end up
    destfile = os.path.join(dest_path, '{}_{}.nc'.format(variable, year))

    with zipfile.ZipFile(f) as src:
        # unfortunately extract() recreates the member's whole folder path
        # under dest_path, not just the file; it returns the path it wrote
        extracted = src.extract(srcfile, dest_path)

    # move the file up out of its subfolder and rename it to include the
    # year in a single step; unlike os.rename, os.replace also overwrites a
    # destfile left over from a previous run on Windows, so the cell can be
    # re-run
    os.replace(extracted, destfile)
    # trash the (now empty) subfolder that was extracted
    os.rmdir(os.path.join(dest_path, name))
    print('{}/{} --> {}'.format(f, srcfile, destfile))

data/fileio/netcdf_data/zipped/zipped_1980/12270_1980.zip/12270_1980/prcp.nc --> extracted_data/prcp_1980.nc
data/fileio/netcdf_data/zipped/zipped_1981/12270_1981.zip/12270_1981/prcp.nc --> extracted_data/prcp_1981.nc
data/fileio/netcdf_data/zipped/zipped_1982/12270_1982.zip/12270_1982/prcp.nc --> extracted_data/prcp_1982.nc
data/fileio/netcdf_data/zipped/zipped_1983/12270_1983.zip/12270_1983/prcp.nc --> extracted_data/prcp_1983.nc
data/fileio/netcdf_data/zipped/zipped_1984/12270_1984.zip/12270_1984/prcp.nc --> extracted_data/prcp_1984.nc
data/fileio/netcdf_data/zipped/zipped_1985/12270_1985.zip/12270_1985/prcp.nc --> extracted_data/prcp_1985.nc
data/fileio/netcdf_data/zipped/zipped_1986/12270_1986.zip/12270_1986/prcp.nc --> extracted_data/prcp_1986.nc
data/fileio/netcdf_data/zipped/zipped_1987/12270_1987.zip/12270_1987/prcp.nc --> extracted_data/prcp_1987.nc
data/fileio/netcdf_data/zipped/zipped_1988/12270_1988.zip/12270_1988/prcp.nc --> extracted_data/prcp_1988.nc
data/fileio/netcdf_

## Bonus -- Determining the location of an executable

There are often times when you run an executable that is nested somewhere deep within your system path.  It can often be a good idea to know exactly where that executable is located.  This might one day save you from accidentally using an older version of an executable, such as MODFLOW.

In [32]:
# Define two functions to help determine 'which' program you are using
def is_exe(fpath):
    """
    Check whether fpath is an existing file that os.access reports
    as executable.  Return True if so, otherwise return False.
    """
    # anything that is not a regular file (missing path, directory, ...)
    # can never count as an executable
    if not os.path.isfile(fpath):
        return False
    return os.access(fpath, os.X_OK)

def which(program):
    """
    Locate the program and return its path.  Return
    None if the program cannot be located.

    The lookup order is:

    1. ``program`` exactly as given (an explicit path, or a bare name
       resolved against the current working directory); it is returned
       unchanged if it is executable.
    2. If ``program`` is a bare name, each directory listed in the PATH
       environment variable; the joined directory + name is returned
       for the first match.

    A ``program`` that includes a directory part is never searched for
    on PATH.  If PATH is not set, ``os.defpath`` is searched instead.

    See also ``shutil.which`` in the standard library.
    """
    # covers both an explicit path and an exe in the current working directory
    if is_exe(program):
        return program

    # an explicit path that is not executable is not looked up on PATH
    if os.path.dirname(program):
        return None

    # test for exe in path statement; fall back to the platform default
    # search path rather than raising KeyError when PATH is not defined
    search_path = os.environ.get("PATH", os.defpath)
    for path in search_path.split(os.pathsep):
        # PATH entries are sometimes wrapped in double quotes
        path = path.strip('"')
        exe_file = os.path.join(path, program)
        if is_exe(exe_file):
            return exe_file
    return None

In [33]:
# Look up the MODFLOW-NWT executable; the result is None if it cannot be located
modflow_exe = 'MODFLOW-NWT_64.exe'
which(modflow_exe)