In [1]:
# Include so all modules are re-imported before any cell is executed. 
# This is so custom modules do not have to be manually reloaded if changed.
%load_ext autoreload
%autoreload 2

# Import standard packaged
import os
import sys

# Import third party packages
import numpy as np
import pandas as pd

In [50]:
def get_a_dims(dir_path, 
               current_row=0, 
               max_depth=0, 
               dirs_only=False):
        
    for item in os.listdir(dir_path):
        nested_dir_path = os.path.join(dir_path, item)
        # If item is a directory or if it's not a directory but dirs_only is False:
        if os.path.isdir(nested_dir_path) or not dirs_only:
            # Move to the next row
            current_row += 1
        if os.path.isdir(nested_dir_path):
            max_depth += 1
            current_row, max_depth = get_a_dims(nested_dir_path, 
                                                current_row=current_row, 
                                                max_depth=max_depth, 
                                                dirs_only=dirs_only)
            
    return current_row, max_depth

get_a_dims(r'..\test-dir-00', dirs_only=False)

(11, 3)

In [95]:
def get_a_dims(dir_path, 
               current_row=0, 
               current_depth=0,
               dirs_only=False):
    
    # Depth list created
    depth_list = []
        
    for item in os.listdir(dir_path):
        nested_dir_path = os.path.join(dir_path, item)
            
        if os.path.isdir(nested_dir_path):
            current_row += 1
            current_depth += 1
            depth_list.append(current_depth)
            current_row, current_depth, nested_depth = get_a_dims(
                nested_dir_path, 
                current_row=current_row, 
                current_depth=current_depth,
                dirs_only=dirs_only)
            depth_list.append(nested_depth)
            current_depth -= 1
        # If item is not a directory and dirs_only is False (meaning we're collecting all files):
        elif not dirs_only:
            current_row += 1
            
    if len(depth_list) != 0:
        max_depth = max(depth_list)
    else:
        max_depth = 0
            
    return current_row, current_depth, max_depth

print(get_a_dims(r'..\test-dir-00', dirs_only=False))
print(get_a_dims(r'..\test-dir-00', dirs_only=True))

print(get_a_dims(r'C:\Users\gusb\Creative Cloud Files', dirs_only=False))
print(get_a_dims(r'C:\Users\gusb\Creative Cloud Files', dirs_only=True))

(11, 0, 2)
(3, 0, 2)
(1524, 0, 6)
(180, 0, 6)


In [13]:
def fill_dir_array(dir_path, 
                   a, 
                   current_row=0, 
                   current_depth=0, 
                   max_depth=0, 
                   dirs_only=False):
    
    for item in os.listdir(dir_path):
        nested_dir_path = os.path.join(dir_path, item)
        # If item is a directory or if it's not a directory but dirs_only is False:
        if os.path.isdir(nested_dir_path) or not dirs_only:
            # Add the item to the location of the current row and current depth in the array 
            a[current_row, current_depth] = item
            # Move to the next row
            current_row += 1
        if os.path.isdir(nested_dir_path):
            # Increase depth (represented as columns in the array) by one to symbolize 
            # going inside the directory. current_depth will be decremented after going 
            # through the contents of the directory, but max_depth will stay the same 
            # or continue increasing to accurately gauge the maximum depth or nested-ness
            current_depth += 1
            max_depth += 1
            # Recursively call fill_dir() once again to continue probing directories
            a, current_row, max_depth = fill_dir_array(nested_dir_path,
                                                       a,
                                                       current_row=current_row, 
                                                       current_depth=current_depth, 
                                                       max_depth=max_depth,
                                                       dirs_only=dirs_only)
            # Once the items in this directory have been completely probed 
            # (fill_dir() returns current_row and max_depth), decrement current_depth
            # to symbolize backing out of the directory
            current_depth -= 1
            
    return a, current_row, max_depth

In [99]:
def get_dir_df(dir_path, 
               dirs_only=False, 
               csv_save_path=None): 

    rows, placeholder, cols = get_a_dims(dir_path, dirs_only=dirs_only)
    # Make an array a one larger than the number of rows = rows (items in directories; 
    # with or without non-directory entries dependent on value of dirs_only) and 
    # two larger than the number of columns = cols (max depth of nested directories)
    # to account for the insertion of dir_path into the array a at row = 0, col = 0
    a = np.full((rows + 1, cols + 2), np.nan).astype('object')
    # Fill the first entry (row = 0, col = 0) of the array a with the passed directory path dir_path
    a[0, 0] = dir_path
    # Start filling the array  
    a, n_dirs, max_depth = fill_dir_array(dir_path, a, current_row=1, current_depth=1, max_depth=1, dirs_only=dirs_only)
    # Create pandas.DataFrame object df from array a
    df = pd.DataFrame(data=a)
    
    if csv_save_path is not None:
        # Put path into lowercase characters and strip off any trailing slashes
        csv_save_path = csv_save_path.lower()
        csv_save_path = csv_save_path.rstrip('\\')
        csv_save_path = csv_save_path.rstrip('/')
        # If the path doesn't end in .csv, append to the end of the string
        if not csv_save_path.endswith('.csv'):
            csv_save_path = f'{csv_save_path}.csv'
        
        # Save DataFrame df as csv at the passed save path
        df.to_csv(csv_save_path)
        print(f'CSV saved: {csv_save_path}')
    
    return df
    
# get_dir_df(r'..\test-dir-00', dirs_only=True, csv_save_path=r'..\test-dirs-only.csv')
get_dir_df(r'..\test-dir-00', dirs_only=False)
# get_dir_df(r'..\test-dir-00', dirs_only=True)
# get_dir_df(r'C:\Users\gusb\Creative Cloud Files', dirs_only=False)
# get_dir_df(r'C:\Users\gusb\Creative Cloud Files', dirs_only=True)
# get_dir_df(r'C:\Users\gusb\Creative Cloud Files', dirs_only=True, csv_save_path=r'..\results\gus-cc-dirs-only.csv')
# get_dir_df(r'C:\Users\gusb\Creative Cloud Files', dirs_only=False, csv_save_path=r'..\results\gus-cc.csv')

Unnamed: 0,0,1,2,3
0,..\test-dir-00,,,
1,,blank-00.txt,,
2,,blank-01.txt,,
3,,test-dir-01,,
4,,,blank-02.txt,
5,,,blank-03.txt,
6,,test-dir-02,,
7,,,blank-04.txt,
8,,,blank-05.txt,
9,,,blank-06.txt,


## These updated functions containing directory-only functionality have replaced the functions previously refactored into `.src\dir_funcs.py`