In [1]:
import os
import shutil
import send2trash
import re

In [2]:
def walk_through_directory(directory):
    """
    Print the layout of a directory tree.

    For every folder reached by ``os.walk`` the folder path is printed,
    followed by the list of its immediate subfolders and then each file
    name on a line of its own.

    Parameters
    ----------
    directory : str
        Root of the tree to traverse.

    Returns
    -------
    None

    Example:
        >>> walk_through_directory('/home/user/Desktop')
    """
    for current_path, child_dirs, file_names in os.walk(directory):
        print(f"Current Folder: {current_path}")
        print("Subfolders:", child_dirs)
        # Guard the join: printing an empty join would emit a stray blank line.
        if file_names:
            print("\n".join(file_names))

In [4]:
def organize_by_format(directory, formats, destination):
    """
    Move every file whose name ends with one of `formats` into one folder.

    The whole tree under `directory` is walked, so matching files are
    collected from nested subfolders as well. Files that already sit directly
    inside the destination folder are left alone.

    Parameters
    ----------
    directory : str
        The directory which we need to organize.
    formats : list
        A list of file-name suffixes to collect (e.g. ``["jpg", "png"]``).
        Matching uses ``str.endswith``, so the suffix is compared literally.
    destination : str
        The destination directory to move the files to. It is joined onto
        `directory`, so a relative name creates a subfolder of `directory`
        while an absolute path is used as-is. Missing intermediate
        directories are created.

    Returns
    -------
        None

    Notes
    -----
    If the destination already contains a file with the same name, the
    incoming file is stored as ``<stem>_<n><ext>`` (smallest free ``n``)
    rather than aborting the run part-way through.

    Example:
        >>> organize_by_format("/home/user/Desktop/", ["jpg", "png"], "/home/user/Desktop/images")
    """
    new_dir = os.path.join(directory, destination)
    if not os.path.exists(new_dir):
        print("Creating the directory: ", new_dir)
        # makedirs (not mkdir) so a nested destination such as "media/images" works.
        os.makedirs(new_dir)
    else:
        print("Directory already exists")

    def _canonical(path):
        # Normalised form so "a/b", "a/b/" and "a/./b" compare equal.
        return os.path.normcase(os.path.abspath(path))

    destination_key = _canonical(new_dir)
    # str.endswith accepts a tuple, so each file is tested (and moved) at most
    # once even when several suffixes match it.
    suffixes = tuple(formats)

    for folder, subfolder, files in os.walk(directory):
        print(f"Current Folder: {folder}")
        if _canonical(folder) == destination_key:
            # Files directly inside the destination are already organized.
            continue
        for file in files:
            if not file.endswith(suffixes):
                continue
            original_path = os.path.join(folder, file)
            target_path = os.path.join(new_dir, file)
            if os.path.exists(target_path):
                # Same name already collected from another folder: pick a free name.
                stem, extension = os.path.splitext(file)
                counter = 1
                while os.path.exists(target_path):
                    target_path = os.path.join(new_dir, f"{stem}_{counter}{extension}")
                    counter += 1
            shutil.move(original_path, target_path)

    print("Organization complete!")

In [5]:
def calculate_directory_size(directory, output_unit = 'kb', exclude_files_by_format = None, 
    exclude_files_by_size = None, exclude_files_units = None, number_of_files = False):
    """
    Calculate the total size of the files under a directory tree.

    Parameters
    ----------
    directory : str
        The directory to calculate the size of.

    output_unit : str
        The unit to output the size in. Default is kb.
        Valid units are: b, kb, mb and gb (case-insensitive).
        ``None`` is treated as kb.

    exclude_files_by_format : list
        A list of file-name suffixes to exclude from the size calculation.
        Default is None (nothing excluded by format).

    exclude_files_by_size : int
        Every file with a size smaller than this value is excluded from the
        size calculation. Default is None (nothing excluded by size).
        The unit is given by `exclude_files_units`; if that is None, the
        unit is the same as `output_unit`.

    exclude_files_units : str
        The unit of `exclude_files_by_size`.
        Valid units are: b, kb, mb and gb (case-insensitive).

    number_of_files : bool
        If True, the number of files found is returned as well.

    Returns
    -------
    float, tuple of (float, int), or None
        The size of the non-excluded files in units of `output_unit`, rounded
        to two decimals. With ``number_of_files=True`` a tuple
        ``(size, num_files)`` is returned, where ``num_files`` counts every
        file whose size could be read, including excluded ones (the number
        actually included in the size is printed). ``None`` is returned when
        a unit is not recognised.

    Notes
    -----
    A file matching both the format filter and the size filter counts as one
    excluded file. Files whose size cannot be read (removed during the walk,
    permission denied, broken link) are skipped and not counted.

    Example
    -------
        >>> calculate_directory_size("/home/user/Desktop/", "mb", ["jpg", "png"], 100)
    """
    total_size = 0
    num_files = 0
    files_excluded = 0

    # Bytes per unit.
    conversion_dict = {
        'b': 1,
        'kb': 1024,
        'mb': 1024**2,
        'gb': 1024**3,
    }

    # The lookup table used to map None to 1024, but `.lower()` raised before
    # that entry could be reached; normalise here so None really means kb.
    if output_unit is None:
        output_unit = 'kb'

    try:
        #Get the output unit conversion factor
        output_unit_factor = conversion_dict[output_unit.lower()]

        #Get the exclude_files_size conversion factor
        if exclude_files_units is not None:
            exclude_files_size_factor = conversion_dict[exclude_files_units.lower()]
        else:
            exclude_files_size_factor = output_unit_factor
    except KeyError:
        print("Invalid output_unit")
        print("Valid units are: b, Kb, Mb and Gb")
        return None

    # Pre-compute both filters once instead of per file.
    if exclude_files_by_format is not None:
        excluded_suffixes = tuple(exclude_files_by_format)
    else:
        excluded_suffixes = ()
    if exclude_files_by_size is not None:
        size_threshold = exclude_files_by_size * exclude_files_size_factor
    else:
        size_threshold = None

    for folder, _subfolders, files in os.walk(directory):
        for file in files:
            try:
                file_size = os.path.getsize(os.path.join(folder, file))
            except OSError:
                # File vanished or is unreadable: skip it rather than abort the scan.
                continue

            num_files += 1

            excluded_by_format = file.endswith(excluded_suffixes)
            excluded_by_size = size_threshold is not None and file_size < size_threshold
            if excluded_by_format or excluded_by_size:
                # One increment per file, even when both filters match.
                files_excluded += 1
                continue

            total_size += file_size

    total_file_size = round(total_size / output_unit_factor, 2)

    #Printing number of excluded files
    if exclude_files_by_format or exclude_files_by_size:
        print(f"Total number of files excluded is: {files_excluded}")

    #Printing the outputs
    print(f"The size of the directory is: {total_file_size} {output_unit.title()}")
    if number_of_files:
        print(f"The number of files in the directory is: {num_files}")
        print(f"Numbers of files included in the size calculation: {num_files - files_excluded}")
        return total_file_size, num_files
    return total_file_size

In [6]:
# Downloads folder size in Mb, ignoring PNG files and anything under 1000 Kb.
calculate_directory_size(
    r"C:\Users\harik\Downloads",
    output_unit="mB",
    exclude_files_by_format=['png'],
    exclude_files_by_size=1000,
    exclude_files_units="kB",
    number_of_files=True,
)

Total number of files excluded is: 287
The size of the directory is: 12592.2 Mb
The number of files in the directory is: 482
Numbers of files included in the size calculation: 195


(12592.2, 482)

In [7]:
# Same folder with no exclusions, for comparison with the filtered run.
calculate_directory_size(
    r"C:\Users\harik\Downloads",
    output_unit="mB",
    number_of_files=True,
)

The size of the directory is: 12667.52 Mb
The number of files in the directory is: 482
Numbers of files included in the size calculation: 482


(12667.52, 482)

In [30]:
def large_files_in_directory(directory, min_size = 10, unit = 'mb', verbose = False, exclude_files_by_format =None):
    """
    Find the files under a directory tree that are larger than a given size.

    Parameters
    ----------
    directory : str
        The directory to search for the files.

    min_size : int
        Only files strictly larger than this size are reported. Default is 10.
        The unit of the size is specified by the `unit` argument.

    unit : str
        The unit of the `min_size` argument, also used for printed sizes.
        Default is mb. Valid units are: b, kb, mb and gb (case-insensitive).

    verbose : bool
        If True, the name, size and folder of every file found is printed.
        Default is False.

    exclude_files_by_format : list
        A list of file-name suffixes to leave out of the search.
        Default is None.

    Returns
    -------
    list or None
        Full paths of the files which are larger than the specified size,
        or None when `unit` is not recognised.

    Notes
    -----
    The threshold test and the running total work on exact byte counts; sizes
    are only rounded (two decimals) for display. Files whose size cannot be
    read are skipped.

    Example
    -------
        >>> large_files_in_directory("/home/user/Desktop/", min_size = 10, unit = 'mb')
    """

    total_files = 0
    total_bytes = 0
    found_files = []
    excluded = 0

    # Bytes per unit.
    conversion_dict = {
        'b': 1,
        'kb': 1024,
        'mb': 1024**2,
        'gb': 1024**3,
    }

    try:
        #Get the output unit conversion factor
        unit_factor = conversion_dict[unit.lower()]
    except KeyError:
        print("Invalid unit")
        print("Valid units are: b, Kb, Mb and Gb")
        return None

    # Compare in bytes so a file just over the threshold is not lost to rounding.
    min_size_bytes = min_size * unit_factor
    if exclude_files_by_format is not None:
        excluded_suffixes = tuple(exclude_files_by_format)
    else:
        excluded_suffixes = ()

    print("Searching...")
    for folder, _subfolders, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(folder, file)
            try:
                file_size = os.path.getsize(file_path)
            except OSError:
                # Covers files removed mid-walk as well as permission errors.
                continue

            if file.endswith(excluded_suffixes):
                # Skip outright: zeroing the size would still pass a negative min_size.
                excluded += 1
                continue

            if file_size > min_size_bytes:
                total_files += 1
                total_bytes += file_size
                if verbose:
                    file_size_in_unit = round(file_size / unit_factor, 2)
                    print(f"'{file}', with size of {file_size_in_unit} {unit.title()} is larger than {min_size} {unit.title()}")
                    print(f"\tFound at: {folder}")
                found_files.append(file_path)

    if total_files == 0:
        print("No files found")
    else:
        # Round once at the end to avoid accumulated floating-point noise.
        total_space = round(total_bytes / unit_factor, 2)
        print(f"Total number of files found: {total_files}")
        print(f"Number of files excluded: {excluded}")
        print(f"Total space of files found: {total_space} {unit.title()}")
        print("Done!")
    return found_files


In [31]:
# Every file over 100 Mb under the user profile.
files = large_files_in_directory(
    r"C:\Users\harik",
    min_size=100,
    unit='mb',
)

Searching...
Total number of files found: 347
Number of files excluded: 0
Total space of files found: 107549.42999999996 Mb
Done!


In [32]:
# files

['C:\\Users\\harik\\anaconda3\\Lib\\site-packages\\tensorflow\\python\\_pywrap_tensorflow_internal.pyd',
 'C:\\Users\\harik\\anaconda3\\Lib\\site-packages\\xgboost\\lib\\xgboost.dll',
 'C:\\Users\\harik\\anaconda3\\pkgs\\mkl-2020.2-256.conda',
 'C:\\Users\\harik\\anaconda3\\pkgs\\mkl-2021.4.0-haa95532_640.conda',
 'C:\\Users\\harik\\anaconda3\\pkgs\\pandoc-2.11-h9490d1a_0\\Scripts\\pandoc.exe',
 'C:\\Users\\harik\\anaconda3\\pkgs\\pandoc-2.12-haa95532_0\\Scripts\\pandoc.exe',
 'C:\\Users\\harik\\anaconda3\\Scripts\\pandoc.exe',
 'C:\\Users\\harik\\AppData\\Local\\atom\\app-1.46.0\\resources\\app.asar',
 'C:\\Users\\harik\\AppData\\Local\\atom\\app-1.47.0\\resources\\app.asar',
 'C:\\Users\\harik\\AppData\\Local\\atom\\packages\\atom-1.47.0-delta.nupkg',
 'C:\\Users\\harik\\AppData\\Local\\atom\\packages\\atom-1.47.0.nupkg',
 'C:\\Users\\harik\\AppData\\Local\\BlueStacksSetup\\BlueStacksInstaller_4.150.0.1118_native_09ba7d29227824f9f5b301587b5eda39.exe',
 'C:\\Users\\harik\\AppData\\Loc