# Organize files by category based on file extensions


## Step 1: Mount Google Drive folder to Google Colab

Before we can begin organizing our files, we need to mount our Google Drive folder to Google Colab. We can do this using the following code:


In [None]:
# Colab-specific module that provides Google Drive integration.
from google.colab import drive
# Mount Drive at /content/drive; later cells read files from /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).



## Step 2: Install the `tqdm` library

We will be using the `tqdm` library to display a progress bar while we iterate through our files. To install this library, we can use the following code:


In [None]:
!pip install tqdm 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import os
import shutil
from tqdm import tqdm



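## Step 3: Find and print the unique file extensions

To decide which categories we need, we first determine which file extensions are present in our Google Drive folder. The function `get_unique_file_extensions` below walks the folder and collects every distinct extension; we then call it and print the result: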


In [None]:
def get_unique_file_extensions(src_folder):
    """Return the set of distinct file extensions found beneath ``src_folder``.

    The folder is walked recursively. Each extension is reported exactly as
    spelled on disk, including the leading dot (so ``.txt`` and ``.TXT`` are
    separate entries); files without an extension contribute ``''``.

    Args:
        src_folder: Path of the folder to scan.

    Returns:
        A set of extension strings.
    """
    return {
        os.path.splitext(name)[1]
        for _dirpath, _dirnames, names in os.walk(src_folder)
        for name in names
    }

# Runs only when this code is executed as the main module, not when imported.
if __name__ == "__main__":
    # Root of the mounted Google Drive to scan.
    source_folder = "/content/drive/MyDrive"
    unique_extensions = get_unique_file_extensions(source_folder)
    # Print the extensions so we can see which categories are needed.
    print(unique_extensions)


{'', '.glb', '.ipynb', '.xrm-ms', '.msi', '.pde', '.pptx', '.html', '.xlsx', '.gdoc', '.py', '.dat', '.LOG', '.csv', '.msu', '.gif', '.mp4', '.cat', '.jpeg', '.c', '.opax', '.h', '.pptm', '.htm', '.cab', '.js', '.opal', '.log', '.gsite', '.txt', '.xml', '.url', '.jpg', '.sb', '.css', '.zip', '.png', '.lnk', '.gdraw', '.yml', '.sb3', '.json', '.CHK', '.ino', '.gsheet', '.gz', '.LNK', '.emmx', '.gform', '.ipynb"', '.cpp', '.mst', '.md', '.gjam', '.chm', '.xcf', '.sig', '.jar', '.obj', '.epub', '.gslides', '.pdf', '.docx', '.pyc', '.svg', '.jfif', '.properties', '.ps', '.exe', '.inf', '.gscript', '.sh', '.sb2', '.TTF', '.mp3', '.dll', '.key'}


## Step 4: Define a dictionary mapping categories to file extensions

In [None]:
# Maps a category folder (path relative to the folder being organized) to the
# file extensions that belong in it. Extensions are lowercase and include the
# leading dot. Each extension appears in exactly one category, so the result
# does not depend on lookup order.
file_categories = {
    'Office/Documents': ['.txt', '.pdf', '.doc', '.docx', '.rtf', '.gdoc', '.odt'],
    'Office/Presentations': ['.ppt', '.pptx', '.gslides', '.odp', '.key'],
    'Office/Spreadsheets': ['.xls', '.xlsx', '.gsheet', '.ods', '.numbers', '.csv'],
    'Coding&Data/Code': ['.xml', '.json', '.php', '.sh', '.cpp', '.js', '.pde', '.java', '.cs', '.c', '.h', '.go', '.rb', '.pl', '.swift', '.ts'],
    'Coding&Data/Python': ['.py', '.pyc'],
    'Coding&Data/HTML&CSS': ['.html', '.css', '.scss', '.sass', '.less'],
    'Coding&Data/Jupyter_Notebooks': ['.ipynb'],
    'Coding&Data/Database_Files': ['.db', '.sql', '.sqlite', '.accdb'],
    'Multimedia/Videos': ['.mp4', '.mkv', '.flv', '.avi', '.mov', '.wmv'],
    'Multimedia/Images': ['.svg', '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico', '.tiff', '.webp'],
    'Multimedia/Audio': ['.mp3', '.wav', '.ogg', '.flac', '.m4a', '.aac'],
    'Multimedia/Fonts': ['.ttf', '.otf', '.woff', '.woff2', '.eot'],
    'Multimedia/3D_Models': ['.obj', '.fbx', '.dae', '.3ds', '.blend'],
    'Multimedia/Vector_Graphics': ['.ai', '.eps', '.sketch'],
    'Multimedia/CAD_Files': ['.dwg', '.dxf'],
    'Google_Workspace/Drawings': ['.gdraw'],
    'Google_Workspace/Sites': ['.gsite'],
    'Google_Workspace/Forms': ['.gform'],
    'Google_Workspace/Jamboard': ['.gjam'],
    'Google_Workspace/Scripts': ['.gscript'],
    'Others/Shortcuts': ['.lnk'],
    'Others/Markdown': ['.md', '.markdown'],
    'Others/PGP_Keys': ['.asc'],
    # '.pyc' is deliberately not repeated here: 'Coding&Data/Python' already
    # claims it, and a second listing could never be reached.
    'Others/Python_Code': ['.pyo'],
    'Others/Archives': ['.zip', '.tar', '.rar', '.7z', '.gz', '.bz2'],
    'Others/eBooks': ['.epub', '.mobi', '.azw', '.azw3'],
    # Files without any extension.
    'Others': [''],
}






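## Step 5: Define a function to move files into category folders

Now that we know which extensions are present and which category each one belongs to, we can define the function `categorize_files`. It moves every file into the sub-folder of its category (files whose extension is not listed go to `Others`). With `go_through=True` it also processes sub-folders and tries to delete the folders that end up empty: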

In [None]:
def categorize_files(src_folder, file_categories=file_categories, go_through=True):
    """Move files under ``src_folder`` into category sub-folders by extension.

    Each file is moved to ``<src_folder>/<category>`` for the first category in
    ``file_categories`` that lists its extension; files whose extension is not
    listed go to ``<src_folder>/Others``. Matching tries the exact spelling
    first and then falls back to a case-insensitive comparison, so
    ``photo.JPG`` lands next to ``photo.jpg``. A file already present in the
    destination is never overwritten: the incoming file gets a `` (1)``,
    `` (2)``, ... suffix before its extension instead.

    Args:
        src_folder: Folder to organize; category folders are created inside it.
        file_categories: Mapping of category path (relative to ``src_folder``)
            to a collection of extensions, each including the leading dot.
        go_through: If true, descend into sub-folders and try to remove every
            sub-folder that is empty afterwards (``src_folder`` itself is
            always kept). If false, only the direct entries of ``src_folder``
            are processed and nothing is removed.
    """
    abs_src = os.path.abspath(src_folder)

    # Build the extension lookups once instead of scanning every category for
    # every file. setdefault keeps the first category that lists an extension,
    # which preserves first-match-wins in mapping order.
    exact_lookup = {}
    folded_lookup = {}
    for category, extensions in file_categories.items():
        for ext in extensions:
            exact_lookup.setdefault(ext, category)
            folded_lookup.setdefault(ext.lower(), category)

    def category_for(file_ext):
        """Return the category for ``file_ext``, or 'Others' if unlisted."""
        if file_ext in exact_lookup:
            return exact_lookup[file_ext]
        return folded_lookup.get(file_ext.lower(), 'Others')

    def unique_destination(dest_folder, filename):
        """Return a path in ``dest_folder`` that does not exist yet."""
        candidate = os.path.join(dest_folder, filename)
        stem, ext = os.path.splitext(filename)
        counter = 1
        # lexists also catches dangling symlinks occupying the name.
        while os.path.lexists(candidate):
            candidate = os.path.join(dest_folder, f"{stem} ({counter}){ext}")
            counter += 1
        return candidate

    def move_file_to_category(file_path, filename, file_ext):
        dest_folder = os.path.join(src_folder, category_for(file_ext))

        # A previous run may already have put the file where it belongs;
        # leave it alone rather than renaming it against itself.
        if os.path.abspath(os.path.dirname(file_path)) == os.path.abspath(dest_folder):
            return

        os.makedirs(dest_folder, exist_ok=True)
        shutil.move(file_path, unique_destination(dest_folder, filename))

    def process_files(root, files):
        for filename in tqdm(files, desc=f"Processing files in {root}", unit="file"):
            file_path = os.path.join(root, filename)

            # os.listdir (go_through=False) returns directories as well.
            if os.path.isdir(file_path):
                continue

            file_ext = os.path.splitext(filename)[1]
            move_file_to_category(file_path, filename, file_ext)

        # Best-effort cleanup of emptied sub-folders; never remove the folder
        # the caller asked us to organize.
        if go_through and os.path.abspath(root) != abs_src:
            try:
                os.rmdir(root)
            except OSError:
                # Folder is not empty (or cannot be removed): keep it.
                pass
            else:
                print(f"Successful deleting folder: {root}")

    if go_through:
        # Walk bottom-up so children are emptied and removed before their
        # parent is tried; top-down would leave nested empty folders behind.
        folder_iterator = os.walk(src_folder, topdown=False)
    else:
        folder_iterator = [(src_folder, [], os.listdir(src_folder))]

    for root, _, files in folder_iterator:
        process_files(root, files)

## Step 6: Use the `categorize_files` function to sort the files in Google Drive





In [None]:
# Organize the whole Drive; with the default go_through=True, sub-folders are walked too.
source_folder = "/content/drive/MyDrive"
categorize_files(source_folder)

Processing files in /content/drive/MyDrive: 0file [00:00, ?file/s]
Processing files in /content/drive/MyDrive/Others: 100%|██████████| 103/103 [00:00<00:00, 1621.49file/s]
Processing files in /content/drive/MyDrive/Others/Archives: 100%|██████████| 2/2 [00:00<00:00, 508.25file/s]
Processing files in /content/drive/MyDrive/Others/Markdown: 100%|██████████| 38/38 [00:00<00:00, 1146.84file/s]
Processing files in /content/drive/MyDrive/Others/Shortcuts: 100%|██████████| 4/4 [00:00<00:00, 591.83file/s]
Processing files in /content/drive/MyDrive/Others/eBooks: 100%|██████████| 1/1 [00:00<00:00, 286.12file/s]
Processing files in /content/drive/MyDrive/Multimedia: 0file [00:00, ?file/s]
Processing files in /content/drive/MyDrive/Multimedia/Images: 100%|██████████| 418/418 [00:00<00:00, 1166.53file/s]
Processing files in /content/drive/MyDrive/Multimedia/3D_Models: 100%|██████████| 1/1 [00:00<00:00, 769.88file/s]
Processing files in /content/drive/MyDrive/Multimedia/Audio: 100%|██████████| 2/2 

Successful deleting folder: /content/drive/MyDrive/gf


Processing files in /content/drive/MyDrive/.ipynb_checkpoints: 0file [00:00, ?file/s]


Successful deleting folder: /content/drive/MyDrive/.ipynb_checkpoints
