# Google Drive File Organizer
This notebook reviews the files you own in Google Drive and suggests which shared-drive folder each one should be moved to, based on its name and type. It uses the Google Drive API to list files and folders and the OpenAI API to classify each file.

## Setup and Installation
In this section, we'll install necessary libraries and authenticate with Google and OpenAI.

In [None]:
# Install required packages
!pip install openai --quiet
!pip install ipywidgets --quiet

import os
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output
from google.colab import auth, drive, userdata
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from openai import OpenAI
from tqdm.notebook import tqdm
from google.auth import default

In [None]:
# Authenticate and create the required clients
# NOTE(review): presumably authenticate_user() has to run before default()
# so that credentials for the signed-in Colab user are available — confirm.
auth.authenticate_user()
# Mount Drive at /content/drive.
# NOTE(review): no cell visible here reads from this mount (file access goes
# through the Drive API service) — confirm it is still needed.
drive.mount('/content/drive')
# default() returns a pair; only the first item (the credentials) is kept.
creds, _ = default()

# Initialize OpenAI client
# The API key is looked up through Colab's `userdata` under the name 'openai_key'.
client = OpenAI(api_key=userdata.get('openai_key'))

## Google Drive API Functions
The following functions interact with the Google Drive API to fetch files and folders.

In [None]:
# Google Drive API Service Initialization
def get_drive_service():
    """Build a Google Drive v3 API client.

    Uses the module-level ``creds`` obtained during authentication.

    Returns:
        The service object created by ``build('drive', 'v3', ...)``.
    """
    api_name, api_version = 'drive', 'v3'
    return build(api_name, api_version, credentials=creds)

In [None]:
# Fetching Files and Folders from Google Drive
def get_personal_files(service):
    """List every non-folder file owned by the authenticated user.

    Args:
        service: Drive API service object exposing ``files().list(...)``.

    Returns:
        A list of file resource dicts (``id``, ``name``, ``mimeType``,
        ``parents``) accumulated across all result pages.
    """
    # Arguments that stay the same for every page request.
    request_args = {
        'q': "'me' in owners and mimeType != 'application/vnd.google-apps.folder'",
        'pageSize': 1000,
        'fields': "nextPageToken, files(id, name, mimeType, parents)",
    }

    collected = []
    token = None
    more_pages = True
    while more_pages:
        page = service.files().list(pageToken=token, **request_args).execute()
        collected.extend(page.get('files', []))
        token = page.get('nextPageToken')
        # A missing or empty token means the listing is complete.
        more_pages = bool(token)

    return collected

In [None]:
def get_shared_drives(service):
    """List all shared drives returned by the Drive API.

    Args:
        service: Drive API service object exposing ``drives().list(...)``.

    Returns:
        A list of shared-drive resource dicts gathered from every page.
    """
    def iter_pages():
        # Yield each response page, following nextPageToken until it runs out.
        token = None
        while True:
            page = service.drives().list(pageSize=100, pageToken=token).execute()
            yield page
            token = page.get('nextPageToken')
            if not token:
                return

    return [
        shared_drive
        for page in iter_pages()
        for shared_drive in page.get('drives', [])
    ]

In [None]:
def get_folders_in_drive(service, drive_id):
    """List every folder inside one shared drive.

    Args:
        service: Drive API service object exposing ``files().list(...)``.
        drive_id: ID of the shared drive to search.

    Returns:
        A list of folder resource dicts (``id``, ``name``, ``parents``)
        gathered from every result page.
    """
    def fetch_page(token):
        # One request scoped to the given shared drive, folders only.
        return service.files().list(
            q="mimeType='application/vnd.google-apps.folder'",
            corpora='drive',
            driveId=drive_id,
            includeItemsFromAllDrives=True,
            supportsAllDrives=True,
            fields="nextPageToken, files(id, name, parents)",
            pageToken=token,
        ).execute()

    batch = fetch_page(None)
    found = list(batch.get('files', []))
    # Keep requesting while the previous response carries a continuation token.
    while batch.get('nextPageToken'):
        batch = fetch_page(batch['nextPageToken'])
        found.extend(batch.get('files', []))

    return found

In [None]:
def get_folder_path(service, folder_id):
    """Build a '/'-joined path for a folder by walking up its parents.

    Starting at ``folder_id``, each item's name is looked up and the walk
    continues with its first listed parent until no parent is left.

    Args:
        service: Drive API service object exposing ``files().get(...)``.
        folder_id: ID of the folder to start from.

    Returns:
        The path from the topmost ancestor down to the folder, or ``None``
        when ``folder_id`` is falsy or any lookup raises ``HttpError``.
    """
    segments = []
    current_id = folder_id

    while current_id:
        try:
            meta = service.files().get(
                fileId=current_id,
                fields="id, name, parents",
                supportsAllDrives=True,  # needed for items on shared drives
            ).execute()
        except HttpError as error:
            print(f"Error fetching folder path for ID {current_id}: {error}")
            # A failed lookup discards the partial path.
            return None

        # Ancestors are discovered child-first, so prepend to keep root-first order.
        segments.insert(0, meta['name'])
        parent_ids = meta.get('parents', [None])
        current_id = parent_ids[0]

    if not segments:
        return None
    return '/'.join(segments)

## OpenAI API Function for Classification
This function uses the OpenAI API to classify files and suggest folders where they should be moved.

In [None]:
def _classification_result(move_needed, explanation, target_folder=None,
                           folder_path=None, folder_id=None):
    """Assemble the result dict returned by ``classify_file``."""
    return {
        "move_needed": move_needed,
        "target_folder": target_folder,
        "explanation": explanation,
        "folder_path": folder_path,
        "folder_id": folder_id,
    }


def _find_mentioned_folder(content, folders):
    """Return the folder whose name appears in ``content``, or ``None``.

    When several names appear (e.g. 'Reports' and 'Reports 2024'), the
    longest one wins so a short name that is merely a substring of the
    intended folder is not picked. Empty names are skipped because an
    empty string is contained in every reply. Ties keep list order.
    """
    mentioned = [
        folder for folder in folders
        if folder['name'] and folder['name'] in content
    ]
    return max(mentioned, key=lambda folder: len(folder['name']), default=None)


def classify_file(file_name, file_type, folders, drive_service=None):
    """Ask the model whether a file should be moved and, if so, where.

    Args:
        file_name: Name of the file to classify.
        file_type: MIME type of the file.
        folders: Candidate folder dicts; each must have ``name`` and ``id``.
        drive_service: Optional Drive API service used to resolve the target
            folder's path. When omitted, the module-level ``service`` is used.

    Returns:
        A dict with keys ``move_needed``, ``target_folder``, ``explanation``,
        ``folder_path`` and ``folder_id``. When the reply says no move is
        needed, or classification fails, ``move_needed`` is ``False`` and the
        folder fields are ``None``. When a move is suggested but no known
        folder name appears in the reply, ``move_needed`` is ``True`` with
        the folder fields left as ``None``.
    """
    prompt = f"""Given the file name '{file_name}' (type: {file_type}) and the following list of folders:
{', '.join([folder['name'] for folder in folders])}

Should this file be moved? If yes, to which folder? If no, say 'No move needed.'
Provide a brief explanation for your decision.
Untitled documents sheets and slides are typically spam and should be deleted
"""

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that organizes files."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=150,
            temperature=0.7
        )

        content = response.choices[0].message.content.strip()

        # Compare case-insensitively so "no move needed" is recognised too.
        if "no move needed" in content.casefold():
            return _classification_result(False, content)

        target_folder = _find_mentioned_folder(content, folders)
        if target_folder is None:
            print("Target folder not found in response content.")
            return _classification_result(True, content)

        print(f"Fetching path for folder ID: {target_folder['id'][:3]}")
        if drive_service is None:
            # Fall back to the notebook-level Drive service.
            drive_service = service
        folder_path = get_folder_path(drive_service, target_folder['id'])
        return _classification_result(
            True,
            content,
            target_folder=target_folder['name'],
            folder_path=folder_path,
            folder_id=target_folder['id'],
        )

    except HttpError as e:
        print(f"HTTP error during classification: {e}")
        return _classification_result(
            False,
            "Classification failed due to an HTTP error. Please try again later.",
        )
    except Exception as e:
        # Best-effort boundary: one failed file must not abort the whole run.
        print(f"Error during classification: {e}")
        return _classification_result(
            False,
            "Classification failed due to an error. Please try again later.",
        )


## Running the Workflow
Now, let's run the functions to fetch the files and folders, classify the files, and display the results for review.

In [None]:
# Initialize the Google Drive service
# `service` is a module-level name: classify_file() reads it as a global when
# it resolves a target folder's path, so this cell must run before classifying.
service = get_drive_service()

# Fetch personal files
print("Fetching personal files...")
# Non-folder files owned by the authenticated user (see get_personal_files).
personal_files = get_personal_files(service)

In [None]:
# Fetch shared drives and folders
print("Fetching shared drives and folders...")
shared_drives = get_shared_drives(service)
# Flat list of folder dicts from every shared drive; used as the candidate
# targets for classification.
all_folders = []
# The loop variable is deliberately not named `drive`: that name is the
# google.colab `drive` module imported in the setup cell, and rebinding it
# would break a later `drive.mount(...)` if the auth cell is re-run.
for shared_drive in shared_drives:
    folders = get_folders_in_drive(service, shared_drive['id'])
    all_folders.extend(folders)

In [None]:
# Classify files
print("Classifying files...")
# One result row per classified file; becomes the DataFrame below.
data = []
# Only the first 10 files are processed; each iteration calls classify_file,
# which sends one request to the OpenAI API.
for file in tqdm(personal_files[:10]):  # Adjust the slice for more files
    suggestion = classify_file(file['name'], file['mimeType'], all_folders)
    data.append({
        'File Name': file['name'],
        # Every file listed here came from get_personal_files, so the
        # location label is a fixed string rather than a looked-up path.
        'Current Location': 'Personal Drive',
        'Move Needed': suggestion['move_needed'],
        'Target Folder': suggestion['target_folder'],
        'Explanation': suggestion['explanation'],
        'Folder Path': suggestion['folder_path'],
        'Folder ID': suggestion['folder_id']
    })

# `df` is reused by the later cells that display and page through the results.
df = pd.DataFrame(data)

# Display the classification results
print("\nClassification Results:")
display(df)

In [None]:
# Hand the results DataFrame to Colab's InteractiveSheet.
# NOTE(review): presumably this renders an editable spreadsheet view of `df`
# — confirm against the google.colab.sheets documentation.
from google.colab import sheets
sheet = sheets.InteractiveSheet(df=df)

In [None]:
# Convert the results DataFrame into a list of row dicts, one per file.
# This rebinds the module-level `data` used by the pagination helpers below.
data = df.to_dict(orient='records')

# Define the pagination variables
page_size = 10  # Number of rows per page
current_page = 0  # Start at the first page

# Dictionary to keep track of the transfer state for each file
# Keys are row indexes into `data`; values start as None and are set by the
# Accept/Decline button handlers.
transfer_state = {i: None for i in range(len(data))}  # None, "Accepted", "Declined"

# Function to create the UI for each row
def create_row_widgets(index, row):
    """Build one table row: three labels plus Accept/Decline buttons.

    Args:
        index: Position of the row in ``data``; key into ``transfer_state``.
        row: Result dict with 'File Name', 'Current Location' and
            'Target Folder' entries.

    Returns:
        A ``widgets.HBox`` holding the labels and the two buttons.
    """
    accept_button = widgets.Button(
        description="Accept",
        button_style='success',
        layout=widgets.Layout(width='80px'),
    )
    decline_button = widgets.Button(
        description="Decline",
        button_style='danger',
        layout=widgets.Layout(width='80px'),
    )

    def lock_buttons():
        # Once a decision exists, neither button may be pressed again.
        for button in (accept_button, decline_button):
            button.disabled = True

    # Restore the locked state when the row is re-rendered after a decision.
    if transfer_state[index] in ("Accepted", "Declined"):
        lock_buttons()

    def on_accept_clicked(b):
        lock_buttons()
        transfer_state[index] = "Accepted"
        print(f"Accepted: {row['File Name']}")

    def on_decline_clicked(b):
        lock_buttons()
        transfer_state[index] = "Declined"
        print(f"Declined: {row['File Name']}")

    accept_button.on_click(on_accept_clicked)
    decline_button.on_click(on_decline_clicked)

    name_label = widgets.Label(row['File Name'], layout=widgets.Layout(width='40%'))
    location_label = widgets.Label(row['Current Location'], layout=widgets.Layout(width='20%'))
    # A missing target folder is shown as "No move needed".
    target_label = widgets.Label(
        row['Target Folder'] or "No move needed",
        layout=widgets.Layout(width='20%'),
    )

    return widgets.HBox([
        name_label,
        location_label,
        target_label,
        accept_button,
        decline_button,
    ])

# Function to update the table
def update_table():
    """Redraw the current page of rows inside the ``output`` widget.

    Reads the module-level ``current_page``, ``page_size`` and ``data``;
    clears the output area and displays one row widget per entry on the page.
    """
    first = current_page * page_size
    with output:
        clear_output()
        # Offset the enumeration so each row keeps its index into `data`.
        for offset, row in enumerate(data[first:first + page_size]):
            display(create_row_widgets(first + offset, row))

# Functions to handle pagination
def on_next_page(b):
    """Advance to the next page and redraw, unless already on the last page.

    Args:
        b: The clicked button (unused).
    """
    global current_page
    next_start = (current_page + 1) * page_size
    if next_start >= len(data):
        # No rows beyond the current page.
        return
    current_page += 1
    update_table()

def on_prev_page(b):
    """Go back one page and redraw, unless already on the first page.

    Args:
        b: The clicked button (unused).
    """
    global current_page
    if current_page <= 0:
        # Already at the first page.
        return
    current_page -= 1
    update_table()

# Create the pagination buttons
next_button = widgets.Button(description="Next", button_style='primary')
prev_button = widgets.Button(description="Previous", button_style='primary')

# Set button callbacks
next_button.on_click(on_next_page)
prev_button.on_click(on_prev_page)

# Output area for the table
# update_table() uses `output` as a global, so it must exist before the
# first update_table() call below.
output = widgets.Output()

# Display the UI
display(widgets.HBox([prev_button, next_button]))
display(output)
# Render the first page immediately.
update_table()