# Google Drive Sync for Kaggle/Colab

This notebook provides utilities to sync files with Google Drive when running on Kaggle or Google Colab.


In [1]:
# Install required packages
%pip install -q pydrive2 google-api-python-client google-auth-httplib2 google-auth-oauthlib

import os
import sys

# Platform detection: Kaggle kernels export KAGGLE_KERNEL_RUN_TYPE, while Colab
# is recognised by the COLAB_GPU variable or an already-imported google.colab.
IS_KAGGLE = os.environ.get('KAGGLE_KERNEL_RUN_TYPE') is not None
IS_COLAB = any(('COLAB_GPU' in os.environ, 'google.colab' in sys.modules))

# Kaggle takes precedence over Colab in the report when both flags are set.
print("Running on: " + ('Kaggle' if IS_KAGGLE else ('Colab' if IS_COLAB else 'Local')))


Running on: Colab


## Method 1: Google Colab - Direct Mount


In [2]:
# For Google Colab - Mount Google Drive
if not IS_COLAB:
    print("Not running on Colab - skipping drive mount")
else:
    from google.colab import drive

    # Attach the user's Google Drive to the Colab filesystem.
    drive.mount('/content/drive')

    # Root of the user's own Drive contents inside the mount.
    DRIVE_PATH = '/content/drive/MyDrive'
    print(f"Google Drive mounted at: {DRIVE_PATH}")

    # Folder on Drive that the save/load helpers read from and write to;
    # it is created on first use.
    MODEL_DIR = os.path.join(DRIVE_PATH, 'emoji_classification_models')
    os.makedirs(MODEL_DIR, exist_ok=True)
    print(f"Model directory: {MODEL_DIR}")


KeyboardInterrupt: 

## Method 2: Kaggle - Google Drive API


In [None]:
# For Kaggle - Use Google Drive API
# NOTE: For Kaggle, you'll need to set up Google Drive API credentials
# 1. Go to Google Cloud Console and create a project
# 2. Enable Google Drive API
# 3. Create OAuth 2.0 credentials
# 4. Download client_secrets.json and upload it to Kaggle as a dataset or secret
# 5. Alternatively, use service account credentials for automated access

if IS_KAGGLE:
    from pydrive2.auth import GoogleAuth
    from pydrive2.drive import GoogleDrive

    # PyDrive auth object; by default it looks for client_secrets.json
    # in the working directory.
    gauth = GoogleAuth()

    # Reuse credentials cached by an earlier run when they are on disk.
    if os.path.exists("credentials.json"):
        gauth.LoadCredentialsFile("credentials.json")

    if gauth.credentials is not None:
        if gauth.access_token_expired:
            # Cached token is stale: exchange the refresh token for a new one.
            gauth.Refresh()
        else:
            # Cached token is still valid: just activate it.
            gauth.Authorize()
    else:
        # Nothing cached: fall back to the interactive flow. A Kaggle kernel
        # may be unable to complete it, so a failure is reported rather than
        # raised and the helpers below stay disabled.
        print("Authenticating with Google Drive...")
        print("NOTE: In Kaggle, LocalWebserverAuth may not work.")
        print("Consider using service account credentials or pre-authenticated tokens.")
        try:
            gauth.LocalWebserverAuth()
        except Exception as auth_error:
            print(f"Authentication failed: {auth_error}")
            print("Please set up credentials manually or use service account authentication")

    # `drive` is the client used by the helper functions; None marks
    # "not authenticated".
    if not gauth.credentials:
        print("Warning: Google Drive authentication not completed")
        drive = None
    else:
        # Persist the credentials so the next run can skip the interactive flow.
        gauth.SaveCredentialsFile("credentials.json")
        drive = GoogleDrive(gauth)
        print("Google Drive API authenticated successfully!")
    
    # Function to upload file to Google Drive
    def upload_to_drive(file_path, folder_id=None, file_name=None):
        """
        Send a local file to Google Drive as a new Drive file.

        Args:
            file_path: Local path of the file to upload
            folder_id: ID of the destination Drive folder; when omitted no
                parent folder is set on the new file
            file_name: Title to give the file on Drive; defaults to the
                base name of file_path

        Returns:
            The ID of the created Drive file, or None when Drive is not
            authenticated or file_path does not exist.
        """
        if drive is None:
            print("Google Drive not authenticated. Cannot upload.")
            return None
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            return None

        title = file_name if file_name else os.path.basename(file_path)
        if folder_id:
            metadata = {'title': title, 'parents': [{'id': folder_id}]}
        else:
            metadata = {'title': title}

        remote = drive.CreateFile(metadata)
        remote.SetContentFile(file_path)
        remote.Upload()

        uploaded_id = remote['id']
        print(f"Uploaded {title} to Google Drive (ID: {uploaded_id})")
        return uploaded_id
    
    # Function to download file from Google Drive
    def download_from_drive(file_id, save_path):
        """
        Fetch a Google Drive file by ID and write it to a local path.

        Args:
            file_id: ID of the Drive file to fetch
            save_path: Local path the content is written to

        Returns:
            save_path once the download call has finished, or None when
            Drive is not authenticated.
        """
        if drive is not None:
            remote = drive.CreateFile({'id': file_id})
            remote.GetContentFile(save_path)
            print(f"Downloaded file to: {save_path}")
            return save_path

        print("Google Drive not authenticated. Cannot download.")
        return None
    
    # Function to list files in a folder
    def list_drive_files(folder_id=None, query=None):
        """
        List non-trashed files in Google Drive.

        Args:
            folder_id: Optional folder ID; restricts results to files whose
                parents include this folder
            query: Optional Drive search expression for further filtering;
                it is combined with folder_id when both are given

        Returns:
            The list returned by the Drive client, or an empty list when
            Drive is not authenticated.
        """
        if drive is None:
            print("Google Drive not authenticated. Cannot list files.")
            return []

        # Build the search expression from independent clauses so that a
        # caller-supplied query is kept when folder_id is also given.
        clauses = []
        if folder_id:
            clauses.append(f"'{folder_id}' in parents")
        if query:
            # Parenthesise so an "or" inside the caller's query cannot
            # swallow the clauses joined to it.
            clauses.append(f"({query})")
        clauses.append("trashed=false")

        return drive.ListFile({'q': " and ".join(clauses)}).GetList()
    
    print("Google Drive API functions loaded!")
else:
    print("Not running on Kaggle - Google Drive API not needed")


## Universal Functions (Work on both Kaggle and Colab)


In [None]:
def save_model_to_drive(model_path, model_name="best_swin_emoji_model.pt", folder_id=None):
    """
    Save model to Google Drive (works on both Colab and Kaggle)

    Args:
        model_path: Path to the model file to save
        model_name: Name for the saved model
        folder_id: Kaggle only - ID of the Drive folder to upload into;
            when None no parent folder is set on the upload

    Returns:
        On Colab the path of the copy inside the mounted Drive, on Kaggle
        whatever upload_to_drive returns (the new file ID, or None on
        failure). None when the model file is missing, the platform is
        unsupported, or the platform setup cell has not been run.
    """
    import shutil

    if not os.path.exists(model_path):
        print(f"Model file not found: {model_path}")
        return None

    if IS_COLAB:
        # MODEL_DIR only exists after the Colab mount cell ran to completion;
        # look it up defensively instead of failing with a NameError.
        model_dir = globals().get('MODEL_DIR')
        if not model_dir:
            print("Google Drive is not mounted - run the Colab mount cell first")
            return None

        drive_model_path = os.path.join(model_dir, model_name)
        shutil.copy2(model_path, drive_model_path)
        print(f"Model saved to Google Drive: {drive_model_path}")
        return drive_model_path

    if IS_KAGGLE:
        # upload_to_drive is defined by the Kaggle Drive API cell; report a
        # clear message when that cell was skipped.
        uploader = globals().get('upload_to_drive')
        if uploader is None:
            print("Google Drive API functions not loaded - run the Kaggle Drive API cell first")
            return None

        return uploader(model_path, folder_id=folder_id, file_name=model_name)

    print("Not running on Colab or Kaggle - cannot save to Google Drive")
    return None

def load_model_from_drive(model_name="best_swin_emoji_model.pt", local_path=None, file_id=None):
    """
    Load model from Google Drive (works on both Colab and Kaggle)

    Args:
        model_name: Name of the model file in Google Drive
        local_path: Local path to save/load the model; defaults to model_name
        file_id: Kaggle only - Drive file ID to download. When omitted the
            file is looked up by its title (model_name) and the first match
            is used.

    Returns:
        local_path when the model was copied/downloaded, otherwise None.
    """
    import shutil

    local_path = local_path or model_name

    if IS_COLAB:
        # MODEL_DIR only exists after the Colab mount cell ran to completion;
        # look it up defensively instead of failing with a NameError.
        model_dir = globals().get('MODEL_DIR')
        if not model_dir:
            print("Google Drive is not mounted - run the Colab mount cell first")
            return None

        drive_model_path = os.path.join(model_dir, model_name)
        if not os.path.exists(drive_model_path):
            print(f"Model not found in Google Drive: {drive_model_path}")
            return None

        shutil.copy2(drive_model_path, local_path)
        print(f"Model loaded from Google Drive: {drive_model_path}")
        return local_path

    if IS_KAGGLE:
        # Both helpers are defined by the Kaggle Drive API cell.
        downloader = globals().get('download_from_drive')
        lister = globals().get('list_drive_files')
        if downloader is None or lister is None:
            print("Google Drive API functions not loaded - run the Kaggle Drive API cell first")
            return None

        if not file_id:
            # No explicit ID: search Drive by title. Backslashes and single
            # quotes must be escaped inside a Drive query string literal.
            escaped = model_name.replace("\\", "\\\\").replace("'", "\\'")
            matches = lister(query=f"title = '{escaped}'")
            if not matches:
                print(f"Model not found in Google Drive: {model_name}")
                print("Pass file_id=... or use list_drive_files() to find the file ID")
                return None
            # Several Drive files may share a title; the first hit is used.
            file_id = matches[0]['id']

        # Propagate the helper's result so a failed download yields None
        # instead of a path to a file that was never written.
        return downloader(file_id, local_path)

    print("Not running on Colab or Kaggle - cannot load from Google Drive")
    return None

print("Universal functions loaded!")


## Usage Examples


In [None]:
# Example 1: Save model to Google Drive
# After training, save your model (first argument: local file, second: name to store it under):
# save_model_to_drive('best_swin_emoji_model.pt', 'best_swin_emoji_model.pt')

# Example 2: List files in Google Drive (Kaggle only)
if IS_KAGGLE:
    # With no arguments this lists every non-trashed file (not just the root folder);
    # pass folder_id=... to restrict the listing to one folder.
    # files = list_drive_files()
    # for file in files:
    #     print(f"File: {file['title']} (ID: {file['id']})")
    pass

# Example 3: Load model from Google Drive (first argument: name on Drive, second: local destination)
# model_path = load_model_from_drive('best_swin_emoji_model.pt', 'best_swin_emoji_model.pt')

print("Examples ready! Uncomment the code above to use.")


## Integration with Training Notebook

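To use this in your training notebook, first run the cells above in that notebook (platform detection, the setup cell for your platform, and the universal functions) so that `save_model_to_drive` is defined, then add this cell after training completes: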

```python
# At the end of training, save to Google Drive
if os.path.exists('best_swin_emoji_model.pt'):
    save_model_to_drive('best_swin_emoji_model.pt', 'best_swin_emoji_model.pt')
    print("Model saved to Google Drive!")
```


In [None]:
# Quick setup check: report the detected platform and whether the Drive
# integration for that platform is actually usable in this session.
print("\n=== Google Drive Sync Setup ===")
print(f"Platform: {'Kaggle' if IS_KAGGLE else 'Colab' if IS_COLAB else 'Local'}")
if IS_COLAB:
    # Probe MyDrive rather than the mount point: the bare /content/drive
    # directory may exist even when the mount did not complete.
    print(f"Drive mounted: {os.path.isdir('/content/drive/MyDrive')}")
    if 'MODEL_DIR' in globals():
        print(f"Model directory: {MODEL_DIR}")
        print(f"Model directory exists: {os.path.exists(MODEL_DIR)}")
    else:
        print("Model directory: not set (run the Colab mount cell first)")
elif IS_KAGGLE:
    # `drive` is the PyDrive client created by the Kaggle API cell; it is
    # None (or undefined) when authentication did not complete.
    if globals().get('drive') is not None:
        print("Google Drive API: Ready")
    else:
        print("Google Drive API: Not authenticated")
        print("Note: You'll need to authenticate on first run")
else:
    print("Not running on supported platform")
print("=" * 30)


## Alternative: Using Kaggle Datasets (Recommended for Kaggle)

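For Kaggle, an easier alternative is to copy your model into `/kaggle/working`, where it is kept with the notebook's output and can afterwards be downloaded or turned into a Kaggle Dataset: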


In [None]:
# Alternative method for Kaggle - Save as Kaggle Dataset Output
# This is often easier than Google Drive API for Kaggle notebooks

if IS_KAGGLE:
    # NOTE: the helper below only copies files into /kaggle/working, so the
    # `kaggle` API client is not imported here. That package authenticates at
    # import time and may fail when no API token is configured.

    def save_to_kaggle_dataset(file_path, dataset_name="emoji-classification-model", version_notes="Model checkpoint"):
        """
        Copy a file into a named folder under /kaggle/working (Kaggle only)

        Args:
            file_path: Path to file to save
            dataset_name: Name of the sub-folder created under /kaggle/working
            version_notes: Notes for this version (accepted for API
                compatibility; currently not used)

        Returns:
            Path of the copied file, or None when file_path does not exist.
        """
        import shutil

        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            return None

        # Create output directory
        output_dir = f"/kaggle/working/{dataset_name}"
        os.makedirs(output_dir, exist_ok=True)

        # Copy file to output directory, keeping its original file name
        output_path = os.path.join(output_dir, os.path.basename(file_path))
        shutil.copy2(file_path, output_path)

        print(f"File saved to: {output_path}")
        print(f"After notebook run, this will be available as a Kaggle dataset: {dataset_name}")
        print("You can download it from the notebook's output section or from your datasets page.")
        return output_path

    print("Kaggle dataset save function loaded!")
    print("Usage: save_to_kaggle_dataset('best_swin_emoji_model.pt')")
else:
    print("Not running on Kaggle - Kaggle dataset method not available")
