In [None]:
# Install the necessary library
!pip install huggingface_hub -q

# Log in using the securely stored token
from huggingface_hub import login
from google.colab import userdata

# Authenticate against the Hugging Face Hub with the token kept in Colab's
# Secrets manager, and report the outcome instead of raising so the cell
# always finishes with a readable status line.
try:
    login(token=userdata.get('HF_TOKEN'))
    print("✅ Successfully logged in to Hugging Face!")
except userdata.SecretNotFoundError:
    # The secret has not been created in this notebook's Secrets panel.
    print("❌ Secret 'HF_TOKEN' not found. Please add it to Colab's Secrets manager.")
except Exception as login_error:
    # Any other failure while reading the secret or logging in.
    print(f"An error occurred during login: {login_error}")

In [None]:
import os
from huggingface_hub import HfApi
from tqdm.notebook import tqdm
import shutil # Use shutil for copying

# Resume an interrupted upload of .npy shards from Google Drive to a Hugging
# Face dataset repo: list the shards in Drive, skip the ones whose names are
# already present in the repo, and upload the rest one at a time.

# --- 1. SETUP YOUR PATHS AND REPO INFO ---
# Source folder on the mounted Google Drive.
google_drive_path = "/content/drive/My Drive/FineWeb-Edu-Tokens"
# Destination dataset repository on the Hugging Face Hub.
repo_id = "ShallowU/FineWeb-Edu-10B-Tokens-NPY"

# --- 2. INITIALIZE THE API ---
api = HfApi()
print(f"Resuming upload to repository: {repo_id}")

# --- 3. GET THE LIST OF FILES AND UPLOAD THEM ---
try:
    # Only the directory listing is guarded by the "directory not found"
    # handler. A FileNotFoundError raised later (e.g. while copying a single
    # file) must not be reported as a missing directory, so it is left to the
    # generic handler at the bottom, which prints the real error and path.
    try:
        drive_entries = os.listdir(google_drive_path)
    except FileNotFoundError:
        drive_entries = None
        print(f"❌ ERROR: The directory was not found: '{google_drive_path}'")

    if drive_entries is not None:
        all_files_in_drive = sorted(f for f in drive_entries if f.endswith(".npy"))
        print(f"Found {len(all_files_in_drive)} total .npy files in Google Drive.")

        # Get list of files already in the Hub repo to skip them.
        # A set keeps the membership test below O(1) per file.
        files_in_repo = set(api.list_repo_files(repo_id=repo_id, repo_type="dataset"))
        print(f"Found {len(files_in_repo)} files already on Hugging Face Hub. Skipping them.")

        files_to_upload = [f for f in all_files_in_drive if f not in files_in_repo]
        print(f"--> Starting upload for the remaining {len(files_to_upload)} files.")

        if not files_to_upload:
            print("✅ No new files to upload. Everything is already in sync!")
        else:
            # Upload the remaining files one by one. The first failure aborts
            # the loop (it propagates to the generic handler below); re-running
            # the cell resumes from the files that are still missing.
            for filename in tqdm(files_to_upload, desc="Uploading remaining files"):
                drive_file_path = os.path.join(google_drive_path, filename)
                # Temp path on Colab's local disk.
                colab_local_path = os.path.join("/content/", filename)

                try:
                    # 1. Copy file from Drive to Colab's local storage so the
                    #    upload reads from local disk instead of the Drive mount.
                    print(f"\nCopying {filename} to Colab local disk...")
                    shutil.copy(drive_file_path, colab_local_path)

                    # 2. Upload the local file to Hugging Face
                    print(f"Uploading {filename} to Hugging Face...")
                    api.upload_file(
                        path_or_fileobj=colab_local_path,
                        path_in_repo=filename,
                        repo_id=repo_id,
                        repo_type="dataset"
                    )

                finally:
                    # 3. Always delete the local copy, even when the copy or
                    #    upload failed, so local disk space is not exhausted.
                    if os.path.exists(colab_local_path):
                        os.remove(colab_local_path)
                        print(f"Cleaned up local file: {filename}")

            # Reached only when every file in the loop uploaded without error.
            print(f"\n✅ All remaining files successfully uploaded to {repo_id}!")

except Exception as e:
    print(f"❌ An error occurred during upload: {e}")