In [None]:
# Notebook title: Get Sharable Links from Google Drive Folder and Subfolders

from google.colab import auth
from google.auth import default
from googleapiclient.discovery import build
from tqdm.notebook import tqdm
import re
import json
from collections import OrderedDict

# Authenticate the Colab user and create the Drive API (v3) client.
# `service` is a module-level name that the helper functions in the next
# cell read directly.
auth.authenticate_user()
# default() yields a pair; only its first element (the credentials) is used.
creds, _ = default()
service = build('drive', 'v3', credentials=creds)

In [None]:
def get_folder_id(folder_name):
    """Return the ID of the first non-trashed Drive folder named ``folder_name``.

    Args:
        folder_name: Exact name of the folder to look up.

    Returns:
        The ``id`` of the first folder in the API response, or ``None`` when
        the response contains no matching folder.
    """
    # Backslashes and single quotes have to be escaped inside a Drive query
    # string literal; without this a name such as "Bob's" breaks the query.
    escaped_name = folder_name.replace("\\", "\\\\").replace("'", "\\'")
    results = service.files().list(
        q=f"mimeType='application/vnd.google-apps.folder' and name='{escaped_name}' and trashed=false",
        fields="files(id, name)").execute()
    items = results.get('files', [])
    return items[0]['id'] if items else None

def get_subfolders(parent_folder_id, pattern):
    """List the sub-folders of a folder whose names match ``pattern``.

    Args:
        parent_folder_id: Drive ID of the parent folder.
        pattern: Regular expression applied with ``re.match`` (anchored at the
            start of the name only).

    Returns:
        A list of ``{'id': ..., 'name': ...}`` dicts for the non-trashed
        sub-folders whose name matches ``pattern``.
    """
    subfolders = []
    page_token = None
    # files.list returns results one page at a time; keep following
    # nextPageToken so sub-folders beyond the first page are not dropped.
    while True:
        results = service.files().list(
            q=f"'{parent_folder_id}' in parents and mimeType='application/vnd.google-apps.folder' and trashed=false",
            fields="nextPageToken, files(id, name)",
            pageSize=1000,
            pageToken=page_token).execute()
        subfolders.extend(results.get('files', []))
        page_token = results.get('nextPageToken')
        if not page_token:
            break
    return [folder for folder in subfolders if re.match(pattern, folder['name'])]

def get_files_in_folder(folder_id):
    """Return every non-trashed item directly inside a Drive folder.

    Args:
        folder_id: Drive ID of the folder to list.

    Returns:
        A list of ``{'id': ..., 'name': ...}`` dicts, accumulated over all
        result pages.
    """
    files = []
    page_token = None
    # A single files.list call returns only one page of results, so follow
    # nextPageToken until the listing is exhausted.
    while True:
        results = service.files().list(
            q=f"'{folder_id}' in parents and trashed=false",
            fields="nextPageToken, files(id, name)",
            pageSize=1000,
            pageToken=page_token).execute()
        files.extend(results.get('files', []))
        page_token = results.get('nextPageToken')
        if not page_token:
            break
    return files

def get_sharable_links(files, lesson, version):
    """Build share links for several files, keyed by a normalized frame name.

    Args:
        files: Iterable of dicts carrying ``'name'`` and ``'id'`` entries.
        lesson: Integer lesson number, rendered with two digits in the key.
        version: Integer version number, rendered with three digits.

    Returns:
        A dict mapping ``L<lesson>_V<version>_<frame>.jpg`` to a Drive view
        URL built from the file ID. Entries whose name does not start with
        ``frame_<digits>.jpg`` are skipped.
    """
    frame_name = re.compile(r'frame_(\d+)\.jpg')
    links = {}
    for entry in files:
        found = frame_name.match(entry['name'])
        if found is None:
            continue
        frame_number = int(found.group(1))
        key = f"L{lesson:02d}_V{version:03d}_{frame_number:05d}.jpg"
        links[key] = f"https://drive.google.com/file/d/{entry['id']}/view?usp=sharing"
    return links

def process_folder(folder_name, versions=("V001",)):
    """Process one parent folder and its matching sub-folders.

    The parent folder is looked up by name, its sub-folders named
    ``L<lesson>_V<3 digits>_keyframes_filtered`` are listed, and the share
    links of the selected versions are merged into a single dict.

    Args:
        folder_name: Name of the parent folder. Characters 1-2 are parsed as
            the lesson number (e.g. ``"L01_new"`` -> lesson 1).
        versions: Version tags (such as ``"V001"``) whose sub-folders are
            processed. A single string is accepted as one tag. The default
            ``("V001",)`` reproduces the filter that used to be hard-coded;
            pass ``None`` to process every matching sub-folder.

    Returns:
        A dict mapping frame keys to share links (possibly empty), or
        ``None`` when the parent folder is not found.

    Raises:
        ValueError: If ``folder_name[1:3]`` is not an integer.
    """
    folder_id = get_folder_id(folder_name)
    if not folder_id:
        return None

    lesson = int(folder_name[1:3])  # Numeric part of L01, L02, ...
    # Raw f-string: "\d" inside a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    pattern = rf"L{lesson:02d}_V\d{{3}}_keyframes_filtered"
    subfolders = get_subfolders(folder_id, pattern)
    print(subfolders)

    if isinstance(versions, str):
        versions = (versions,)
    wanted = None if versions is None else set(versions)

    all_files = {}
    for subfolder in subfolders:
        version_match = re.search(r'V(\d{3})', subfolder['name'])
        if not version_match:
            continue
        # group() is the full tag, e.g. "V001"; group(1) is just the digits.
        if wanted is not None and version_match.group() not in wanted:
            continue
        version = int(version_match.group(1))
        print(version)
        files = get_files_in_folder(subfolder['id'])
        print(len(files))
        subfolder_links = get_sharable_links(files, lesson, version)
        all_files.update(subfolder_links)

    return all_files

def custom_sort_key(item):
    """Build a custom sort key from a ``(key, value)`` pair.

    Args:
        item: A pair whose first element is a key such as
            ``"L01_V002_00012.jpg"``.

    Returns:
        ``(lesson, version, frame)`` as integers when the key starts with the
        ``L<digits>_V<digits>_<digits>.jpg`` pattern, otherwise ``(0, 0, 0)``.
    """
    name = item[0]  # first element of the pair is the dictionary key
    parsed = re.match(r'L(\d+)_V(\d+)_(\d+)\.jpg', name)
    if parsed is None:
        # Fallback for keys that do not follow the pattern
        return (0, 0, 0)
    return tuple(int(part) for part in parsed.groups())

In [None]:

# Main execution
# folder_name = input("Enter the name of the parent folder in your Google Drive: ")
# Parent folders to process; range(1, 2) currently yields only 'L01_new'.
FOLDERS = [f'L{i:02d}_new' for i in range(1, 2)]
# Maps each parent folder name to the dict of links collected for it.
all_links = {}

for folder in tqdm(FOLDERS, desc="Đang xử lý các thư mục"):
    links = process_folder(folder)
    if links:
        all_links[folder] = links
        # len(links) is the number of share links collected (one per file),
        # not the number of sub-folders, so the message reports links.
        print(f"Đã xử lý {folder}: Tìm thấy {len(links)} link chia sẻ")
    else:
        print(f"Bỏ qua {folder}: Không tìm thấy thư mục con phù hợp hoặc thư mục không tồn tại")

Đang xử lý các thư mục:   0%|          | 0/1 [00:00<?, ?it/s]

[{'id': '1-0JSdGOTAZZ3uN91qrSgse2qdj6cAwTH', 'name': 'L01_V012_keyframes_filtered'}, {'id': '1cvB_Za-T4Dcj3Cr9Bx8IXWrlEF6LcJEB', 'name': 'L01_V003_keyframes_filtered'}, {'id': '181Ir4najr-rE4TDYUaC6aV2Hco-HgB3l', 'name': 'L01_V002_keyframes_filtered'}, {'id': '1TNg18QKJNhRjV5TALoecaqwdF2KRB2fq', 'name': 'L01_V005_keyframes_filtered'}, {'id': '1n3mmqBDrQqmzKG36-Tzoxeh_6rskIfFj', 'name': 'L01_V013_keyframes_filtered'}, {'id': '13VwFdHROUU2fLBMUlv2cEEyFYtKc8hfZ', 'name': 'L01_V017_keyframes_filtered'}, {'id': '1ZiQGpna3hvelG8ImB5uQcuOznLguzrZN', 'name': 'L01_V029_keyframes_filtered'}, {'id': '1mEZgUx47Mifp2MOTDyG45yTv3jbxit3h', 'name': 'L01_V022_keyframes_filtered'}, {'id': '1KLFBjsMiiaiHLe-qWTVGvKTzsPoviuiQ', 'name': 'L01_V016_keyframes_filtered'}, {'id': '1VX7PsSaXEtHDM2zlnqcXL97q5VjkbeLu', 'name': 'L01_V031_keyframes_filtered'}, {'id': '1qg8nDZNYg7VoBFr1qUfAyHKFVTZqX7Zj', 'name': 'L01_V030_keyframes_filtered'}, {'id': '1HxQR35EGrjdULHQ7FswJ_DQMj3IpCkfV', 'name': 'L01_V026_keyframes_fil

In [None]:
# Print every collected link whose key contains 'L01_V001'.
# Iterate the dicts directly instead of wrapping .items() in enumerate(),
# which only produced unused indices and forced tuple indexing.
for folder_links in all_links.values():
    for frame_key, frame_url in folder_links.items():
        if 'L01_V001' in frame_key:
            print(f'{frame_key}: {frame_url}')

L01_V001_09670.jpg: https://drive.google.com/file/d/1CyUe0UuEinh8uvf5JCfWp25ud_S40jX9/view?usp=sharing
L01_V001_09648.jpg: https://drive.google.com/file/d/1D0-JgtPUgpssZ_dT7PnC9RNwdayQ0vae/view?usp=sharing
L01_V001_09606.jpg: https://drive.google.com/file/d/1D1YL2A5bxmZH1niVivtp-LsDX35L5Mh3/view?usp=sharing
L01_V001_09586.jpg: https://drive.google.com/file/d/1D83GuMNYJBMep8UjPgOIJy5K2esLIW2t/view?usp=sharing
L01_V001_09551.jpg: https://drive.google.com/file/d/1D9gsiSmzgZprNrl5W8tIoI6nM2Cfs_IK/view?usp=sharing
L01_V001_09499.jpg: https://drive.google.com/file/d/1ClWqbMILg1BU9S7kpzAZ4x7usPAqdADp/view?usp=sharing
L01_V001_09467.jpg: https://drive.google.com/file/d/1CqDjjvnFN3ocXTsR4LjvWP8HniZL2vBr/view?usp=sharing
L01_V001_09466.jpg: https://drive.google.com/file/d/1CuddRAiOthks7EazQC60Ax9vBUmj0i80/view?usp=sharing
L01_V001_09425.jpg: https://drive.google.com/file/d/1CxhaOxb__mDBdtfzoEACMRHDfP347fsQ/view?usp=sharing
L01_V001_09412.jpg: https://drive.google.com/file/d/1CxhkTXE09R9DkmlplodG

In [None]:


# custom_sort_key parses frame keys such as "L01_V001_00012.jpg". all_links is
# keyed by parent folder name (e.g. "L01_new"), which never matches that
# pattern, so the sort has to be applied to the entries inside each folder.
sorted_links = OrderedDict(
    (folder_name, OrderedDict(sorted(all_links[folder_name].items(), key=custom_sort_key)))
    for folder_name in sorted(all_links)
)

# Write the sorted result to a JSON file
with open('sharable_links.json', 'w', encoding='utf-8') as f:
    json.dump(sorted_links, f, ensure_ascii=False, indent=2)

print("\nKết quả đã được sắp xếp và lưu vào file 'sharable_links.json'")

# Show the first 10 frame keys of the sorted result (flattened across
# folders) rather than dumping a whole per-folder dict.
print("\nMười key đầu tiên của file JSON đã sắp xếp:")
first_entries = [
    entry
    for folder_links in sorted_links.values()
    for entry in folder_links.items()
][:10]
for key, value in first_entries:
    print(f"{key}: {value}")


Kết quả đã được sắp xếp và lưu vào file 'sharable_links.json'

Mười key đầu tiên của file JSON đã sắp xếp:
L01_new: {'L01_V012_09949.jpg': 'https://drive.google.com/file/d/1aMfx_259sZbCVsIcGRelaStaTPlz2Tzt/view?usp=sharing', 'L01_V012_09887.jpg': 'https://drive.google.com/file/d/1aNlubUoyyay57cw-YG9nNRfaL60omdT1/view?usp=sharing', 'L01_V012_09817.jpg': 'https://drive.google.com/file/d/1aNy66LkhOpngPKTiLacTLQDkqrhFEYcV/view?usp=sharing', 'L01_V012_09375.jpg': 'https://drive.google.com/file/d/1aUs_2tPML-FI3le74dsIE6zZdQNU2etF/view?usp=sharing', 'L01_V012_00093.jpg': 'https://drive.google.com/file/d/1aVeVg9P1Y7DLLwVF5vfErEj7x02Sukmi/view?usp=sharing', 'L01_V012_00929.jpg': 'https://drive.google.com/file/d/1aW4o44RiVydlkw9NepnO5hiLLCH7mms5/view?usp=sharing', 'L01_V012_09228.jpg': 'https://drive.google.com/file/d/1aai-9qzHbKsAaer4kFepycDXQyFvzMkP/view?usp=sharing', 'L01_V012_09227.jpg': 'https://drive.google.com/file/d/1ahCN_ETUGueRgbBv8Y3GErrVSgFgvBaq/view?usp=sharing', 'L01_V012_09204.jp

In [None]:
from google.colab import auth
from google.auth import default
from googleapiclient.discovery import build
from tqdm.notebook import tqdm
import re
import json
from collections import OrderedDict

# Authenticate and create the Drive API service.
# NOTE: this repeats the authentication and client construction from the
# first cell of the notebook (same calls, same `service` name).
auth.authenticate_user()
# default() yields a pair; only its first element (the credentials) is used.
creds, _ = default()
service = build('drive', 'v3', credentials=creds)

def get_folder_contents(folder_id):
    """Return every non-trashed item directly inside a Drive folder.

    Args:
        folder_id: Drive ID of the folder to list.

    Returns:
        A list of ``{'id', 'name', 'mimeType'}`` dicts accumulated over all
        result pages.
    """
    results = []
    page_token = None
    while True:
        response = service.files().list(
            # Exclude trashed items, matching the other listing helpers in
            # this notebook.
            q=f"'{folder_id}' in parents and trashed=false",
            spaces='drive',
            # Request the largest page to reduce the number of round-trips.
            pageSize=1000,
            fields='nextPageToken, files(id, name, mimeType)',
            pageToken=page_token
        ).execute()
        results.extend(response.get('files', []))
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
    return results

def get_sharable_link(file_id):
    """Fetch the ``webViewLink`` of a single Drive file.

    Args:
        file_id: Drive ID of the file.

    Returns:
        The value of the ``webViewLink`` field, or ``None`` when the field is
        absent or the lookup raises (the error is printed, not re-raised).
    """
    try:
        request = service.files().get(fileId=file_id, fields='webViewLink')
        metadata = request.execute()
        link = metadata.get('webViewLink')
    except Exception as err:
        # Best-effort: report the failure and let the caller skip this file.
        print(f"Error getting link for file {file_id}: {str(err)}")
        link = None
    return link

def process_folder(folder_id, folder_name='', depth=0):
    """Recursively collect links for everything under a Drive folder.

    Args:
        folder_id: Drive ID of the folder to walk.
        folder_name: Label shown in the progress-bar description.
        depth: Recursion depth; adds two spaces of indentation per level to
            the progress-bar description.

    Returns:
        An ``OrderedDict`` mapping each item name to its link (files) or to a
        nested ``OrderedDict`` (sub-folders). Files without a link and
        sub-folders with an empty result are omitted.
    """
    children = get_folder_contents(folder_id)
    tree = OrderedDict()

    progress = tqdm(children, desc=f"Processing {'  ' * depth}{folder_name}")
    for child in progress:
        is_folder = child['mimeType'] == 'application/vnd.google-apps.folder'
        if is_folder:
            entry = process_folder(child['id'], child['name'], depth + 1)
        else:
            entry = get_sharable_link(child['id'])
        # Falsy entries (no link, or an empty sub-tree) are left out.
        if entry:
            tree[child['name']] = entry

    return tree

# Main execution: walk one root folder and export the collected links.
ROOT_FOLDER_ID = '1Z5NAIIrXcItuYwGwo0Xxm1q1za6DlkEK'  # Replace with your folder ID
ROOT_FOLDER_NAME = 'L01_V001_keyframes_filtered'  # Replace with your root folder name

print(f"Starting to process {ROOT_FOLDER_NAME}...")
all_results = process_folder(ROOT_FOLDER_ID, ROOT_FOLDER_NAME)

# Serialize once; the same text is saved to the JSON file and echoed below.
results_json = json.dumps(all_results, ensure_ascii=False, indent=2)
with open('drive_links.json', 'w', encoding='utf-8') as f:
    f.write(results_json)

print("Processing complete. Results saved to 'drive_links.json'")

# Optional: print the results
print(results_json)

Starting to process L01_V001_keyframes_filtered...


Processing L01_V001_keyframes_filtered:   0%|          | 0/654 [00:00<?, ?it/s]

Processing complete. Results saved to 'drive_links.json'
{
  "frame_9670.jpg": "https://drive.google.com/file/d/1CyUe0UuEinh8uvf5JCfWp25ud_S40jX9/view?usp=drivesdk",
  "frame_9648.jpg": "https://drive.google.com/file/d/1D0-JgtPUgpssZ_dT7PnC9RNwdayQ0vae/view?usp=drivesdk",
  "frame_9606.jpg": "https://drive.google.com/file/d/1D1YL2A5bxmZH1niVivtp-LsDX35L5Mh3/view?usp=drivesdk",
  "frame_9586.jpg": "https://drive.google.com/file/d/1D83GuMNYJBMep8UjPgOIJy5K2esLIW2t/view?usp=drivesdk",
  "frame_9551.jpg": "https://drive.google.com/file/d/1D9gsiSmzgZprNrl5W8tIoI6nM2Cfs_IK/view?usp=drivesdk",
  "frame_9499.jpg": "https://drive.google.com/file/d/1ClWqbMILg1BU9S7kpzAZ4x7usPAqdADp/view?usp=drivesdk",
  "frame_9467.jpg": "https://drive.google.com/file/d/1CqDjjvnFN3ocXTsR4LjvWP8HniZL2vBr/view?usp=drivesdk",
  "frame_9466.jpg": "https://drive.google.com/file/d/1CuddRAiOthks7EazQC60Ax9vBUmj0i80/view?usp=drivesdk",
  "frame_9425.jpg": "https://drive.google.com/file/d/1CxhaOxb__mDBdtfzoEACMRHDfP347fs