In [None]:
# =================== SET-UP ===================
# google.colab.drive provides the mount() call used below.
from google.colab import drive
# os: directory listing, rename and delete; re: filename pattern matching.
# NOTE(review): shutil is not referenced in the visible cells — confirm before removing.
import os, re, shutil
# defaultdict(list) is used to bucket filenames by their vid-number.
from collections import defaultdict

# 1️⃣  Mount Drive at /content/drive (run & grant permission)
drive.mount('/content/drive')




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

# 2️⃣  CHANGE THIS to the folder holding your videos.
# The cells below list this directory and delete / rename .mp4 files inside it,
# so double-check the path before running them.
FOLDER = '/content/drive/MyDrive/NN/Yoga'

In [None]:
def choose_primary(candidates):
    """Pick the filename to keep from a group of duplicate candidates.

    Returns the first candidate that starts with neither 'Copy of' nor 'of '.
    When every candidate carries one of those prefixes, the shortest name is
    returned instead (the earliest one wins a tie).
    """
    copy_prefixes = ('Copy of', 'of ')
    first_original = next(
        (name for name in candidates if not name.startswith(copy_prefixes)),
        None,
    )
    if first_original is not None:
        return first_original
    # Nothing looks like an original: fall back to the shortest name.
    return min(candidates, key=len)

In [None]:
# Case-insensitive match for "vid" immediately followed by digits; group 1
# holds the digits (e.g. vid12 -> '12', Vid003 -> '003', VID7 -> '7').
vid_pattern = re.compile(
    r'vid(\d+)',
    flags=re.IGNORECASE,
)



In [None]:
# =================== MAIN LOGIC ===================
# Collect every entry of FOLDER whose name ends in ".mp4" (any letter case).
all_mp4 = []
for entry in os.listdir(FOLDER):
    if entry.lower().endswith('.mp4'):
        all_mp4.append(entry)

# --- 1. Group files by their vid-number -----------------------------
# Keys are the digits after "vid" with leading zeros removed, so "vid007" and
# "vid7" share a group; an all-zero number is stored under '0'.
groups = defaultdict(list)
for fname in all_mp4:
    m = vid_pattern.search(fname)
    if m is None:
        print(f"[WARN] Skipping file with no 'vid##' number: {fname}")
        continue
    digits = m.group(1).lstrip('0')
    vid_num = digits if digits else '0'
    groups[vid_num].append(fname)



In [None]:
# --- 2. Delete duplicates, retaining one per vid-number -------------
# For every vid-number that has more than one file, keep the name chosen by
# choose_primary() and permanently remove the rest from FOLDER.
deleted = []   # names of the files that were removed
for vid_num, files in groups.items():
    if len(files) <= 1:
        continue   # single file for this number: nothing to do
    keep = choose_primary(files)
    extras = [f for f in files if f != keep]
    for extra in extras:
        os.remove(os.path.join(FOLDER, extra))
        deleted.append(extra)
    print(f"✓ vid{vid_num}: kept '{keep}', deleted {len(files)-1} duplicates")



In [None]:
# --- 3. Rename the remaining files ----------------------------------
# Matches one or more "Copy of " prefixes at the very start of a name.
# A chained str.replace('Copy of Copy ', '').replace('Copy of ', '') is not
# safe here: it turns "Copy of Copy of X.mp4" into "of X.mp4" (leaving a stray
# "of " prefix) and it also rewrites matches in the middle of a name.
# Anchoring at the start and repeating the group strips every leading prefix
# and nothing else.
copy_prefix_pattern = re.compile(r'^(?:Copy of )+')

rename_map = {}   # original name -> cleaned name, for renames that succeeded
for fname in os.listdir(FOLDER):
    if not fname.lower().endswith('.mp4'):
        continue
    cleaned = copy_prefix_pattern.sub('', fname)
    if cleaned == fname:
        continue   # no leading "Copy of " prefix to strip
    src = os.path.join(FOLDER, fname)
    dst = os.path.join(FOLDER, cleaned)
    # Avoid accidental overwrite
    if os.path.exists(dst):
        print(f"[SKIP] Target name already exists: {cleaned}")
        continue
    os.rename(src, dst)
    rename_map[fname] = cleaned



In [None]:
# =================== REPORT ===================
# Print how many duplicates were deleted, then each old -> new rename
# (or a note that nothing was renamed).
print("\n--- Summary ---")
print(f"Deleted {len(deleted)} duplicate file(s).")
if not rename_map:
    print("No files needed renaming.")
else:
    print("Renamed:")
    for old, new in rename_map.items():
        print(f"  {old}  →  {new}")


--- Summary ---
Deleted 0 duplicate file(s).
No files needed renaming.


In [None]:
# from google.colab import drive
# import os, re, shutil
# from collections import defaultdict

# # 1️⃣ Mount your Google Drive
# drive.mount('/content/drive')

# # 2️⃣ CHANGE THIS to your target folder
# FOLDER = '/content/drive/MyDrive/Video Class/Data/test_videos'

# =================================================
# Helper: choose best filename among duplicates
def choose_primary(candidates):
    """Pick the filename to keep from a group of duplicate candidates.

    Returns the first candidate that starts with neither 'Copy of' nor 'of '.
    When every candidate carries one of those prefixes, the shortest name is
    returned instead (the earliest one wins a tie).
    """
    copy_prefixes = ('Copy of', 'of ')
    first_original = next(
        (name for name in candidates if not name.startswith(copy_prefixes)),
        None,
    )
    if first_original is not None:
        return first_original
    # Nothing looks like an original: fall back to the shortest name.
    return min(candidates, key=len)

# Case-insensitive match for "vid" immediately followed by digits; group 1
# holds the digits.
vid_pattern = re.compile(
    r'vid(\d+)',
    flags=re.IGNORECASE,
)

# --- 1. Scan MP4 files in the folder
# Keep every entry of FOLDER whose name ends in ".mp4" (any letter case).
all_mp4 = []
for entry in os.listdir(FOLDER):
    if entry.lower().endswith('.mp4'):
        all_mp4.append(entry)

# --- 2. Group files by vid number
# Keys are the digits after "vid" with leading zeros removed; an all-zero
# number is stored under '0'.
groups = defaultdict(list)
for fname in all_mp4:
    m = vid_pattern.search(fname)
    if m is None:
        print(f"[WARN] Skipping file with no 'vid##' number: {fname}")
        continue
    digits = m.group(1).lstrip('0')
    vid_num = digits if digits else '0'
    groups[vid_num].append(fname)

# --- 3. Delete duplicates
# For every vid number that has more than one file, keep the name chosen by
# choose_primary() and permanently remove the rest from FOLDER.
deleted = []   # names of the files that were removed
for vid_num, files in groups.items():
    if len(files) <= 1:
        continue   # single file for this number: nothing to do
    keep = choose_primary(files)
    extras = [f for f in files if f != keep]
    for extra in extras:
        os.remove(os.path.join(FOLDER, extra))
        deleted.append(extra)
    print(f"✓ vid{vid_num}: kept '{keep}', deleted {len(files)-1} duplicates")

# --- 4. Rename files by removing unwanted prefixes
# Prefixes are only stripped from the start of a name, never from the middle.
unwanted_prefixes = ("Copy of Copy ", "Copy of ", "of ")

rename_map = {}   # original name -> cleaned name, for renames that succeeded
for fname in os.listdir(FOLDER):
    if not fname.lower().endswith('.mp4'):
        continue

    # Keep stripping until no known prefix is left. A single pass is not
    # enough for deeply nested copies: "Copy of Copy of Copy of X.mp4" would
    # stop at "Copy of X.mp4". Each strip shortens the name, so this terminates.
    cleaned = fname
    stripped_any = True
    while stripped_any:
        stripped_any = False
        for prefix in unwanted_prefixes:
            if cleaned.startswith(prefix):
                cleaned = cleaned[len(prefix):]
                stripped_any = True

    # Perform rename if needed
    if cleaned == fname:
        continue
    src = os.path.join(FOLDER, fname)
    dst = os.path.join(FOLDER, cleaned)
    # Never overwrite an existing file with the cleaned name.
    if os.path.exists(dst):
        print(f"[SKIP] Cannot rename: '{cleaned}' already exists.")
        continue
    os.rename(src, dst)
    rename_map[fname] = cleaned

# --- 5. Summary Report
# Print how many duplicates were deleted, then each old -> new rename
# (or a note that nothing was renamed).
print("\n--- Summary ---")
print(f"Deleted {len(deleted)} duplicate file(s).")
if not rename_map:
    print("No files needed renaming.")
else:
    print("Renamed:")
    for old, new in rename_map.items():
        print(f"  {old}  →  {new}")



--- Summary ---
Deleted 0 duplicate file(s).
Renamed:
  of VID20250127145820.mp4  →  VID20250127145820.mp4
  of VID20250127145609.mp4  →  VID20250127145609.mp4
  of VID20250127144930.mp4  →  VID20250127144930.mp4
  of VID20250127145657.mp4  →  VID20250127145657.mp4
  of VID20250127144823.mp4  →  VID20250127144823.mp4
  of VID20250127145847.mp4  →  VID20250127145847.mp4
  of VID20250127145553.mp4  →  VID20250127145553.mp4
  of VID20250127145909.mp4  →  VID20250127145909.mp4
  of VID20250127145632.mp4  →  VID20250127145632.mp4
  of VID20250127144731.mp4  →  VID20250127144731.mp4
  of VID20250127142801.mp4  →  VID20250127142801.mp4
  of VID20250127144402.mp4  →  VID20250127144402.mp4
  of VID20250127144541.mp4  →  VID20250127144541.mp4
  of VID20250127144417.mp4  →  VID20250127144417.mp4
  of VID20250127144433.mp4  →  VID20250127144433.mp4
  of VID20250127151738.mp4  →  VID20250127151738.mp4
  of VID20250127152056.mp4  →  VID20250127152056.mp4
  of VID20250127151920.mp4  →  VID2025012715

In [None]:
# --- 6. Delete exact duplicate filenames (filename-level check only)
# Walk the .mp4 names of FOLDER in sorted order; the first time a name is seen
# it is remembered, and any later entry with the identical name is removed.
# NOTE(review): this only fires if the directory listing returns the same name
# more than once — confirm that the Drive mount can actually do that.
seen = set()
duplicate_name_deletions = []

mp4_names = [
    entry for entry in sorted(os.listdir(FOLDER))
    if entry.lower().endswith('.mp4')
]
for fname in mp4_names:
    if fname not in seen:
        seen.add(fname)
        continue
    os.remove(os.path.join(FOLDER, fname))
    duplicate_name_deletions.append(fname)

# --- Report
if not duplicate_name_deletions:
    print("\nNo duplicate filenames found.")
else:
    print("\n--- Duplicate Filename Cleanup ---")
    print(f"Deleted {len(duplicate_name_deletions)} duplicate filename(s):")
    for name in duplicate_name_deletions:
        print(f"  {name}")



No duplicate filenames found.
