In [1]:
import os          # untuk navigasi dan manipulasi file/direktori
import shutil      # untuk memindahkan/menyalin file
import pathlib     # untuk manajemen path yang lebih modern
import pandas as pd    # jika kamu memproses data (misalnya .csv, .xlsx)
from pathlib import Path
import datetime
import re
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime

In [2]:
# Direktori Project Portfolio
portfolio_dir = Path(r"C:\Users\ASUS\project-portfolio")
portfolio_folder_info = []

In [3]:
# Cari semua file Word (.doc dan .docx) secara rekursif
word_files = list(portfolio_dir.rglob("*.doc")) + list(portfolio_dir.rglob("*.docx"))

In [4]:
# Hapus file satu per satu
for file in word_files:
    try:
        file.unlink()  # Hapus file
        print(f"Hapus: {file}")
    except Exception as e:
        print(f"Gagal menghapus {file}: {e}")

In [3]:
# Folder output utama
output_dir = Path("Output")
output_dir.mkdir(exist_ok=True)

In [6]:
def extract_file_info(file_path, source_label):
    stat = file_path.stat()
    size_kb = stat.st_size / 1024
    size_gb = stat.st_size / (1024 ** 3)
    
    # Buat path yang lebih sederhana: hanya relatif ke root_dir
    try:
        simple_path = file_path.relative_to(file_path.anchor)
    except ValueError:
        simple_path = file_path.name  # fallback jika error

    return {
        "FileName": file_path.stem,
        "FileExt": file_path.suffix.lower(),
        "DateModified": datetime.fromtimestamp(stat.st_mtime),
        "Size_KB": round(size_kb, 2),
        "Size_GB": round(size_gb, 4),
        "Source": source_label,
        "Path": str(file_path),
        "SimplePath": str(simple_path),
        "ParentPath": str(file_path.parent)
    }

In [7]:
# Loop semua file dalam Downloads (rekursif)
for file in portfolio_dir.rglob("*"):
    if file.is_file():
        portfolio_folder_info.append(extract_file_info(file, "project-portfolio"))

In [8]:
# Konversi ke DataFrame
df_portfolio = pd.DataFrame(portfolio_folder_info)
df_portfolio.sort_values(by="DateModified", ascending=False, inplace=True)

In [9]:
df_portfolio

Unnamed: 0,FileName,FileExt,DateModified,Size_KB,Size_GB,Source,Path,SimplePath,ParentPath
4,management-portfolio,.ipynb,2025-07-31 14:56:44.874859,6.17,0.0000,project-portfolio,C:\Users\ASUS\project-portfolio\management-por...,Users\ASUS\project-portfolio\management-portfo...,C:\Users\ASUS\project-portfolio
10,FETCH_HEAD,,2025-07-31 14:54:43.618649,0.00,0.0000,project-portfolio,C:\Users\ASUS\project-portfolio\.git\FETCH_HEAD,Users\ASUS\project-portfolio\.git\FETCH_HEAD,C:\Users\ASUS\project-portfolio\.git
103,HEAD,,2025-07-31 14:48:35.062087,0.79,0.0000,project-portfolio,C:\Users\ASUS\project-portfolio\.git\logs\HEAD,Users\ASUS\project-portfolio\.git\logs\HEAD,C:\Users\ASUS\project-portfolio\.git\logs
104,main,,2025-07-31 14:48:35.062087,0.79,0.0000,project-portfolio,C:\Users\ASUS\project-portfolio\.git\logs\refs...,Users\ASUS\project-portfolio\.git\logs\refs\he...,C:\Users\ASUS\project-portfolio\.git\logs\refs...
603,main,,2025-07-31 14:48:35.062087,0.04,0.0000,project-portfolio,C:\Users\ASUS\project-portfolio\.git\refs\head...,Users\ASUS\project-portfolio\.git\refs\heads\main,C:\Users\ASUS\project-portfolio\.git\refs\heads
...,...,...,...,...,...,...,...,...,...
1509,7_Penggunaan Lahan,.pdf,2014-01-09 21:18:40.000000,2045.92,0.0020,project-portfolio,C:\Users\ASUS\project-portfolio\projects\patgt...,Users\ASUS\project-portfolio\projects\patgtl-2...,C:\Users\ASUS\project-portfolio\projects\patgt...
1506,4_Gunungapi,.pdf,2014-01-09 21:18:32.000000,1533.31,0.0015,project-portfolio,C:\Users\ASUS\project-portfolio\projects\patgt...,Users\ASUS\project-portfolio\projects\patgtl-2...,C:\Users\ASUS\project-portfolio\projects\patgt...
1503,1_Kelas_Lereng,.pdf,2014-01-09 21:18:28.000000,2025.40,0.0019,project-portfolio,C:\Users\ASUS\project-portfolio\projects\patgt...,Users\ASUS\project-portfolio\projects\patgtl-2...,C:\Users\ASUS\project-portfolio\projects\patgt...
1507,5_Pengamatan,.pdf,2014-01-09 21:18:24.000000,1617.38,0.0015,project-portfolio,C:\Users\ASUS\project-portfolio\projects\patgt...,Users\ASUS\project-portfolio\projects\patgtl-2...,C:\Users\ASUS\project-portfolio\projects\patgt...


In [12]:
duplicates = df_portfolio[df_portfolio.duplicated(subset=["FileName", "FileExt", "Size_KB", "Path"])]
print(duplicates)

Empty DataFrame
Columns: [FileName, FileExt, DateModified, Size_KB, Size_GB, Source, Path, SimplePath, ParentPath]
Index: []


In [14]:
import difflib

dupes = []
filenames = df_portfolio["FileName"].tolist()

for i, name in enumerate(filenames):
    matches = difflib.get_close_matches(name, filenames, n=5, cutoff=0.9)
    if len(matches) > 1:
        dupes.append((name, matches))

for name, matches in dupes:
    print(f"File: {name} → Mirip dengan: {matches}")


File: HEAD → Mirip dengan: ['HEAD', 'HEAD']
File: main → Mirip dengan: ['main', 'main']
File: main → Mirip dengan: ['main', 'main']
File: metadata → Mirip dengan: ['metadata', 'metadata', 'metadata', 'metadata', 'metadata']
File: README → Mirip dengan: ['README', 'README', 'README', 'README', 'README']
File: README → Mirip dengan: ['README', 'README', 'README', 'README', 'README']
File: README → Mirip dengan: ['README', 'README', 'README', 'README', 'README']
File: README → Mirip dengan: ['README', 'README', 'README', 'README', 'README']
File: HEAD → Mirip dengan: ['HEAD', 'HEAD']
File: preprocessing_utils → Mirip dengan: ['preprocessing_utils', 'preprocessing_utils']
File: preprocessing_utils → Mirip dengan: ['preprocessing_utils', 'preprocessing_utils']
File: folder-documents-metadata → Mirip dengan: ['folder-documents-metadata', 'folder-documents-metadata']
File: folder-documents-metadata → Mirip dengan: ['folder-documents-metadata', 'folder-documents-metadata']
File: folder-downloa

In [16]:
df_portfolio["DateModified"] = pd.to_datetime(df_portfolio["DateModified"])

In [17]:
dupe_df = df_portfolio[df_portfolio.duplicated(subset=["FileName", "Size_KB"], keep=False)]

In [18]:
latest_files = dupe_df.loc[
    dupe_df.groupby(["FileName", "Size_KB"])["DateModified"].idxmax()
].reset_index(drop=True)

In [19]:
latest_files

Unnamed: 0,FileName,FileExt,DateModified,Size_KB,Size_GB,Source,Path,SimplePath,ParentPath
0,AreaDIY,.shx,2022-05-20 05:19:12,0.11,0.0000,project-portfolio,C:\Users\ASUS\project-portfolio\projects\patgt...,Users\ASUS\project-portfolio\projects\patgtl-2...,C:\Users\ASUS\project-portfolio\projects\patgt...
1,Frekuensi Bencana di Indonesia,.png,2024-02-08 09:08:18,109.31,0.0001,project-portfolio,C:\Users\ASUS\project-portfolio\projects\lnob-...,Users\ASUS\project-portfolio\projects\lnob-202...,C:\Users\ASUS\project-portfolio\projects\lnob-...
2,Geologi Lingkungan,.pdf,2022-05-20 05:19:14,1151.59,0.0011,project-portfolio,C:\Users\ASUS\project-portfolio\projects\patgt...,Users\ASUS\project-portfolio\projects\patgtl-2...,C:\Users\ASUS\project-portfolio\projects\patgt...
3,IKP in Kabupaten Jayawijaya,.png,2024-02-08 09:09:00,103.67,0.0001,project-portfolio,C:\Users\ASUS\project-portfolio\projects\lnob-...,Users\ASUS\project-portfolio\projects\lnob-202...,C:\Users\ASUS\project-portfolio\projects\lnob-...
4,Kekeringan,.jpg,2024-02-08 09:08:42,124.42,0.0001,project-portfolio,C:\Users\ASUS\project-portfolio\projects\lnob-...,Users\ASUS\project-portfolio\projects\lnob-202...,C:\Users\ASUS\project-portfolio\projects\lnob-...
...,...,...,...,...,...,...,...,...,...
92,rekomendasi titik Kai dan Tual,.prj,2022-07-04 15:02:50,0.40,0.0000,project-portfolio,C:\Users\ASUS\project-portfolio\projects\siste...,Users\ASUS\project-portfolio\projects\sistem-l...,C:\Users\ASUS\project-portfolio\projects\siste...
93,sta,.adf,2022-05-20 05:19:26,0.03,0.0000,project-portfolio,C:\Users\ASUS\project-portfolio\projects\patgt...,Users\ASUS\project-portfolio\projects\patgtl-2...,C:\Users\ASUS\project-portfolio\projects\patgt...
94,stk,.adf,2022-05-20 05:19:24,0.21,0.0000,project-portfolio,C:\Users\ASUS\project-portfolio\projects\patgt...,Users\ASUS\project-portfolio\projects\patgtl-2...,C:\Users\ASUS\project-portfolio\projects\patgt...
95,w001001x,.adf,2022-05-20 05:19:24,15.84,0.0000,project-portfolio,C:\Users\ASUS\project-portfolio\projects\patgt...,Users\ASUS\project-portfolio\projects\patgtl-2...,C:\Users\ASUS\project-portfolio\projects\patgt...
