In [None]:
from pymongo import MongoClient, UpdateOne

# --- MongoDB connection ---
# Connection settings for the collection that the write-back loop below
# reads from and updates.
MONGO_URI = "Mongo URI"  # <-- fill in your Mongo URI
DB_NAME = "copyright"  # database name
COL_NAME = "testing_writein"  # collection name inside DB_NAME

# Module-level client and collection handle shared by the rest of the script.
client = MongoClient(MONGO_URI)
col = client[DB_NAME][COL_NAME]

def determine_court_level(court_name: str) -> str:
    """Classify a court name as "District", "Circuit", "Supreme" or "Unknown".

    Matching is case-insensitive and keyword based; rules are tried in this
    order and the first match wins:

    1. contains "district"                      -> "District"
    2. contains "circuit" or an ordinal word
       ("first" ... "twelfth")                  -> "Circuit"
    3. contains "supreme" / "suprema"           -> "Supreme"
    4. contains "superior"                      -> "District"
    5. anything else                            -> "Unknown"

    The place name "District of Columbia" is ignored for matching, so that
    e.g. "... Court of Appeals for the District of Columbia Circuit" is
    classified as "Circuit" rather than "District".

    Args:
        court_name: The court name. Falsy values (None, "") yield "Unknown";
            non-string values are converted with ``str()`` before matching.

    Returns:
        One of "District", "Circuit", "Supreme" or "Unknown".
    """
    if not court_name:
        return "Unknown"

    court_name_lower = str(court_name).lower()

    # "District of Columbia" is a location, not a court level. Drop it before
    # keyword matching so it cannot trigger the "district" rule on its own;
    # a real district court there still matches via "district court".
    court_name_lower = court_name_lower.replace("district of columbia", "")

    # District
    if "district" in court_name_lower:
        return "District"

    # Circuit (contains "circuit" or ordinal words)
    ordinal_words = (
        "first", "second", "third", "fourth", "fifth", "sixth",
        "seventh", "eighth", "ninth", "tenth", "eleventh", "twelfth",
    )
    if "circuit" in court_name_lower or any(
        word in court_name_lower for word in ordinal_words
    ):
        return "Circuit"

    # Supreme (English/Spanish)
    if "supreme" in court_name_lower or "suprema" in court_name_lower:
        return "Supreme"

    # Superior Court is deliberately treated as District.
    if "superior" in court_name_lower:
        return "District"

    return "Unknown"


# --- Read -> compute -> write back (bulk) ---
# Stream every document, derive its "Court Level" from the "Court" field and
# send the updates to MongoDB in unordered batches of `batch_size`.
batch_size = 1000
ops = []
updated = 0
skipped = 0

# Projection limits the fetched fields to "Court" (plus the default _id).
cursor = col.find({}, {"Court": 1})

for doc in cursor:
    court = doc.get("Court")
    level = determine_court_level(court)

    # To leave documents without a Court untouched, enable this guard:
    # if not court:
    #     skipped += 1
    #     continue

    selector = {"_id": doc["_id"]}
    change = {"$set": {"Court Level": level}}
    ops.append(UpdateOne(selector, change))

    # Keep accumulating until a full batch is queued.
    if len(ops) < batch_size:
        continue

    res = col.bulk_write(ops, ordered=False)
    updated += res.modified_count
    ops = []

# Send whatever is left over from the last partial batch.
if ops:
    res = col.bulk_write(ops, ordered=False)
    updated += res.modified_count

print(f"Done. updated={updated}, skipped={skipped}")


Done. updated=2164, skipped=0


In [None]:
# import pandas as pd

# # Assume the CSV file has been loaded and has a column named 'Court'
# df = pd.read_csv('circuit.csv')

# # Add a function to determine the court level
# def determine_court_level(court_name):
#     court_name_lower = court_name.lower()
    
#     # Check whether it contains 'district'
#     if 'district' in court_name_lower:
#         return 'District'
    
#     # Check whether it contains 'circuit' or ordinal words such as 'first, second...'
#     elif 'circuit' in court_name_lower or any(num in court_name_lower for num in [
#         'first', 'second', 'third', 'fourth', 'fifth', 'sixth', 'seventh', 
#         'eighth', 'ninth', 'tenth', 'eleventh', 'twelfth']):
#         return 'Circuit'
    
#     # Check whether it contains 'supreme' or the Spanish 'suprema'
#     elif 'supreme' in court_name_lower or 'suprema' in court_name_lower:
#         return 'Supreme'
    
#     # Check whether it is a Superior Court and classify it as District
#     elif 'superior' in court_name_lower:
#         return 'District'
    
#     # If none of the above conditions match, mark it as Unknown
#     else:
#         return 'Unknown'

# # Add the "Court Level" column and fill it in from the function's result
# df['Court Level'] = df['Court'].apply(determine_court_level)

# # Check the results
# print(df.head())

# # If needed, save the results to a CSV file
# # (note: this is the same path that was read above, so it is overwritten)
# df.to_csv('circuit.csv', index=False)


            pdf                                              Title  \
0  data\cp1.pdf                  Washingtonian Pub. Co. v. Pearson   
1  data\cp1.pdf                     Kepner-Tregoe, Inc. v. Carabio   
2  data\cp1.pdf  Dealer Adver. Dev., Inc. v. Barbara Allan Fin....   
3  data\cp1.pdf  Original Appalachian Artworks, Inc. v. Toy Lof...   
4  data\cp1.pdf                                  Streeter v. Rolfe   

                                               Court  \
0  United States Court of Appeals for the Distric...   
1  United States District Court for the Eastern D...   
2  United States District Court for the Western D...   
3  United States District Court for the Northern ...   
4  United States District Court for the Western D...   

                                       Date                     No  page  \
0   Argued Dec. 15, 1943 ; January 17, 1944               No. 8473    22   
1  April 20, 1979; As amended July 23, 1979            No. 8-71025    26   
2             