In [4]:
import requests
import os
import zipfile
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO

# 設定下載目錄
download_dir = r'C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv'
if not os.path.exists(download_dir):
    os.makedirs(download_dir)

# 定義年份與季節
years = [101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113]
seasons = [1, 2, 3, 4]

# 定義函數：下載並直接解壓縮檔案
def download_and_extract_in_memory(year, season):
    url = f'https://plvr.land.moi.gov.tw//DownloadSeason?season={year}S{season}&type=zip&fileName=lvr_landcsv.zip'
    try:
        print(f"Downloading {year}Q{season}...")
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        
        # 使用 BytesIO 儲存檔案流，避免存儲到硬碟
        with zipfile.ZipFile(BytesIO(response.content)) as zfile:
            extract_dir = os.path.join(download_dir, f"{year}Q{season}")
            if not os.path.exists(extract_dir):
                os.makedirs(extract_dir)
            
            # 解壓縮到目標資料夾
            zfile.extractall(extract_dir)
            print(f"{year}Q{season} extracted to {extract_dir}")
            
            # 檔案重命名
            for file_name in os.listdir(extract_dir):
                old_path = os.path.join(extract_dir, file_name)
                new_file_name = f"{year}Q{season}_{file_name}"
                new_path = os.path.join(extract_dir, new_file_name)
                os.rename(old_path, new_path)
                print(f"Renamed {file_name} to {new_file_name}")

    except requests.exceptions.RequestException as e:
        print(f"Error downloading {year}Q{season}: {e}")
    except zipfile.BadZipFile:
        print(f"Error: {year}Q{season} is not a valid ZIP file.")

# 使用 ThreadPoolExecutor 提高並行數量
max_workers = 8  # 根據硬體性能調整
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    for year in years:
        for season in seasons:
            executor.submit(download_and_extract_in_memory, year, season)


Downloading 101Q1...
Downloading 101Q2...
Downloading 101Q3...
Downloading 101Q4...
Downloading 102Q1...
Downloading 102Q2...
Downloading 102Q3...
Downloading 102Q4...
101Q3 extracted to C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q3
Renamed build.ttt to 101Q3_build.ttt
Renamed b_lvr_land_a.csv to 101Q3_b_lvr_land_a.csv
Renamed b_lvr_land_a_land.csv to 101Q3_b_lvr_land_a_land.csv
Renamed d_lvr_land_a.csv to 101Q3_d_lvr_land_a.csv
Renamed d_lvr_land_a_build.csv to 101Q3_d_lvr_land_a_build.csv
Renamed d_lvr_land_a_land.csv to 101Q3_d_lvr_land_a_land.csv
Renamed e_lvr_land_a.csv to 101Q3_e_lvr_land_a.csv
Renamed e_lvr_land_a_build.csv to 101Q3_e_lvr_land_a_build.csv
Renamed e_lvr_land_a_land.csv to 101Q3_e_lvr_land_a_land.csv
Renamed f_lvr_land_a.csv to 101Q3_f_lvr_land_a.csv
Renamed f_lvr_land_a_build.csv to 101Q3_f_lvr_land_a_build.csv
Renamed f_lvr_land_a_land.csv to 101Q3_f_lvr_land_a_land.csv
Renamed h_lvr_land_a.csv to 101Q3_h_lvr_land_a.csv
Renamed h_lvr_land_a_build.csv to 10

In [6]:
import os

# 指定目標資料夾
target_dir = r'C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv'

# 遍歷資料夾內所有檔案（不包含資料夾）
def clean_non_a_csv_files(directory):
    for root, _, files in os.walk(directory):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            # 檢查檔案名稱是否以 "a.csv" 結尾
            if not file_name.endswith("a.csv"):
                try:
                    os.remove(file_path)
                    print(f"Deleted: {file_path}")
                except Exception as e:
                    print(f"Error deleting {file_path}: {e}")

# 執行刪除操作
clean_non_a_csv_files(target_dir)


Deleted: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q1\101Q1_build.ttt
Deleted: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q1\101Q1_manifest.csv
Deleted: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q2\101Q2_build.ttt
Deleted: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q2\101Q2_h_lvr_land_a_land.csv
Deleted: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q2\101Q2_manifest.csv
Deleted: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q2\101Q2_schema-land.csv
Deleted: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q2\101Q2_schema-main.csv
Deleted: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q3\101Q3_build.ttt
Deleted: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q3\101Q3_b_lvr_land_a_land.csv
Deleted: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q3\101Q3_d_lvr_land_a_build.csv
Deleted: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q3\101Q3_d_lvr_land_a_land.csv
Deleted: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q3\101Q3_e_lvr_land_a_build.

In [8]:
import os
import pandas as pd

# 設定目標資料夾
target_dir = r'C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv'

# 儲存未處理的檔案路徑
unprocessed_files = []

# 遍歷目標資料夾內所有 .csv 檔案
def process_csv_files(directory):
    for root, _, files in os.walk(directory):
        for file_name in files:
            if file_name.endswith(".csv"):
                file_path = os.path.join(root, file_name)
                
                try:
                    # 匯入 .csv 檔案
                    df = pd.read_csv(file_path)
                    
                    # 如果資料不足兩行，無法檢查，跳過處理
                    if df.shape[0] < 2:
                        unprocessed_files.append(file_path)
                        continue
                    
                    # 獲取第一個變數第一筆資料
                    first_value = str(df.iloc[0, 0])
                    
                    # 檢查是否以 "The" 開頭
                    if first_value.startswith("The"):
                        # 刪除第一筆資料並重新存檔
                        df = df.iloc[1:].reset_index(drop=True)
                        df.to_csv(file_path, index=False)
                        print(f"Processed and removed: {file_path} (First value: '{first_value}')")
                    else:
                        # 暫時不刪除，記錄未處理檔案
                        unprocessed_files.append(file_path)
                        print(f"Skipped: {file_path} (First value: '{first_value}')")
                except Exception as e:
                    print(f"Error processing {file_path}: {e}")

# 執行檔案處理
process_csv_files(target_dir)

# 回報未處理的檔案路徑
if unprocessed_files:
    print("\nFiles not processed due to first value not starting with 'The':")
    for file in unprocessed_files:
        print(file)
else:
    print("\nAll files processed successfully.")


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q2\101Q2_h_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q3\101Q3_b_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q3\101Q3_d_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q3\101Q3_e_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q3\101Q3_f_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q3\101Q3_h_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\101Q3\101Q3_k

  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q1\102Q1_h_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q1\102Q1_i_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q1\102Q1_j_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q1\102Q1_k_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q1\102Q1_m_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q1\102Q1_n_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q1\102Q1_o

  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q3\102Q3_f_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q3\102Q3_g_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q3\102Q3_h_lvr_land_a.csv (First value: 'The villages and towns urban district')


  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q3\102Q3_i_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q3\102Q3_j_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q3\102Q3_k_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q3\102Q3_m_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q3\102Q3_n_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q3\102Q3_o_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q3\102Q3_p

  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q4\102Q4_h_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q4\102Q4_i_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q4\102Q4_j_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q4\102Q4_k_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q4\102Q4_m_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q4\102Q4_n_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\102Q4\102Q4_o

  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\103Q1\103Q1_b_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\103Q1\103Q1_c_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\103Q1\103Q1_d_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\103Q1\103Q1_e_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\103Q1\103Q1_f_lvr_land_a.csv (First value: 'The villages and towns urban district')


  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\103Q1\103Q1_g_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\103Q1\103Q1_h_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\103Q1\103Q1_i_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\103Q1\103Q1_j_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\103Q1\103Q1_k_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\103Q1\103Q1_m_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\103Q1\103Q1_n

  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q3\109Q3_d_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q3\109Q3_e_lvr_land_a.csv (First value: 'The villages and towns urban district')


  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q3\109Q3_f_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q3\109Q3_g_lvr_land_a.csv (First value: 'The villages and towns urban district')


  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q3\109Q3_h_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q3\109Q3_i_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q3\109Q3_j_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q3\109Q3_k_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q3\109Q3_m_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q3\109Q3_n_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q3\109Q3_o

  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q4\109Q4_b_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q4\109Q4_c_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q4\109Q4_d_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q4\109Q4_e_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q4\109Q4_f_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q4\109Q4_g_lvr_land_a.csv (First value: 'The villages and towns urban district')


  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q4\109Q4_h_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q4\109Q4_i_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q4\109Q4_j_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q4\109Q4_k_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q4\109Q4_m_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q4\109Q4_n_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\109Q4\109Q4_o

  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q2\110Q2_f_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q2\110Q2_g_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q2\110Q2_h_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q2\110Q2_i_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q2\110Q2_j_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q2\110Q2_k_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q2\110Q2_m

  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q4\110Q4_f_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q4\110Q4_g_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q4\110Q4_h_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q4\110Q4_i_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q4\110Q4_j_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q4\110Q4_k_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\110Q4\110Q4_m

  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\111Q2\111Q2_g_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\111Q2\111Q2_h_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\111Q2\111Q2_i_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\111Q2\111Q2_j_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\111Q2\111Q2_k_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\111Q2\111Q2_m_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\111Q2\111Q2_n

  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\112Q4\112Q4_f_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\112Q4\112Q4_g_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\112Q4\112Q4_h_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\112Q4\112Q4_i_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\112Q4\112Q4_j_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\112Q4\112Q4_k_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\112Q4\112Q4_m

  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q2\113Q2_h_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q2\113Q2_i_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q2\113Q2_j_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q2\113Q2_k_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q2\113Q2_m_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q2\113Q2_n_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q2\113Q2_o

  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q3\113Q3_b_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q3\113Q3_c_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q3\113Q3_d_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q3\113Q3_e_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q3\113Q3_f_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q3\113Q3_g_lvr_land_a.csv (First value: 'The villages and towns urban district')


  df = pd.read_csv(file_path)


Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q3\113Q3_h_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q3\113Q3_i_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q3\113Q3_j_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q3\113Q3_k_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q3\113Q3_m_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q3\113Q3_n_lvr_land_a.csv (First value: 'The villages and towns urban district')
Processed and removed: C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv\113Q3\113Q3_o

In [11]:
import os
import pandas as pd

# 設定目標資料夾
target_dir = r'C:\Users\User\Desktop\學\大學\資料探勘\plvr_land_csv'

# 儲存未處理的檔案路徑
unprocessed_files = []

# 用來存放所有資料的清單
data_list = []

# 遍歷目標資料夾內所有 .csv 檔案
def process_csv_files(directory):
    for root, _, files in os.walk(directory):
        for file_name in files:
            if file_name.endswith(".csv"):
                file_path = os.path.join(root, file_name)
                
                try:
                    # 匯入 .csv 檔案，並將其加入 data_list
                    df = pd.read_csv(file_path, encoding='utf-8')  # 如果有編碼問題，可能需要調整 encoding
                    data_list.append(df)
                except Exception as e:
                    # 若有錯誤，記錄檔案路徑
                    unprocessed_files.append(file_path)
                    print(f"Error processing {file_path}: {e}")

# 呼叫函式處理所有檔案
process_csv_files(target_dir)

# 合併所有資料
if data_list:
    merged_data = pd.concat(data_list, ignore_index=True)

    # 輸出合併後的結果為 CSV 檔案
    output_file = r'C:\Users\User\Desktop\merged_data.csv'
    merged_data.to_csv(output_file, index=False, encoding='utf-8')  # 編碼可根據需求調整

    print(f"Data merged and saved to {output_file}")
else:
    print("No data to merge.")

# 列出未處理成功的檔案
if unprocessed_files:
    print("The following files could not be processed:")
    for file in unprocessed_files:
        print(file)


  df = pd.read_csv(file_path, encoding='utf-8')  # 如果有編碼問題，可能需要調整 encoding
  df = pd.read_csv(file_path, encoding='utf-8')  # 如果有編碼問題，可能需要調整 encoding
  df = pd.read_csv(file_path, encoding='utf-8')  # 如果有編碼問題，可能需要調整 encoding
  df = pd.read_csv(file_path, encoding='utf-8')  # 如果有編碼問題，可能需要調整 encoding
  df = pd.read_csv(file_path, encoding='utf-8')  # 如果有編碼問題，可能需要調整 encoding


Data merged and saved to C:\Users\User\Desktop\merged_data.csv


In [13]:
import pandas as pd

# 讀取合併的 CSV 檔案
merged_data = pd.read_csv(r'C:\Users\User\Desktop\merged_data.csv', encoding='utf-8')

################################
########## 資料檢視:函數 ########
################################
# 定義函數取得唯一值
def get_unique_values(df, col_index):
    # 檢查 col_index 是否有效
    if col_index < 1 or col_index > df.shape[1]:
        raise ValueError("col_index 超出範圍")
    
    # 取得唯一值
    unique_values = df.iloc[:, col_index - 1].unique()
    
    # 顯示所有唯一值
    print(f"Column {col_index} unique values:", unique_values)
    return unique_values

# 檢視第 2 欄的唯一值
get_unique_values(merged_data, 2)

####################################
########## 排除 "土地", "車位" ######
####################################
# 排除第 2 欄中為 "土地" 或 "車位" 的觀察值
merged_data_BU = merged_data[~merged_data.iloc[:, 1].isin(["土地", "車位"])]

################################
########## 資料檢視:分區 ########
################################
# 再次檢視第 5 欄的唯一值
get_unique_values(merged_data_BU, 5)

# 計算特定值的出現次數
def count_value_occurrences(df, col_index, value):
    # 檢查 col_index 是否有效
    if col_index < 1 or col_index > df.shape[1]:
        raise ValueError("col_index 超出範圍")
    
    # 計算該值的出現次數
    count = (df.iloc[:, col_index - 1] == value).sum()
    print(f"Value '{value}' occurs {count} times in column {col_index}")
    return count

# 計算第 5 欄中各類型的出現次數
count_value_occurrences(merged_data_BU, 5, "住")
count_value_occurrences(merged_data_BU, 5, "商")
count_value_occurrences(merged_data_BU, 5, "工")
count_value_occurrences(merged_data_BU, 5, "農")
count_value_occurrences(merged_data_BU, 5, "其他")

###########################
########## 只保留 "住" #####
###########################
# 篩選資料，只保留第 5 欄為 "住" 的觀察值
merged_data_BULI = merged_data_BU[merged_data_BU.iloc[:, 4] == "住"]

# 檢視篩選後資料的欄位名稱
print("Column names of merged_data_BULI:", merged_data_BULI.columns.tolist())

# 如果需要儲存篩選後的結果到 CSV
output_file = r'C:\Users\User\Desktop\merged_data_BULI.csv'
merged_data_BULI.to_csv(output_file, index=False, encoding='utf-8')
print(f"Filtered data saved to {output_file}")


  merged_data = pd.read_csv(r'C:\Users\User\Desktop\merged_data.csv', encoding='utf-8')


Column 2 unique values: ['土地' '房地(土地+建物)' '房地(土地+建物)+車位' '車位' '建物' nan]
Column 5 unique values: [nan '住' '商' ... '都市：其他:第二之一種住宅區' '都市：其他:學校(文小)' '都市：其他:第二類一般管制區']
Value '住' occurs 2071886 times in column 5
Value '商' occurs 491731 times in column 5
Value '工' occurs 51106 times in column 5
Value '農' occurs 24883 times in column 5
Value '其他' occurs 44691 times in column 5
Column names of merged_data_BULI: ['鄉鎮市區', '交易標的', '土地位置建物門牌', '土地移轉總面積平方公尺', '都市土地使用分區', '非都市土地使用分區', '非都市土地使用編定', '交易年月日', '交易筆棟數', '移轉層次', '總樓層數', '建物型態', '主要用途', '主要建材', '建築完成年月', '建物移轉總面積平方公尺', '建物現況格局-房', '建物現況格局-廳', '建物現況格局-衛', '建物現況格局-隔間', '有無管理組織', '總價元', '單價元平方公尺', '車位類別', '車位移轉總面積(平方公尺)', '車位總價元', '備註', '編號', '主建物面積', '附屬建物面積', '陽台面積', '電梯', '移轉編號', '車位移轉總面積平方公尺']
Filtered data saved to C:\Users\User\Desktop\merged_data_BULI.csv
