In [8]:
import os
import re
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# Unified data directories
Datadir = r'C:\Users\Administrator\Documents\MnewData'
output_path = r'C:\Users\Administrator\Documents\MnewData'

def list_csv_files(directory, keyword):
    """List CSV files in the specified directory that contain the keyword, case-insensitively."""
    return [file for file in os.listdir(directory) if keyword.lower() in file.lower() and file.lower().endswith('.csv')]

def read_and_process_files(directory, keyword, columns=None, encoding='gbk'):
    """Read and process CSV files, returning a concatenated DataFrame."""
    files = list_csv_files(directory, keyword)
    dfs = []
    for file in files:
        file_path = os.path.join(directory, file)
        try:
            df = pd.read_csv(file_path, encoding=encoding, usecols=columns,na_values=["n/a", "na", "-"]) if columns else pd.read_csv(file_path, encoding=encoding,skiprows=2, header=0, na_values=["n/a", "na", "-"])
            df.columns = df.columns.str.replace(' ', '')
            dfs.append(df)
        except Exception as e:
            print(f"Error reading file {file}: {e}")
    return pd.concat(dfs, ignore_index=True).drop_duplicates() if dfs else pd.DataFrame()

In [9]:
# 读取数据并重命名列
df_5GB = read_and_process_files(Datadir, '板卡规划(3000310)-5G', ['网元标识', '网元名称', '板类型'])
df_5GB['备用'] = df_5GB['网元名称'].str.extract(r'(.*)-D')
# 去掉字段 "板"
df_5GB['板类型'] = df_5GB['板类型'].str.replace('板', '')

# 删除包含 'FCU' 和 'PSU' 的行
df_5GB = df_5GB[~df_5GB['板类型'].isin(['FCU', 'PSU'])]

df_5GB.head(10)


Unnamed: 0,网元标识,网元名称,板类型,备用
2,6311500,崇阳白鹭广场-D5H,HSCTDa,崇阳白鹭广场
3,6337475,林区松柏开发区-D5H,HSCTDa,林区松柏开发区
5,6337475,林区松柏开发区-D5H,HBPOF,林区松柏开发区
7,6330397,崇阳余耕-D5H,HSCTDa,崇阳余耕
9,6330397,崇阳余耕-D5H,HBPOFbg,崇阳余耕
11,6337349,林区粮食储备仓库-D5H,HSCTDa,林区粮食储备仓库
13,6337349,林区粮食储备仓库-D5H,HBPOF,林区粮食储备仓库
16,6337485,竹溪移动综合楼-D5H,HBPOF,竹溪移动综合楼
18,6337485,竹溪移动综合楼-D5H,HSCTDa,竹溪移动综合楼
19,6337269,丹江玉景园-D5H,HBPOF,丹江玉景园


In [16]:
# 读取数据并重命名列
df_4GB = read_and_process_files(Datadir, '板卡规划(1000310)-4G',['网元标识', '网元名称', '板类型'])


df_4GB['备用2'] = df_4GB['网元名称'].str.extract(r'(.*)-D')
# 去掉字段 "板"
df_4GB['板类型'] = df_4GB['板类型'].str.replace('板', '')

# 包含 ['BPOKa', 'BPOI','SCTF','HSCTF']的行
df_4GB = df_4GB[df_4GB['板类型'].isin(['BPOKa', 'BPOI','SCTF','HSCTF'])]

df_4GB.head(10)

Unnamed: 0,网元标识,网元名称,板类型,备用2
0,575865,武当山景区好汉坡-DL3D,BPOKa,武当山景区好汉坡
10,575861,武当山杨家畈三组-DL3D,BPOKa,武当山杨家畈三组
18,351883,竹山卫校后山-DL3D,BPOKa,竹山卫校后山
19,711471,房县怡呈酒店-DLW,BPOI,房县怡呈酒店
25,929381,通山隐水洞旅游公司-DLW,BPOI,通山隐水洞旅游公司
30,453224,D_通城建材市场-DL3D,BPOKa,D_通城建材市场
38,567189,D_通城隽达龙湾城3期1栋3栋及7至10栋-DLW,HSCTF,D_通城隽达龙湾城3期1栋3栋及7至10栋
46,453438,D_通城城发集团4号楼-DLW,BPOI,D_通城城发集团4号楼
51,453408,D_崇阳新一中-DL3D,BPOKa,D_崇阳新一中
55,453442,D_通城中星天街A栋B2至3层-DLW,BPOI,D_通城中星天街A栋B2至3层


In [29]:
# 合并数据，根据 '备用' 和 '备用2' 进行匹配
merged_df = pd.merge(df_4GB, df_5GB, left_on='备用2', right_on='备用', how='left')

result_df = merged_df[['网元标识_y', '网元名称_y', '板类型_x', '备用']].rename(
    columns={
        '网元标识_y': '网元标识',
        '网元名称_y': '网元名称',
        '板类型_x': '板类型',
        '备用': '备用'
    }
)

# 删除空行
result_df = result_df.dropna(subset=['网元标识'])

# 追加数据到 df_5GB
df_5GB = pd.concat([df_5GB, result_df], ignore_index=True)


In [30]:
df_5GB.sample(10)

Unnamed: 0,网元标识,网元名称,板类型,备用
709,6337225.0,丹江明珠花园-D5H,HBPOF,丹江明珠花园
1435,6337486.0,林区湿地公园大门-D5H,HSCTDa,林区湿地公园大门
1771,6337464.0,林区农业局-D5H,HSCTDa,林区农业局
940,6311470.0,崇阳港口唐山-D5H,HSCTDa,崇阳港口唐山
416,6311557.0,通城人寿保险-D5H,HBPOFp,通城人寿保险
943,6337273.0,丹江自来水公司-D5H,HBPOF,丹江自来水公司
1052,6337253.0,丹江香山丽景-D5H,HBPOF,丹江香山丽景
1041,6337053.0,十堰房县十字街营业厅-D5S,HSCTD,十堰房县十字街营业厅
815,6341865.0,十堰郧西中医院西院区1-D5S,HBPOFbp,十堰郧西中医院西院区1
1767,6337602.0,竹山客运站-D5H,HBPOF,竹山客运站
